In [None]:
import boto3
# Resource-level S3 handle; used below to create the data bucket.
s3 = boto3.resource(service_name='s3')

In [None]:
# S3 bucket names must be lowercase (letters, digits, dots and hyphens only);
# a mixed-case name is rejected by S3 when creating a new bucket.
bucket_name = 'fractionatordata'

# Outside us-east-1 the target region has to be passed explicitly as a
# LocationConstraint; for us-east-1 the constraint must be omitted.
bucket_region = boto3.Session().region_name

try:
    if bucket_region and bucket_region != 'us-east-1':
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': bucket_region},
        )
    else:
        s3.create_bucket(Bucket=bucket_name)
    print("S3 bucket was created")
except Exception as e:
    # Best effort: report the failure (for example, the bucket already
    # exists) and let the notebook continue.
    print("S3 error: ",e)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from scipy import stats
from sklearn.metrics import mean_absolute_error 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import median_absolute_error
from datetime import date
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb


In [None]:
# Read Fractionator.csv (path relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Fractionator.csv')

In [None]:
# Parse the 'Time' column into pandas datetime values.
time_column = 'Time'
df[time_column] = pd.to_datetime(df[time_column])

In [None]:
# Keep only the rows whose AI_2020_OVERHEAD_C5'S_MOL value has an absolute
# z-score strictly below 3 (computed over that column).
overhead_zscores = stats.zscore(df["AI_2020_OVERHEAD_C5'S_MOL"])
within_three_sigma = np.abs(overhead_zscores) < 3
df = df[within_three_sigma]

In [None]:
# Map the long instrument-tag column names to the short aliases used by the
# rest of the notebook.
column_aliases = {
    "AI_2020_OVERHEAD_C5'S_MOL": "AI_top",
    "AI_2021_MIDDLE_C7'S_MOL": "AI_middle",
    "AI_2022_BOTTOM_C3'S_MOL": "AI_bottom",
    'FIC_2100_PV_FEED_FURNACE_FUEL_SCFH': 'Furnace',
    'FIC_2004_PV_TOP_REFLUX_MBBL/D': 'Reflux_top',
    'FI-2005_PV_FEED_FLOW_MBBL/D': 'Feed',
}
df.rename(columns=column_aliases, inplace=True)

In [None]:
# Narrow the frame to the four renamed columns selected here.
selected_columns = ['AI_top', 'Furnace', 'Feed', 'Reflux_top']
df_uni = df[selected_columns]

In [None]:
# Single-column frame holding only 'AI_top'; this is the series that gets windowed below.
target_columns = ['AI_top']
df_Xgb = df_uni[target_columns]

In [None]:
# Preview the first five rows of the single-column frame.
df_Xgb.head(n=5)

In [None]:
def df_to_X_y(df, window_size=10):
    """Turn a frame into sliding-window features and next-step labels.

    For every start position ``i`` the feature sample is the block of rows
    ``i .. i + window_size - 1`` (all columns) and the label is the value of
    the FIRST column in row ``i + window_size``.

    Args:
        df: Object exposing ``to_numpy()`` that yields a 2-D array
            (rows x columns), e.g. a pandas DataFrame.
        window_size: Number of consecutive rows in each feature window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``X`` has shape
        ``(n_windows, window_size, n_columns)`` and ``y`` has shape
        ``(n_windows,)``, where ``n_windows = len(df) - window_size``.
        Both arrays are empty when the frame has no more than
        ``window_size`` rows.
    """
    # Use the frame that was passed in. The previous version read the global
    # ``df_Xgb`` here, so the ``df`` argument was silently ignored.
    values = df.to_numpy()
    n_windows = len(values) - window_size

    windows = [values[start:start + window_size] for start in range(n_windows)]
    labels = [values[start + window_size][0] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

In [None]:
# Build 10-row windows and their next-step labels from the single-column
# frame, then show the resulting array shapes.
X, y = df_to_X_y(df_Xgb, window_size=10)
(X.shape, y.shape)

In [None]:
# Flatten the labels to 1-D and collapse each window to a flat feature row.
# The sizes are derived from the arrays themselves rather than hard-coded, so
# the cell keeps working when the input data or the window size changes.
y = y.reshape(-1)
X = X.reshape(len(X), -1)
X.shape, y.shape

In [None]:
# Index separating the first 80% of the samples (training) from the rest.
train_fraction = 0.8
split_index = int(train_fraction * len(X))
split_index

In [None]:
# Order-preserving split: everything before split_index is training data,
# everything from split_index onwards is test data.
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
import os

# Key prefix under which this model's train/test files are stored in the bucket.
# (The original literal opened with ' and closed with ", a syntax error.)
prefix = 'xgboost_AI_top'

# Full S3 URIs of the training and test CSV files.
train_csv_path = 's3://{}/{}/{}/{}'.format(bucket_name, prefix, 'train', 'train.csv')
test_csv_path = 's3://{}/{}/{}/{}'.format(bucket_name, prefix, 'test', 'test.csv')
print(train_csv_path)
print(test_csv_path)


In [None]:
# train_data / test_data were never defined anywhere in the notebook, so they
# are built here. SageMaker's built-in XGBoost expects CSV input with the
# label in the FIRST column and no header row, hence the label is stacked in
# front of the (flattened) window features.
train_data = pd.DataFrame(
    np.column_stack([y_train, X_train.reshape(len(X_train), -1)])
)
test_data = pd.DataFrame(
    np.column_stack([y_test, X_test.reshape(len(X_test), -1)])
)

# NOTE(review): writing straight to an s3:// URL relies on pandas' optional
# S3 filesystem support being installed in the kernel -- confirm.
train_data.to_csv(train_csv_path, index=False, header=False)
test_data.to_csv(test_csv_path, index=False, header=False)


In [None]:
import sagemaker
from sagemaker import image_uris
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

In [None]:
# Look up the XGBoost (version "1.2-2") container image URI for the region
# of the current boto3 session and show it.
region_name = boto3.Session().region_name
xgboost_container = image_uris.retrieve(
    framework='xgboost',
    region=region_name,
    version="1.2-2",
)
display(xgboost_container)

In [None]:
# Hyperparameters handed to the SageMaker built-in XGBoost container.
# Fixes relative to the previous version:
#   * "max_depth" had mismatched quotes ('3"), which is a syntax error.
#   * The number of boosting rounds is documented as "num_round" (required by
#     the built-in algorithm) rather than the scikit-learn style "n_estimators".
#   * The step size is documented as "eta" rather than "learning_rate".
#   * "reg:linear" is the deprecated name of "reg:squarederror".
hyperparameters = {
    "base_score": 0.5,
    "booster": 'gbtree',
    "num_round": 1000,
    "early_stopping_rounds": 50,
    "objective": 'reg:squarederror',
    "max_depth": 3,
    "eta": 0.001,
}

In [None]:
# S3 location under the model prefix for the 'output' folder.
s3_folder_template = "s3://{}/{}/{}/"
output_path = s3_folder_template.format(bucket_name, prefix, 'output')
print(output_path)

In [None]:
# Training job definition for the built-in XGBoost container.
# Fixes relative to the previous version:
#   * sagemaker.get_excution_role was a typo for get_execution_role.
#   * output_path was computed earlier in the notebook but never passed, so
#     artifacts would have gone to the SDK's default location instead.
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m4.xlarge',
    volume_size=5,
    output_path=output_path,
    # Spot training: max_wait must be at least max_run (both in seconds).
    use_spot_instances=True,
    max_run=300,
    max_wait=600,
)

In [None]:
# Input channels for the training job. The previous version had unbalanced
# parentheses and passed content_type to str.format() instead of
# TrainingInput, so the cell did not even parse.
content_type = 'csv'
train_input = TrainingInput(
    "s3://{}/{}/{}/".format(bucket_name, prefix, 'train'),
    content_type=content_type,
)
test_input = TrainingInput(
    "s3://{}/{}/{}/".format(bucket_name, prefix, 'test'),
    content_type=content_type,
)

In [None]:
# Start the training job: the 'train' channel feeds training data and the
# 'validation' channel is served from the test split.
data_channels = {
    'train': train_input,
    'validation': test_input,
}
estimator.fit(data_channels)

In [None]:
from sagemaker.serializers import CSVSerializer

# Deploy the trained model behind a real-time endpoint.
# Fixes relative to the previous version: the serializer class name,
# the 'estimaor' variable name and the 'instance_cout' keyword were misspelled
# (the deploy keyword is initial_instance_count), and the serializer has to be
# passed as an instance rather than as the class itself.
xgb_predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=CSVSerializer(),
)

In [None]:
# Keep the deployed endpoint's name in its own variable.
endpoint_AI_top = xgb_predictor.endpoint_name