In [1]:
from pathlib import Path
from seeq import spy
import requests
from matplotlib import pyplot
import numpy as np
import pickle
import gzip
import math
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

from numpy import absolute
from numpy import std
from numpy import mean

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from autogluon.tabular import TabularDataset, TabularPredictor

In [3]:
def get_data(tagstosearch, datasource):
    """Search Seeq for signals by name within one datasource.

    Parameters
    ----------
    tagstosearch : str or list-like of str
        Signal name(s) to look up. A single name may be given as a bare string.
    datasource : str
        Value placed in the 'Datasource Name' column for every tag.

    Returns
    -------
    The result of ``spy.search`` for a query frame with the columns
    'Name', 'Datasource Name' and 'Type' (always 'Signal').
    """
    # A bare string would make every column a scalar, and pandas cannot build
    # a DataFrame from scalars alone; treat it as a one-item list instead.
    if isinstance(tagstosearch, str):
        tagstosearch = [tagstosearch]

    my_items = pd.DataFrame({
        'Name': tagstosearch,
        'Datasource Name': datasource,
        'Type': 'Signal',
    })

    return spy.search(my_items)


def pull_data(itemssearch, starttime, endtime, sampletime):
    """Pull data for the given items from Seeq.

    The arguments are forwarded to ``spy.pull`` as ``items``, ``start``,
    ``end`` and ``grid`` respectively, together with ``calculation=None``.
    The value returned by ``spy.pull`` is handed back unchanged.
    """
    pull_kwargs = dict(
        items=itemssearch,
        start=starttime,
        end=endtime,
        grid=sampletime,
        calculation=None,
    )
    return spy.pull(**pull_kwargs)


In [1]:
# Signals to fetch and the datasource they are searched in.
datasource = 'asdfasdf'
tagstosearch = ['43FC0080_O', '43FC0080', '43TI0076']

# Pull window and grid spacing handed to pull_data.
starttime, endtime = '8/01/2022', '1/24/2023'
sampletime = '1hr'

# Resolve the tag names to Seeq items, then pull their gridded values.
itemssearch = get_data(tagstosearch, datasource)
raw_data = pull_data(itemssearch, starttime, endtime, sampletime)

In [None]:
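# Optional offline cache (disabled): save the pulled data once, then reload it
# from disk instead of querying Seeq again.
# NOTE(review): to_csv writes the index as the first CSV column; a plain
# read_csv brings it back as an ordinary column, so pass index_col=0 (and
# parse the timestamps) if this is re-enabled.
# raw_data.to_csv('raw_data.txt')
# raw_data = pd.read_csv('raw_data.txt')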

In [5]:
# data = raw_data.values
# Features: every pulled column except '43FC0080'.
x_total = raw_data.drop('43FC0080', axis=1).copy()
# Target: the '43FC0080' column on its own.
y_total = raw_data.loc[:, '43FC0080'].copy()


In [6]:
# Shuffled hold-out split with a fixed seed: 33% of the rows go to the test set.
# NOTE(review): the rows appear to be regularly sampled time-series data, so a
# shuffled split places neighbouring samples on both sides — confirm intended.
X_train, X_test, y_train, y_test = train_test_split(
    x_total,
    y_total,
    test_size=0.33,
    shuffle=True,
    random_state=42,
)

In [7]:
# Baseline model: a shallow (depth-2) regression tree fitted on the raw arrays.
tree = DecisionTreeRegressor(max_depth=2)
tree.fit(X_train.values, y_train.values)

# Hold-out predictions; reused later when the comparison frames are built.
y_pred_test_tree = tree.predict(X_test.values)

# Hold-out R^2 kept under a tree-specific name so it is not mistaken for
# (or overwritten by) the Lasso score.
r2_score_tree = r2_score(y_test, y_pred_test_tree)

# Repeated 10-fold cross-validation (3 repeats) on the training split.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(tree, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# The scorer reports negated MAE; take absolute values to get plain MAE.
scores = absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean MAE: 0.236 (0.012)


In [8]:
# Lasso (L1-regularised linear) regression with a fixed penalty strength.
alpha = 0.1
model = Lasso(alpha=alpha)
model.fit(X_train, y_train)

# Hold-out predictions and their R^2.
y_pred_test_lasso = model.predict(X_test)
r2_score_lasso = r2_score(y_test, y_pred_test_lasso)

# Repeated 10-fold cross-validation (3 repeats) on the training split;
# the scorer returns negated MAE, hence the absolute value.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = absolute(
    cross_val_score(model, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean MAE: 0.282 (0.010)


In [10]:
# AdaBoost over depth-2 regression trees. The fit is randomised, so the
# estimator is seeded like the train/test split to make re-runs reproducible.
regr_2 = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=2),
    n_estimators=300,
    random_state=42,
)

regr_2.fit(X_train.values, y_train.values)

# Hold-out predictions; reused later when the comparison frames are built.
y_pred_test_tree_ada = regr_2.predict(X_test.values)

# Hold-out R^2 kept under an AdaBoost-specific name so it is not mistaken for
# (or overwritten by) the Lasso score.
r2_score_tree_ada = r2_score(y_test, y_pred_test_tree_ada)

# Repeated 10-fold cross-validation (3 repeats) on the training split.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(regr_2, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# The scorer reports negated MAE; take absolute values to get plain MAE.
scores = absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean MAE: 0.239 (0.013)


In [11]:
# Ordinary least-squares linear regression.
model_linear = LinearRegression()
model_linear.fit(X_train, y_train)

# Hold-out predictions; reused later when the comparison frames are built.
y_pred_test_linear = model_linear.predict(X_test)

# Hold-out R^2 kept under a model-specific name so the Lasso score computed
# earlier is not overwritten by this cell.
r2_score_linear = r2_score(y_test, y_pred_test_linear)

# Repeated 10-fold cross-validation (3 repeats) on the training split.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(model_linear, X_train, y_train, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# The scorer reports negated MAE; take absolute values to get plain MAE.
scores = absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean MAE: 0.274 (0.012)


In [17]:
# Features and label side by side in one frame, as passed to TabularPredictor.fit.
# Despite its name, this frame holds the *training* split.
test_df = pd.concat(objs=[X_train, y_train], axis='columns')

In [18]:
# Peek at the first row to check the column layout (features + label).
test_df.head(n=1)

Unnamed: 0,43FC0080_O,43TI0076,43FC0080
2022-09-08 14:00:00-05:00,66.722413,86.149544,11.440933


In [19]:
# Folder in which AutoGluon stores the trained models.
save_path = 'agModels-predictClass'

# Build the predictor for the '43FC0080' label and fit it on the combined
# features-plus-label frame.
predictor = (
    TabularPredictor(label='43FC0080', path=save_path)
    .fit(test_df)
)

Beginning AutoGluon training ...
AutoGluon will save models to "agModels-predictClass\"
AutoGluon Version:  0.6.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22000
Train Data Rows:    2831
Train Data Columns: 2
Label Column: 43FC0080
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (13.4946400293448, 9.902973376357657, 11.38658, 0.66689)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2057.96 MB
	Train Data (Original)  Memory Usage: 0.05 MB (0.0% of available memory)
	Inferring data type of each

[1000]	valid_set's rmse: 0.267226
[2000]	valid_set's rmse: 0.260232
[3000]	valid_set's rmse: 0.258673
[4000]	valid_set's rmse: 0.256921
[5000]	valid_set's rmse: 0.256334
[6000]	valid_set's rmse: 0.255501
[7000]	valid_set's rmse: 0.25548
[8000]	valid_set's rmse: 0.255413
[9000]	valid_set's rmse: 0.255855


	-0.2552	 = Validation score   (-root_mean_squared_error)
	13.99s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: LightGBM ...
	-0.2551	 = Validation score   (-root_mean_squared_error)
	1.38s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2638	 = Validation score   (-root_mean_squared_error)
	0.77s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: CatBoost ...
	-0.2575	 = Validation score   (-root_mean_squared_error)
	2.51s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2533	 = Validation score   (-root_mean_squared_error)
	0.74s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2801	 = Validation score   (-root_mean_squared_error)
	4.97s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	-0.2606	 = Validation score   (-root_mean_squared_error)
	0.95s	 = Training   runtime
	0.01s	 = Validation runtime
Fit

In [21]:
# Alternatives kept for reference (not executed):
# predictor.predict(test_data, model='LightGBM')
# predictor = TabularPredictor.load("agModels-predictClass/models/WeightedEnsemble_L2")
# NOTE(review): the load path above points at a single model's sub-folder;
# presumably it should be the predictor folder (save_path) — confirm before re-enabling.

# Show the name of the model AutoGluon reports as best.
predictor.get_model_best()


'WeightedEnsemble_L2'

In [24]:
# Keep the best model's name so the prediction step can request it explicitly,
# and echo it as the cell output.
model_to_use = predictor.get_model_best()
model_to_use

'WeightedEnsemble_L2'

In [25]:
# Hold-out predictions from the selected AutoGluon model.
model_pred = predictor.predict(
    X_test,
    model=model_to_use,
)

In [None]:
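# Alternative (disabled): predict without naming a model.
# NOTE(review): presumably this falls back to the predictor's best model, which
# would make it equivalent to the explicit call — confirm against the AutoGluon docs.
# y_pred = predictor.predict(X_test)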

In [27]:

def _prediction_frame(values, column, index):
    """Return a one-column DataFrame of predictions labelled by *index*, sorted by index.

    ``values`` is converted with ``np.asarray`` so it is matched to ``index``
    by position, whether it arrives as a NumPy array or as a pandas Series.
    """
    return pd.DataFrame({column: np.asarray(values)}, index=index).sort_index()


# One frame per model: the hold-out predictions keyed by the X_test index and
# sorted by that index, ready to be drawn as traces on the comparison plot.
final_test_tree = _prediction_frame(y_pred_test_tree, 'y_pred_test', X_test.index)
final_test_tree_ada = _prediction_frame(y_pred_test_tree_ada, 'y_pred_test_tree_ada', X_test.index)
final_test_Laso = _prediction_frame(y_pred_test_lasso, 'y_pred_test_lasso', X_test.index)
final_test_linear = _prediction_frame(y_pred_test_linear, 'y_pred_test_linear', X_test.index)
autoglo_df = _prediction_frame(model_pred, 'autoglo_ypredictor', X_test.index)

In [28]:
# Display the AutoGluon prediction frame (one 'autoglo_ypredictor' column).
autoglo_df

Unnamed: 0,autoglo_ypredictor
2022-08-01 00:00:00-05:00,11.995021
2022-08-01 06:00:00-05:00,11.994965
2022-08-01 07:00:00-05:00,11.984920
2022-08-01 08:00:00-05:00,11.985845
2022-08-01 12:00:00-05:00,11.979798
...,...
2023-01-23 10:00:00-06:00,11.629722
2023-01-23 14:00:00-06:00,11.495823
2023-01-23 16:00:00-06:00,11.886750
2023-01-23 23:00:00-06:00,11.520061


In [29]:
# Use the white Plotly theme for figures created from here on.
pio.templates.default = "plotly_white"

# Larger fonts for the figure, applied through update_layout further down.
plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

# Wide-form px.line plots the frame's index on x under the index's name, or
# "index" when it is unnamed. Key the label on that name so the axis is
# actually titled "Date".
index_label = raw_data.index.name or "index"
fig = px.line(
    raw_data,
    labels={index_label: "Date", "value": "SensorValues", "variable": "Sensor"},
)

# Overlay each model's hold-out predictions: (frame, column, legend name).
prediction_traces = [
    (final_test_tree, 'y_pred_test', 'y_pred_test_tree 43FC0080'),
    (final_test_tree_ada, 'y_pred_test_tree_ada', 'y_pred_test_tree_ada 43FC0080'),
    (final_test_Laso, 'y_pred_test_lasso', 'y_pred_test_lasso 43FC0080'),
    (final_test_linear, 'y_pred_test_linear', 'y_pred_test_linear 43FC0080'),
    (autoglo_df, 'autoglo_ypredictor', 'y_pred_test_autoGluon 43FC0080'),
]
for frame, column, trace_name in prediction_traces:
    fig.add_scatter(x=frame.index, y=frame[column], name=trace_name)

# Horizontal legend just above the plot area, without a legend title.
fig.update_layout(template=plot_template, legend=dict(orientation='h', y=1.02, title_text=""))


fig.show()