In [None]:
#!/usr/bin/env python3
import pandas as pd
import requests
import time
from datetime import datetime, timedelta
import pytz
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import elasticsearch
import os
pd.options.plotting.backend = "plotly"


In [None]:
# Analysis window configuration.
avg_over_min = 1      # width of the resampling bucket, in minutes
days_look_back = 30   # length of the analysis window, in days

# Read the clock exactly once: every bound below is derived from the same
# instant, so the string, timezone-aware and UTC variants can never disagree
# by a second because of separate datetime.now() calls.
window_end = datetime.now()
window_start = window_end - timedelta(days=days_look_back)

# Naive wall-clock strings (second resolution) in the machine's local time.
en_date = window_end.strftime('%Y-%m-%dT%H:%M:%S')
st_date = window_start.strftime('%Y-%m-%dT%H:%M:%S')

# Timezone-aware bounds; astimezone() on a naive datetime interprets it as
# system-local time before converting.
new_york_tz = pytz.timezone('America/New_York')
en_date_dt = window_end.astimezone(new_york_tz)
st_date_dt = window_start.astimezone(new_york_tz)

# UTC bounds formatted as ISO-8601 'Z' strings for the Praxis request URL.
st_date_utc = window_start.astimezone(pytz.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')
en_date_utc = window_end.astimezone(pytz.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')


In [None]:
import glob
# Remove every CSV in the working directory before running the scraper.
# NOTE(review): this deletes *all* '*.csv' files here, not only scraper output.
filePath = glob.glob('*.csv')
for stale_csv in filePath:
    try:
        os.remove(stale_csv)
    except FileNotFoundError:
        # The file vanished between the glob and the delete.
        print("Can not delete the file as it doesn't exists")

# Run the scraper and stop here if it fails, instead of letting a later cell
# trip over a missing CSV.
scraper_status = os.system('python3 weather_scraper.py')
if scraper_status != 0:
    raise RuntimeError(
        'weather_scraper.py exited with status %s' % scraper_status
    )

In [None]:
# Import and format Praxis data

uri = (
    'https://aws.southcoastscience.com/topicMessages?topic=nyu/brooklyn/loc/3/particulates&'
    'startTime=%s&endTime=%s&checkpoint=**:/%i:00'
    % (st_date_utc, en_date_utc, avg_over_min)
)
print(uri)

# NOTE(review): credential is hard-coded in the notebook; consider loading it
# from the environment or a settings module instead.
header = {"authorization": "api-key nyu-brooklyn"}

# Collect one frame per page and concatenate once at the end, rather than
# growing a DataFrame inside the loop.
praxis_pages = []

while uri != '':
    response = requests.get(uri, headers=header, timeout=60)
    # Surface HTTP errors here instead of failing later on a missing key.
    response.raise_for_status()
    payload = response.json()
    items = payload['Items']

    # An empty page would yield a timezone-naive index that tz_convert rejects.
    if items:
        praxis_pages.append(pd.DataFrame({
            'ts': pd.to_datetime([ele['rec'] for ele in items]).tz_convert(tz='US/Eastern'),

            'praxis_pm1_vals': [ele['val']['pm1'] for ele in items],
            'praxis_pm2p5_vals': [ele['val']['pm2p5'] for ele in items],
            'praxis_pm10_vals': [ele['val']['pm10'] for ele in items],

            'praxis_pm1_vals_adj': [ele['exg']['rn20']['pm1'] for ele in items],
            'praxis_pm2p5_vals_adj': [ele['exg']['rn20']['pm2p5'] for ele in items],
            'praxis_pm10_vals_adj': [ele['exg']['rn20']['pm10'] for ele in items],
        }))

    # The response carries a 'next' URL while more pages remain.
    uri = payload.get('next', '')

    # Pause between page requests.
    time.sleep(0.5)

if not praxis_pages:
    raise ValueError('Praxis API returned no records for the requested window')

# 'min' is the minute alias accepted by both older and current pandas.
praxis_df = (
    pd.concat(praxis_pages)
    .set_index('ts')
    .resample('%imin' % avg_over_min)
    .mean()
)

In [None]:
# Display the resampled Praxis frame (the cell's last expression is rendered).
praxis_df

In [None]:
# Locate the weather CSV. Sorting makes the choice deterministic if more than
# one CSV is present, and the explicit check replaces an opaque IndexError
# when none is found.
weather_csv_paths = sorted(glob.glob('*.csv'))
if not weather_csv_paths:
    raise FileNotFoundError('No weather CSV found in the working directory')

df_weather = pd.read_csv(weather_csv_paths[0])
print(df_weather)

In [None]:
# df_weather['ts'] = pd.to_datetime(df_weather['Date'] + ' ' + df_weather['Time']).dt.tz_localize(tz='US/Eastern')
# df_weather = df_weather.set_index('ts').resample('%iT' % avg_over_min).mean()
# df_weather = df_weather.fillna(df_weather.mean())
# Build a US/Eastern timestamp index from the 'Date' and 'Time' text columns.
# NOTE(review): tz_localize raises on ambiguous/non-existent wall times around
# DST changes — confirm the scraper output never contains them.
weather_ts = pd.to_datetime(df_weather["Date"] + ' ' + df_weather['Time']).dt.tz_localize(tz='US/Eastern')

# Keep only numeric columns before averaging: taking the mean of the text
# columns ('Date', 'Time') raises a TypeError on recent pandas versions.
weather_numeric = df_weather.assign(ts=weather_ts).set_index('ts').select_dtypes(include='number')

# Average into fixed-width buckets ('min' is the minute alias accepted by both
# older and current pandas) and fill empty buckets with each column's overall
# mean from the unresampled data.
df_weather = (
    weather_numeric
    .resample('%imin' % avg_over_min)
    .mean()
    .fillna(weather_numeric.mean())
)

# Restrict to the configured analysis window.
df_weather = df_weather.loc[st_date:en_date]


In [None]:
# df_weather.tail() 

# Keep only the two weather columns used downstream, then display the frame.
weather_feature_cols = ['Temperature_C', 'Humidity_%']
df_weather = df_weather.loc[:, weather_feature_cols]
df_weather

In [None]:
# Trim the Praxis frame to the span between the first and last weather
# timestamps, then display it.
weather_first_ts = str(df_weather.index[0])
weather_last_ts = str(df_weather.index[-1])
praxis_df = praxis_df.loc[weather_first_ts:weather_last_ts]
praxis_df


In [None]:
# Timestamp taken when this cell runs; used as the default `group` argument
# of sensor_query.
GROUP = time.time()


def scroll(es, index, body, scroll='2m', size=1000, timeout=25, **kw):
    """Yield successive pages of hits from a scrolled search.

    Args:
        es: client object exposing ``search(...)`` and ``scroll(...)``.
        index: index name passed to ``es.search``.
        body: query body passed to ``es.search``.
        scroll: scroll keep-alive passed to both ``search`` and ``scroll``.
        size: page size passed to ``es.search``.
        timeout: search timeout; an int is rendered as ``'<n>s'``.
        **kw: extra keyword arguments forwarded to ``es.search``.

    Yields:
        The ``page['hits']['hits']`` list of each page, stopping at the first
        empty page.
    """
    if isinstance(timeout, int):
        timeout = f'{int(timeout)}s'

    page = es.search(index=index, body=body, scroll=scroll, size=size, timeout=timeout, **kw)
    while True:
        scroll_id = page['_scroll_id']
        hits = page['hits']['hits']
        if not len(hits):
            return
        yield hits
        page = es.scroll(scroll_id=scroll_id, scroll=scroll)


def sensor_query(key=None, nodeid=None, start=None, end=None, k_time="time", group=GROUP):
    """Build a bool/must query with an optional term filter and a time range.

    Args:
        key, nodeid: when both are truthy, add a term clause on
            ``'<key>.keyword'`` equal to ``nodeid``.
        start: lower bound (``gte``) of the range; omitted when falsy.
        end: upper bound (``lte``) of the range; ``'now'`` when falsy.
        k_time: name of the field the range applies to.
        group: accepted but not used by this function.

    Returns:
        ``{"query": {"bool": {"must": [...]}}}``; a range clause is always
        present because ``end`` falls back to ``'now'``.
    """
    clauses = []
    if key and nodeid:
        clauses.append({"term": {f'{key}.keyword': nodeid}})

    upper = end or 'now'
    bounds = {"gte": start, "lte": upper} if start else {"lte": upper}
    clauses.append({"range": {k_time: bounds}})

    return {"query": {"bool": {"must": clauses}}}

def download_sensor_data(table, key=None, nodeid=None, start=None, end=None, save=True, k_time='time', **kw):
    """Pull documents for one sensor and return them or save them as JSON lines.

    Args:
        table: index name handed to ``scroll``; also the fallback file name.
        key, nodeid, start, end, k_time: forwarded to ``sensor_query``.
        save: when False, return the documents as a list; when True, write
            them to ``data/<group>/<nodeid or table>.json`` one JSON object
            per line.
        **kw: extra keyword arguments forwarded to ``sensor_query``; a
            ``group`` entry also selects the output directory.

    Returns:
        A list of ``_source`` dicts when ``save`` is False, otherwise the path
        of the file that was written.
    """
    # Imported locally so serialisation never depends on whatever the
    # module-level name `json` happens to be bound to.
    import json

    # `group` was previously referenced without being defined in this scope,
    # so save=True raised NameError. Fall back to the module default.
    group = kw.get('group', GROUP)

    query = sensor_query(key, nodeid, start, end, k_time=k_time, **kw)
    print(query)

    def pull():
        """Yield each hit's ``_source``, logging one progress line per page."""
        with tqdm(scroll(es, table, query)) as pbar:
            for i, hits in enumerate(pbar):
                hits = [h['_source'] for h in hits]
                times = [h[k_time] for h in hits]
                pbar.write('{}. n hits: {}. {} - {}'.format(i, len(hits), min(times), max(times)))
                for h in hits:
                    yield h

    if not save:
        return list(pull())

    fname = 'data/{}/{}.json'.format(group, nodeid or table)
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    print(f'Pulling node={nodeid} for ({start} -> {end}) ... saving to {fname}')
    with open(fname, 'w') as f:
        for h in pull():
            f.write(json.dumps(h) + '\n')
    print('all done!')
    return fname

In [None]:
from importlib import reload
import settings
# Re-import settings so edits to the file are picked up without a restart.
reload(settings)
es = elasticsearch.Elasticsearch('https://es.master1.sonycproject.com', http_auth=('elastic', settings.es_password))

# Pull one extra day so the configured window is fully covered.
ss = download_sensor_data('status', 'fqdn', 'sonycnode-dca632ceb490', start='now-%id' % (days_look_back+1), save=False)

data = {}
# 'dt' is an epoch timestamp in seconds. Parse it as UTC and convert to
# US/Eastern; going through datetime.fromtimestamp() would first shift it into
# the host's local zone, which is only correct on a machine set to Eastern.
data['ts'] = pd.to_datetime([int(ele['aq']['dt']) for ele in ss], unit='s', utc=True).tz_convert('US/Eastern')

data['piera7100_pm1_vals'] = [ele['aq']['PM1.0'] for ele in ss]
data['piera7100_pm2p5_vals'] = [ele['aq']['PM2.5'] for ele in ss]
data['piera7100_pm10_vals'] = [ele['aq']['PM10'] for ele in ss]

piera7100_df = pd.DataFrame(data)

# 'min' is the minute alias accepted by both older and current pandas.
piera7100_df = piera7100_df.set_index('ts').resample('%imin' % avg_over_min).mean()

In [None]:
# Trim the Piera frame to the span between the first and last weather
# timestamps, then display it.
weather_first_ts = str(df_weather.index[0])
weather_last_ts = str(df_weather.index[-1])
piera7100_df = piera7100_df.loc[weather_first_ts:weather_last_ts]
piera7100_df

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
# Align reference, low-cost sensor and weather series on a shared index.
aligned_series = {
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_O1': piera7100_df["piera7100_pm2p5_vals"],
    'Temp': df_weather["Temperature_C"],
    'RelHum': df_weather["Humidity_%"],
}
df_main = pd.DataFrame(aligned_series)

# Features and target, with gaps replaced by the respective column mean.
# NOTE(review): the means are computed over the whole frame, i.e. before any
# train/test split — confirm this is intended.
feature_cols = ['Sensor_O1', 'Temp', 'RelHum']
X = df_main[feature_cols]
X = X.fillna(X.mean())

Y = df_main['RefSt']
Y = Y.fillna(Y.mean())


In [None]:
# Unshuffled split: the first 67% of rows train, the last 33% test.
X_MLRtrain, X_MLRtest, Y_MLRtrain, Y_MLRtest = train_test_split(
    X, Y,
    test_size=0.33,
    shuffle=False,
    random_state=None,
)

# ee = OneClassSVM(nu=0.51)
# yhat = ee.fit_predict(X_MLRtrain)
# # select all rows that are not outliers
# mask = yhat != -1
# X_MLRtrain, Y_MLRtrain = X_MLRtrain[mask], Y_MLRtrain[mask]


# Re-join target and features per split, with the target as first column.
mlr_feature_cols = ["Sensor_O1", "Temp", "RelHum"]
df_MLRtrain = pd.concat([Y_MLRtrain.rename('RefSt'), X_MLRtrain[mlr_feature_cols]], axis=1)
df_MLRtest = pd.concat([Y_MLRtest.rename('RefSt'), X_MLRtest[mlr_feature_cols]], axis=1)


In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

def loss_functions(y_true, y_pred):
    """Print R-squared, RMSE and MAE of ``y_pred`` against ``y_true``.

    Args:
        y_true: reference values.
        y_pred: predicted values, aligned with ``y_true``.
    """
    mse = mean_squared_error(y_true, y_pred)
    print("Loss functions:")
    print("* R-squared =", r2_score(y_true, y_pred))
    # The line is labelled RMSE, so report the square root of the MSE
    # (previously the raw MSE was printed under this label).
    print("* RMSE =", mse ** 0.5)
    print("* MAE =", mean_absolute_error(y_true, y_pred))


# %%
# Normalise sensor data
def normalize(col):
    """Return ``col`` centred on its mean and scaled by its standard deviation."""
    centred = col - col.mean()
    return centred / col.std()

# Add a standardised copy of each modelling column to df_main.
# NOTE(review): "normSensor_O3" is derived from the "Sensor_O1" column — the
# name looks like a leftover; confirm before renaming.
for source_col, norm_col in [
    ("RefSt", "normRefSt"),
    ("Sensor_O1", "normSensor_O3"),
    ("Temp", "normTemp"),
    ("RelHum", "normRelHum"),
]:
    df_main[norm_col] = normalize(df_main[source_col])

In [None]:
# Plot the raw sensor reading next to the reference reading.
Sensor_O3_RefSt_factor = df_main.loc[:, ["Sensor_O1", "RefSt"]]
Sensor_O3_RefSt_factor.plot()

In [None]:
from sklearn.linear_model import LinearRegression

import seaborn as sns
# Model
lr = LinearRegression()

# Fit
lr.fit(X_MLRtrain, Y_MLRtrain)

# Get MLR coefficients
print('Intercept: \n', lr.intercept_)
print('Coefficients: \n', lr.coef_)

# Predict with the fitted estimator rather than a hand-expanded three-term
# formula, so the prediction always uses the columns the model was fitted on.
df_MLRtest["MLR_Pred"] = lr.predict(X_MLRtest)

# R-squared on the training split
print(lr.score(X_MLRtrain, Y_MLRtrain))

# Plot regression
sns.lmplot(x = 'RefSt', y = 'MLR_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'})

# Loss
loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["MLR_Pred"])

# Plot linear (last expression, so this is the figure the cell renders)
df_MLRtest[["RefSt", "MLR_Pred"]].plot()

In [None]:
# Wide matplotlib view of reference vs. linear-regression prediction.
fig, ax = plt.subplots(figsize=(30, 7))
ax.plot(df_MLRtest.loc[:, ["RefSt", "MLR_Pred"]])

In [None]:
#With stochastic Gradient Descent
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
# Model
# sgdr = SGDRegressor(loss='squared_loss', alpha=.001, tol=1e-5)
# Squared loss is SGDRegressor's default, so it is left implicit: the spelling
# of that option changed between scikit-learn releases ('squared_loss' was
# renamed 'squared_error'), and naming either one breaks on the other side.
# random_state pins the shuffling so reruns give the same coefficients.
sgdr = SGDRegressor(max_iter = 5, random_state = 0)

# Normalize: fit the scaler on the training split only, reuse it for test
sc = StandardScaler()
X_MLR_SGDtrain = sc.fit_transform(X_MLRtrain)
X_MLR_SGDtest = sc.transform(X_MLRtest)

# Fit
sgdr.fit(X_MLR_SGDtrain, Y_MLRtrain)

# Get MLR coefficients
print('Intercept: \n', sgdr.intercept_)
print('Coefficients: \n', sgdr.coef_)
print('Iters: \n', sgdr.n_iter_)
print(sgdr.get_params())

# Predict on the scaled test features (coefficients live in scaled space)
df_MLRtest["MLR_SGD_Pred"] = sgdr.predict(X_MLR_SGDtest)

# Plot regression
sns.lmplot(x = 'RefSt', y = 'MLR_SGD_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'})

# Loss
loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["MLR_SGD_Pred"])

# Plot linear (last expression, so this is the figure the cell renders)
df_MLRtest[["RefSt", "MLR_SGD_Pred"]].plot()

In [None]:
# Wide matplotlib view of reference vs. SGD-regression prediction.
fig, ax = plt.subplots(figsize=(30, 7))
ax.plot(df_MLRtest.loc[:, ["RefSt", "MLR_SGD_Pred"]])

In [None]:
# %%
# K-Nearest Neighbor
from sklearn.neighbors import KNeighborsRegressor

# Model + fit in one step (fit returns the estimator)
knn = KNeighborsRegressor(n_neighbors = 19).fit(X_MLRtrain, Y_MLRtrain)

# Predict on the held-out split
knn_test_pred = knn.predict(X_MLRtest)
df_MLRtest["KNN_Pred"] = knn_test_pred
# print(df_MLRtest)

# Plot linear
# df_MLRtest[["RefSt", "KNN_Pred"]].plot()
# plt.xticks(rotation=20)

# Plot regression
# sns.lmplot(x = 'RefSt', y = 'KNN_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 

# Loss
loss_functions(y_pred = df_MLRtest["KNN_Pred"], y_true = df_MLRtest["RefSt"])

# Reference vs. prediction (rendered as the cell output)
df_MLRtest.loc[:, ["RefSt", "KNN_Pred"]].plot()

In [None]:
# Wide matplotlib view of reference vs. KNN prediction.
fig, ax = plt.subplots(figsize=(30, 7))
ax.plot(df_MLRtest.loc[:, ["RefSt", "KNN_Pred"]])

In [None]:
# # %% uncomment to run KNN with hyper parameters
# # K-Nearest Neighbor stats vs. hyperparameters
# def knn_stats():
#     knn_aux = pd.DataFrame({'RefSt': Y_MLRtest})

#     n_neighbors = [*range(1, 151, 1)]
#     r_squared = []
#     rmse = []
#     mae = []
#     time_ms = []

#     for i in n_neighbors:
#         # Model
#         knn = KNeighborsRegressor(n_neighbors=i)

#         # Fit
#         start_time = float(datetime.now().strftime('%S.%f'))
#         knn.fit(X_MLRtrain, Y_MLRtrain)
#         end_time = float(datetime.now().strftime('%S.%f'))
#         execution_time = (end_time - start_time) * 1000

#         # Predict
#         knn_aux["KNN_Pred"] = knn.predict(X_MLRtest)
        

#         # Loss
#         r_squared.append(r2_score(knn_aux["RefSt"], knn_aux["KNN_Pred"]))
#         rmse.append(mean_squared_error(knn_aux["RefSt"], knn_aux["KNN_Pred"]))
#         mae.append(mean_absolute_error(knn_aux["RefSt"], knn_aux["KNN_Pred"]))
#         time_ms.append(execution_time)

#     knn_stats = pd.DataFrame({'k': n_neighbors, 'r_squared': r_squared, 'rmse': rmse, 'mae': mae, 'time_ms': time_ms})
#     knn_stats = knn_stats.set_index('k') # index column (X axis for the plots)
#     print(knn_stats)
#     plt.plot(knn_aux[["RefSt", "KNN_Pred"]])
#     # plot
#     # knn_stats[["r_squared"]].plot()
#     # knn_stats[["rmse"]].plot()
#     # knn_stats[["mae"]].plot()
#     # knn_stats[["time_ms"]].plot()
    
# knn_stats()

In [None]:
# Rebuild the aligned input frame and attach KNN predictions for every row.
knn_aligned = pd.DataFrame({
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_O1': piera7100_df["piera7100_pm2p5_vals"],
    'Temp': df_weather["Temperature_C"],
    'RelHum': df_weather["Humidity_%"],
}).assign(KNN=knn.predict(X))

# Reference next to the KNN-calibrated sensor over the full period.
df_KNN = pd.DataFrame({
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_KNN': knn_aligned["KNN"],
})
Sensor_plot_KNN = df_KNN.loc[:, ["RefSt", "Sensor_KNN"]]
Sensor_plot_KNN.plot()

In [None]:
# %%
# Random Forest
from sklearn.ensemble import RandomForestRegressor

# Model + fit in one step (fit returns the estimator); seed fixed at 0
rf = RandomForestRegressor(n_estimators = 20, random_state = 0).fit(X_MLRtrain, Y_MLRtrain)

# Predict on the held-out split
rf_test_pred = rf.predict(X_MLRtest)
df_MLRtest["RF_Pred"] = rf_test_pred
# print(df_MLRtest)

# Plot linear
# df_MLRtest[["RefSt", "RF_Pred"]].plot()
# plt.xticks(rotation = 20)

# Plot regression
# sns.lmplot(x = 'RefSt', y = 'RF_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 

# Loss
loss_functions(y_pred = df_MLRtest["RF_Pred"], y_true = df_MLRtest["RefSt"])

# RF feature importances, paired with the feature names
rf_importances = list(zip(X.columns, rf.feature_importances_))
print('Feature importances:\n', rf_importances)

# Reference vs. prediction (rendered as the cell output)
df_MLRtest.loc[:, ["RefSt", "RF_Pred"]].plot()

In [None]:
# Wide matplotlib view of reference vs. random-forest prediction.
fig, ax = plt.subplots(figsize=(30, 7))
ax.plot(df_MLRtest.loc[:, ["RefSt", "RF_Pred"]])

In [None]:
# Rebuild the aligned input frame and attach RF predictions for every row.
rf_aligned = pd.DataFrame({
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_O1': piera7100_df["piera7100_pm2p5_vals"],
    'Temp': df_weather["Temperature_C"],
    'RelHum': df_weather["Humidity_%"],
}).assign(RF=rf.predict(X))

# Reference next to the RF-calibrated sensor over the full period.
df_RF = pd.DataFrame({
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_RF': rf_aligned["RF"],
})
Sensor_plot_RF = df_RF.loc[:, ["RefSt", "Sensor_RF"]]
Sensor_plot_RF.plot()

In [None]:
# # %%
# # Kernel Regression
# # from sklearn_extensions.kernel_regression import KernelRegression
# from sklearn.kernel_ridge import KernelRidge

# # Models
# kr_rbf = KernelRidge(kernel = "rbf")
# kr_poly = KernelRidge(kernel = "poly", degree = 4)

# # Fit
# kr_rbf.fit(X_MLRtrain, Y_MLRtrain)
# kr_poly.fit(X_MLRtrain, Y_MLRtrain)

# # Predict
# df_MLRtest["KR_RBF_Pred"] = kr_rbf.predict(X_MLRtest)
# df_MLRtest["KR_Poly_Pred"] = kr_poly.predict(X_MLRtest)

# # Plot linear
# df_MLRtest[["RefSt", "KR_RBF_Pred", "KR_Poly_Pred"]].plot()
# plt.xticks(rotation=20)

# # Plot regression
# sns.lmplot(x = 'RefSt', y = 'KR_RBF_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 
# sns.lmplot(x = 'RefSt', y = 'KR_Poly_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 

# # Loss
# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["KR_RBF_Pred"])
# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["KR_Poly_Pred"])


In [None]:
# # %%
# # Gaussian Process
# from sklearn.gaussian_process import GaussianProcessRegressor
# from sklearn.gaussian_process.kernels import ConstantKernel, RBF, DotProduct, WhiteKernel

# # Kernels definition
# # rbf = ConstantKernel(constant_value=1.0, constant_value_bounds=(1e-10, 1e10)) * RBF(length_scale=1.0, length_scale_bounds=(1e-10, 1e10))
# rbf = ConstantKernel() * RBF()
# dpwh = DotProduct() + WhiteKernel()

# # Models
# gp_rbf = GaussianProcessRegressor(kernel = rbf, alpha = 150, random_state = 0)
# gp_dpwh = GaussianProcessRegressor(kernel = dpwh, alpha = 150, random_state = 0)

# # Fit
# gp_rbf.fit(X_MLRtrain, Y_MLRtrain)
# gp_dpwh.fit(X_MLRtrain, Y_MLRtrain)

# # Predict
# df_MLRtest["GP_RBF_Pred"] = gp_rbf.predict(X_MLRtest)
# df_MLRtest["GP_DPWK_Pred"] = gp_dpwh.predict(X_MLRtest)

# # Obtain optimized kernel parameters
# # l = gp.kernel_.k2.get_params()['length_scale']
# # sigma_f = np.sqrt(gp.kernel_.k1.get_params()['constant_value'])

# # Print parameters
# print("RBF params", gp_rbf.get_params())
# print("Dot params", gp_dpwh.get_params())

# # Plot linear
# df_MLRtest[["RefSt", "GP_RBF_Pred", "GP_DPWK_Pred"]].plot()
# plt.xticks(rotation = 20)

# # Plot regression
# sns.lmplot(x = 'RefSt', y = 'GP_RBF_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 
# sns.lmplot(x = 'RefSt', y = 'GP_DPWK_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 

# # Loss
# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["GP_RBF_Pred"])
# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["GP_DPWK_Pred"])

In [None]:
##Using radial basis function kernel
# Support Vector Regression
from sklearn.svm import SVR

# Model + fit in one step (fit returns the estimator); RBF kernel, C = 1e3
svr_rbf = SVR(kernel = 'rbf', C = 1e3).fit(X_MLRtrain, Y_MLRtrain)  # , gamma=0.1)

print('Intercept: \n', svr_rbf.intercept_)

# Predict on the held-out split
svr_test_pred = svr_rbf.predict(X_MLRtest)
df_MLRtest["SVR_RBF_Pred"] = svr_test_pred

# Plot regression
sns.lmplot(data = df_MLRtest, x = 'RefSt', y = 'SVR_RBF_Pred', fit_reg = True, line_kws = {'color': 'orange'})

# Loss
loss_functions(y_pred = df_MLRtest["SVR_RBF_Pred"], y_true = df_MLRtest["RefSt"])

# Reference vs. prediction (rendered as the cell output)
df_MLRtest.loc[:, ["RefSt", "SVR_RBF_Pred"]].plot()

In [None]:
# Wide matplotlib view of reference vs. SVR (RBF) prediction.
fig, ax = plt.subplots(figsize=(30, 7))
ax.plot(df_MLRtest.loc[:, ["RefSt", "SVR_RBF_Pred"]])

In [None]:
# # Not Ideal as it will take a lot of time to compute
# # Support Vector Regression
# from sklearn.svm import SVR

# # Models
# svr_lin = SVR(kernel = 'linear', C = 1e3)


# # Fit

# svr_lin.fit(X_MLRtrain, Y_MLRtrain)

# print('Intercept: \n', svr_lin.intercept_)
# # print('Coefficients: \n', svr_lin.coef_)

# # Predict
# df_MLRtest["SVR_Line_Pred"] = svr_lin.predict(X_MLRtest)


# # Plot regression
# sns.lmplot(x = 'RefSt', y = 'SVR_Line_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 


# # Loss
# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["SVR_Line_Pred"])

# df_MLRtest[["RefSt", "SVR_Line_Pred"]].plot()

In [None]:
# # # Support Vector Regression using poly (Not Ideal as it takes a lot of time to train)
# from sklearn.svm import SVR

# # Models
# svr_poly = SVR(kernel = 'poly', C = 1e3, degree = 3)

# # Fit
# svr_poly.fit(X_MLRtrain, Y_MLRtrain)
# print('Intercept: \n', svr_poly.intercept_)
# #print('Coefficients: \n', svr_poly.coef_)

# # Predict
# df_MLRtest["SVR_Poly_Pred"] = svr_poly.predict(X_MLRtest)

# # Plot regression

# sns.lmplot(x = 'RefSt', y = 'SVR_Poly_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'}) 

# # Loss

# loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["SVR_Poly_Pred"])
# df_MLRtest[["RefSt", "SVR_Poly_Pred"]].plot()

In [None]:
# df_SVR = pd.DataFrame({'RefSt': praxis_df2["praxis_pm2p5_vals_adj"], 'Sensor_O1': canary_df["canary_pm2p5_vals"], 'Temp': canary_df["canary_temp"], 'RelHum': canary_df["canary_rh"]})
# df_SVR["SVR_rbf"] = svr_rbf.predict(X)
# # df_SVR["SVR_lin"] = svr_lin.predict(X)
# # df_SVR["SVR_poly"] = svr_poly.predict(X)
# df_SVR = pd.DataFrame({'RefSt': praxis_df2["praxis_pm2p5_vals_adj"], 'Sensor_RF': df_SVR["SVR_rbf"]})
# # df_SVR = pd.DataFrame({'RefSt': praxis_df2["praxis_pm2p5_vals_adj"], 'Sensor_RF_rbf': df_SVR["SVR_rbf"], 'Sensor_RF_lin': df_SVR["SVR_lin"], 'Sensor_RF_poly': df_SVR["SVR_poly"]})
# Sensor_plot_SVR = df_SVR[["Sensor_RF", "RefSt"]]
# Sensor_plot_SVR.plot()

In [None]:
# %%
# Neural Network
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, InputLayer
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import StandardScaler

print(tf.__version__)

# Normalise data: fit the scaler on the training split only, reuse it for test
sc = StandardScaler()
X_train_NN = sc.fit_transform(X_MLRtrain)
X_test_NN = sc.transform(X_MLRtest)
Y_MLRtrain_NN = Y_MLRtrain

# Model
nn = Sequential()

# Model - Layers
# input_shape must be a tuple; "(3)" is just the integer 3. Deriving it from
# the data keeps the layer in step with the number of feature columns.
nn.add(InputLayer(input_shape = (X_train_NN.shape[1],))) # Input layer
# Seven identical hidden layers: 64 units, ReLU
for _ in range(7):
    nn.add(Dense(units = 64, activation = 'relu'))
nn.add(Dense(units = 1)) # Output layer

nn.compile(optimizer = 'adam', loss = 'mean_squared_error')

# Fit
history = nn.fit(X_train_NN, Y_MLRtrain_NN, batch_size = 5, epochs = 20)

# Plot loss (the first 5 epochs are skipped)
plt.plot(history.history['loss'][5:])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# plt.legend(['train', 'val'], loc='upper left')
plt.show()

# Predict on the *scaled* test features: the network was trained on scaled
# inputs, so feeding it the raw X_MLRtest gives meaningless predictions.
# ravel() flattens the (n, 1) output into a plain column.
df_MLRtest["NN_Pred"] = nn.predict(X_test_NN).ravel()
print(df_MLRtest)

# Plot linear
# df_MLRtest[["RefSt", "NN_Pred"]].plot()
# plt.xticks(rotation=20)

# Plot regression
sns.lmplot(x = 'RefSt', y = 'NN_Pred', data = df_MLRtest, fit_reg = True, line_kws = {'color': 'orange'})

# Loss
loss_functions(y_true = df_MLRtest["RefSt"], y_pred = df_MLRtest["NN_Pred"])

In [None]:
# Wide matplotlib view of the NN prediction (test split only) against the
# full reference series.
fig, ax = plt.subplots(figsize=(30, 7))
# df_SVR = pd.DataFrame({'RefSt': praxis_df2["praxis_pm2p5_vals_adj"], 'Sensor_O1': canary_df["canary_pm2p5_vals"], 'Temp': canary_df["canary_temp"], 'RelHum': canary_df["canary_rh"]})
# df_SVR["SVR_rbf"] = svr_rbf.predict(X)
# df_SVR["SVR_lin"] = svr_lin.predict(X)
# df_SVR["SVR_poly"] = svr_poly.predict(X)
nn_vs_reference = {
    'RefSt': praxis_df["praxis_pm2p5_vals_adj"],
    'Sensor_NN': df_MLRtest["NN_Pred"],
}
df_NN = pd.DataFrame(nn_vs_reference)
# df_SVR = pd.DataFrame({'RefSt': praxis_df2["praxis_pm2p5_vals_adj"], 'Sensor_RF_rbf': df_SVR["SVR_rbf"], 'Sensor_RF_lin': df_SVR["SVR_lin"], 'Sensor_RF_poly': df_SVR["SVR_poly"]})
Sensor_plot_NN = df_NN.loc[:, ["Sensor_NN", "RefSt"]]
ax.plot(Sensor_plot_NN)