In [8]:
%%capture
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from IPython.display import display, clear_output

# Shell escapes: enable the ipywidgets notebook extension and the Voila server
# extension in the current environment. These modify the Jupyter installation
# rather than the analysis; output is hidden by the %%capture at the top of the cell.
# NOTE(review): environment setup is usually better done once outside the notebook.
!jupyter nbextension enable --py widgetsnbextension --sys-prefix
!jupyter serverextension enable voila 

In [9]:
%%capture
# Lists the installed server extensions; because the cell starts with %%capture
# the listing is captured and never shown, so this is effectively a no-op check.
!jupyter serverextension list

In [10]:
%%capture
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn import model_selection, preprocessing, feature_selection, ensemble, linear_model, metrics, decomposition

# Feature table and target table are stored in two separate CSV files.
dtf1 = pd.read_csv('arkansas1Bayer.csv')
dtf2 = pd.read_csv('arkansasBayer.csv')

# Row i of the feature file is paired with row i of the target file, so the two
# files must have the same number of rows.
if len(dtf1) != len(dtf2):
    raise ValueError(
        f"feature/target row count mismatch: {len(dtf1)} vs {len(dtf2)}"
    )

# Drop incomplete rows from BOTH tables at once. Filtering each table on its own
# (as before) could remove different rows from X and Y and silently pair a
# feature row with the wrong target.
complete_rows = dtf1.dropna(axis='rows').index.intersection(
    dtf2.dropna(axis='rows').index
)
X = dtf1.loc[complete_rows]
Y = dtf2.loc[complete_rows]
listOfColumnNames = list(X)


from sklearn.model_selection import train_test_split
# NOTE(review): test_size=0.7 holds out 70% of the rows and trains on 30% —
# confirm this is intended rather than a 70/30 train/test split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.7, random_state=42)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Baseline base learner: ordinary least squares fitted on the training split.
lr = LinearRegression().fit(X_train, Y_train)

# Absolute coefficient magnitudes flattened to 1-D. Nothing in this cell prints
# or plots them; the array is only kept for ad-hoc inspection.
importance = np.abs(lr.coef_).ravel()

# Predictions on both splits, then MSE / R2 for each.
y_lr_train_pred, y_lr_test_pred = lr.predict(X_train), lr.predict(X_test)
lr_train_mse, lr_train_r2 = (
    mean_squared_error(Y_train, y_lr_train_pred),
    r2_score(Y_train, y_lr_train_pred),
)
lr_test_mse, lr_test_r2 = (
    mean_squared_error(Y_test, y_lr_test_pred),
    r2_score(Y_test, y_lr_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
lr_results = pd.DataFrame(
    ['Linear regression', lr_train_mse, lr_train_r2, lr_test_mse, lr_test_r2]
).T
lr_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.ensemble import RandomForestRegressor

# Random-forest base learner with fixed, seeded hyperparameters.
rf = RandomForestRegressor(
    n_estimators=800,
    max_depth=15,
    min_samples_split=2,
    random_state=42,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_rf_train_pred, y_rf_test_pred = rf.predict(X_train), rf.predict(X_test)
rf_train_mse, rf_train_r2 = (
    mean_squared_error(Y_train, y_rf_train_pred),
    r2_score(Y_train, y_rf_train_pred),
)
rf_test_mse, rf_test_r2 = (
    mean_squared_error(Y_test, y_rf_test_pred),
    r2_score(Y_test, y_rf_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
rf_results = pd.DataFrame(
    ['Random forest', rf_train_mse, rf_train_r2, rf_test_mse, rf_test_r2]
).T
rf_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']



from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor.
# NOTE(review): the generic name `model` is rebound to the Keras network in a
# later cell, and this estimator is not among the models used to build the
# stacked prediction matrix there — it is only scored here.
model = KNeighborsRegressor(
    n_neighbors=5,
    weights='distance',
    algorithm='brute',
    leaf_size=25,
    p=1,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_NR_train_pred, y_NR_test_pred = model.predict(X_train), model.predict(X_test)
NR_train_mse, NR_train_r2 = (
    mean_squared_error(Y_train, y_NR_train_pred),
    r2_score(Y_train, y_NR_train_pred),
)
NR_test_mse, NR_test_r2 = (
    mean_squared_error(Y_test, y_NR_test_pred),
    r2_score(Y_test, y_NR_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
NR_results = pd.DataFrame(
    ['KNeighborsRegressor', NR_train_mse, NR_train_r2, NR_test_mse, NR_test_r2]
).T
NR_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from xgboost.sklearn import XGBRegressor

# Gradient-boosted stumps (max_depth=1) via XGBoost's scikit-learn wrapper.
model1 = XGBRegressor(
    base_score=0.5,
    learning_rate=0.2,
    max_depth=1,
    min_child_weight=1,
    n_estimators=100,
    random_state=42,
    importance_type='gain',
)
model1.fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_XGB_train_pred, y_XGB_test_pred = model1.predict(X_train), model1.predict(X_test)
XGB_train_mse, XGB_train_r2 = (
    mean_squared_error(Y_train, y_XGB_train_pred),
    r2_score(Y_train, y_XGB_train_pred),
)
XGB_test_mse, XGB_test_r2 = (
    mean_squared_error(Y_test, y_XGB_test_pred),
    r2_score(Y_test, y_XGB_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
XGB_results = pd.DataFrame(
    ['XGB', XGB_train_mse, XGB_train_r2, XGB_test_mse, XGB_test_r2]
).T
XGB_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


# from lightgbm import LGBMRegressor
# model2 = LGBMRegressor(boosting_type='gbdt',colsample_bytree=1,num_leaves=50,min_child_samples =4, max_depth=3, learning_rate=0.15, n_estimators=70,reg_alpha=1.0, reg_lambda=3.0)
# model2.fit(X_train, Y_train)
# y_LGBM_train_pred = model2.predict(X_train)
# y_LGBM_test_pred = model2.predict(X_test)

# LGBM_train_mse = mean_squared_error(Y_train, y_LGBM_train_pred)
# LGBM_train_r2 = r2_score(Y_train, y_LGBM_train_pred)
# LGBM_test_mse = mean_squared_error(Y_test, y_LGBM_test_pred)
# LGBM_test_r2 = r2_score(Y_test, y_LGBM_test_pred)

# LGBM_results = pd.DataFrame(['LGBM',LGBM_train_mse, LGBM_train_r2, LGBM_test_mse, LGBM_test_r2]).transpose()
# LGBM_results.columns = ['Method','Training MSE','Training R2','Test MSE','Test R2']


from catboost import CatBoostRegressor

# Shallow CatBoost regressor trained silently (verbose=False).
model3 = CatBoostRegressor(
    iterations=500,
    learning_rate=.1,
    depth=2,
    model_size_reg=None,
    loss_function='RMSE',
    l2_leaf_reg=2,
    verbose=False,
)
model3.fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_Cat_train_pred, y_Cat_test_pred = model3.predict(X_train), model3.predict(X_test)
Cat_train_mse, Cat_train_r2 = (
    mean_squared_error(Y_train, y_Cat_train_pred),
    r2_score(Y_train, y_Cat_train_pred),
)
Cat_test_mse, Cat_test_r2 = (
    mean_squared_error(Y_test, y_Cat_test_pred),
    r2_score(Y_test, y_Cat_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
Cat_results = pd.DataFrame(
    ['Cat', Cat_train_mse, Cat_train_r2, Cat_test_mse, Cat_test_r2]
).T
Cat_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.linear_model import SGDRegressor

# Linear model trained by stochastic gradient descent with an L1 penalty.
# NOTE(review): tol=5.2 is a very loose stopping tolerance, and
# validation_fraction presumably only matters with early stopping enabled —
# confirm both against the scikit-learn documentation.
model4 = SGDRegressor(
    loss='squared_epsilon_insensitive',
    alpha=1e-6,
    penalty='l1',
    max_iter=1000,
    eta0=8e-6,
    validation_fraction=0.1,
    average=100,
    random_state=42,
    power_t=0.0001,
    tol=5.2,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_SGD_train_pred, y_SGD_test_pred = model4.predict(X_train), model4.predict(X_test)
SGD_train_mse, SGD_train_r2 = (
    mean_squared_error(Y_train, y_SGD_train_pred),
    r2_score(Y_train, y_SGD_train_pred),
)
SGD_test_mse, SGD_test_r2 = (
    mean_squared_error(Y_test, y_SGD_test_pred),
    r2_score(Y_test, y_SGD_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
SGD_results = pd.DataFrame(
    ['SGD', SGD_train_mse, SGD_train_r2, SGD_test_mse, SGD_test_r2]
).T
SGD_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.kernel_ridge import KernelRidge

# Kernel ridge regression with a linear kernel.
# NOTE(review): degree and coef0 look like polynomial/sigmoid-kernel settings
# and are presumably unused with kernel='linear' — confirm.
model5 = KernelRidge(
    alpha=1,
    kernel='linear',
    degree=5,
    coef0=5,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_Kernel_train_pred, y_Kernel_test_pred = model5.predict(X_train), model5.predict(X_test)
Kernel_train_mse, Kernel_train_r2 = (
    mean_squared_error(Y_train, y_Kernel_train_pred),
    r2_score(Y_train, y_Kernel_train_pred),
)
Kernel_test_mse, Kernel_test_r2 = (
    mean_squared_error(Y_test, y_Kernel_test_pred),
    r2_score(Y_test, y_Kernel_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
Kernel_results = pd.DataFrame(
    ['Kernel', Kernel_train_mse, Kernel_train_r2, Kernel_test_mse, Kernel_test_r2]
).T
Kernel_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.linear_model import ElasticNet

# Elastic-net regression; the very small l1_ratio makes the penalty almost pure L2.
model6 = ElasticNet(
    alpha=.0001,
    l1_ratio=0.00005,
    max_iter=100,
    tol=0.001,
    random_state=42,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_EN_train_pred, y_EN_test_pred = model6.predict(X_train), model6.predict(X_test)
EN_train_mse, EN_train_r2 = (
    mean_squared_error(Y_train, y_EN_train_pred),
    r2_score(Y_train, y_EN_train_pred),
)
EN_test_mse, EN_test_r2 = (
    mean_squared_error(Y_test, y_EN_test_pred),
    r2_score(Y_test, y_EN_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
EN_results = pd.DataFrame(
    ['EN', EN_train_mse, EN_train_r2, EN_test_mse, EN_test_r2]
).T
EN_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression with explicit gamma-prior hyperparameters.
# NOTE(review): the `n_iter` keyword may have been renamed in newer scikit-learn
# releases — verify against the installed version before upgrading.
model7 = BayesianRidge(
    n_iter=5000,
    tol=0.001,
    alpha_1=5e-03,
    alpha_2=1e-03,
    lambda_1=1e-03,
    lambda_2=1e-03,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_Bay_train_pred, y_Bay_test_pred = model7.predict(X_train), model7.predict(X_test)
Bay_train_mse, Bay_train_r2 = (
    mean_squared_error(Y_train, y_Bay_train_pred),
    r2_score(Y_train, y_Bay_train_pred),
)
Bay_test_mse, Bay_test_r2 = (
    mean_squared_error(Y_test, y_Bay_test_pred),
    r2_score(Y_test, y_Bay_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
Bay_results = pd.DataFrame(
    ['Bay', Bay_train_mse, Bay_train_r2, Bay_test_mse, Bay_test_r2]
).T
Bay_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with depth-1 trees and an absolute-error loss.
# NOTE(review): `alpha` is presumably only consulted by the huber/quantile
# losses and has no effect with loss='absolute_error' — confirm.
# NOTE(review): no random_state is set while max_features=3 subsamples features,
# so results may vary between runs — confirm whether a seed is wanted.
model8 = GradientBoostingRegressor(alpha=.1,max_features=3,min_impurity_decrease=3,loss='absolute_error', learning_rate=0.1, n_estimators=500, max_depth=1,min_samples_leaf=1)
model8.fit(X_train, Y_train)
y_GBR_train_pred = model8.predict(X_train)
y_GBR_test_pred = model8.predict(X_test)

# MSE / R2 on the training and test splits.
GBR_train_mse = mean_squared_error(Y_train, y_GBR_train_pred)
GBR_train_r2 = r2_score(Y_train, y_GBR_train_pred)
GBR_test_mse = mean_squared_error(Y_test, y_GBR_test_pred)
GBR_test_r2 = r2_score(Y_test, y_GBR_test_pred)

# One-row summary frame. The training-MSE slot previously held NR_train_mse
# (the k-nearest-neighbours metric), a copy-paste slip; it now reports this
# model's own training MSE.
GBR_results = pd.DataFrame(['GBR',GBR_train_mse, GBR_train_r2, GBR_test_mse, GBR_test_r2]).transpose()
GBR_results.columns = ['Method','Training MSE','Training R2','Test MSE','Test R2']


from sklearn.svm import SVR

# Support-vector regression with a linear kernel.
# NOTE(review): degree, gamma and coef0 look like settings for non-linear
# kernels and are presumably unused with kernel='linear' — confirm.
model9 = SVR(
    kernel='linear',
    degree=5,
    gamma='auto',
    coef0=1.0,
    tol=0.5,
    C=1.5,
    epsilon=0.5,
    cache_size=50,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_SVR_train_pred, y_SVR_test_pred = model9.predict(X_train), model9.predict(X_test)
SVR_train_mse, SVR_train_r2 = (
    mean_squared_error(Y_train, y_SVR_train_pred),
    r2_score(Y_train, y_SVR_train_pred),
)
SVR_test_mse, SVR_test_r2 = (
    mean_squared_error(Y_test, y_SVR_test_pred),
    r2_score(Y_test, y_SVR_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
SVR_results = pd.DataFrame(
    ['SVR', SVR_train_mse, SVR_train_r2, SVR_test_mse, SVR_test_r2]
).T
SVR_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']


from sklearn.tree import DecisionTreeRegressor

# Single seeded decision tree split on absolute error.
model10 = DecisionTreeRegressor(
    min_impurity_decrease=.001,
    criterion='absolute_error',
    max_depth=19,
    min_samples_split=7,
    random_state=42,
).fit(X_train, Y_train)

# Predictions on both splits, then MSE / R2 for each.
y_DTR_train_pred, y_DTR_test_pred = model10.predict(X_train), model10.predict(X_test)
DTR_train_mse, DTR_train_r2 = (
    mean_squared_error(Y_train, y_DTR_train_pred),
    r2_score(Y_train, y_DTR_train_pred),
)
DTR_test_mse, DTR_test_r2 = (
    mean_squared_error(Y_test, y_DTR_test_pred),
    r2_score(Y_test, y_DTR_test_pred),
)

# One-row summary frame: method label followed by the four metrics.
DTR_results = pd.DataFrame(
    ['DTR', DTR_train_mse, DTR_train_r2, DTR_test_mse, DTR_test_r2]
).T
DTR_results.columns = ['Method', 'Training MSE', 'Training R2', 'Test MSE', 'Test R2']

In [11]:
%%capture
# Build the level-1 design matrix: one column per fitted base model, holding
# that model's prediction for every complete row of the feature file.
# NOTE(review): these rows include the ones the base models were trained on,
# and the y_*_test_pred names are rebound here to full-dataset predictions.
df = pd.read_csv('arkansas1Bayer.csv').drop([], axis=1).dropna(axis='rows')

mL = pd.DataFrame()
mL['rf'] = y_rf_test_pred = rf.predict(df)
mL['lr'] = y_lr_test_pred = lr.predict(df)
mL['XGB'] = y_XGB_test_pred = model1.predict(df)

# y_LGBM_test_pred = model2.predict(df)
# mL['LGBM'] = y_LGBM_test_pred

mL['Cat'] = y_Cat_test_pred = model3.predict(df)
mL['SGD'] = y_SGD_test_pred = model4.predict(df)
mL['Kernel'] = y_Kernel_test_pred = model5.predict(df)
mL['EN'] = y_EN_test_pred = model6.predict(df)
mL['Bay'] = y_Bay_test_pred = model7.predict(df)
mL['GBR'] = y_GBR_test_pred = model8.predict(df)
mL['SVR'] = y_SVR_test_pred = model9.predict(df)
mL['DTR'] = y_DTR_test_pred = model10.predict(df)
mL

In [12]:
%%capture
# Stacking stage: train a small 1-D convolutional network on the base-model
# predictions (mL) to produce the final "Super Learner" estimate.
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

X = mL
# NOTE(review): targets come from 'arkansasBayer1.csv' (a different file from the
# one used for the base models) and are not NaN-filtered, while mL was built
# from NaN-filtered rows — confirm the two stay row-aligned.
Y = pd.read_csv('arkansasBayer1.csv')
#Y = dtf2.drop([], axis=1).dropna(axis='rows')
r_array = []
mse_array= []
error_array= []
# Single pass; the range and the (unused) accumulator lists above look like
# leftovers from a repeated-trials experiment.
for i in range(1, 2):
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=.2, random_state=42)

    tf.keras.backend.clear_session()
    # Seed TensorFlow so weight initialisation (and hence the reported metrics)
    # can be reproduced on a fresh kernel; the original run was unseeded.
    tf.random.set_seed(42)

    # NOTE(review): `model` replaces the KNeighborsRegressor bound to the same
    # name in the base-model cell.
    model = Sequential()
    # The sequence length is the number of base-model columns; derive it from
    # the data instead of hard-coding 11 so adding/removing a base model works.
    model.add(Conv1D(32, 3, activation="relu", input_shape=(X_train.shape[1], 1)))
    model.add(Flatten())
    model.add(Dense(64, activation="relu"))
    model.add(Dense(1))
    model.compile(loss="mse", optimizer="adam")
    model.summary()
    model.fit(X_train, Y_train, batch_size=1,epochs=100, verbose=0)
    #history = model.fit(X_train, Y_train, epochs=1000,
     #                   validation_split=0.2, verbose=0)
    ypred = model.predict(X_test)
    #print(model.evaluate(X_train, Y_train))
    #print("MSE: %.3f" % mean_squared_error(Y_test, ypred))

    # NOTE(review): the rf_* / y_rf_* names below overwrite the random-forest
    # metrics and predictions computed earlier; they now describe the network.
    y_rf_train_pred = model.predict(X_train)
    y_rf_test_pred = model.predict(X_test)
    rf_train_mse = mean_squared_error(Y_train, y_rf_train_pred)
    rf_train_r2 = r2_score(Y_train, y_rf_train_pred)
    rf_test_mse = mean_squared_error(Y_test, y_rf_test_pred)
    rf_test_r2 = r2_score(Y_test, y_rf_test_pred)

    # One-row summary frame: method label followed by the four metrics.
    rf_results = pd.DataFrame(['Super Learner',rf_train_mse, rf_train_r2, rf_test_mse, rf_test_r2]).transpose()
    rf_results.columns = ['Method','Training MSE','Training R2','Test MSE','Test R2']


In [49]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets
from scipy.optimize import minimize_scalar
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd




def equation(x):
    """Return the entry at index ``x`` of the notebook-level ``dataCol`` sequence."""
    sample = dataCol[x]
    return sample

def find_saturation_point(dataCol, lower_bound, upper_bound, tolerance=.25, counterTolerance=4):
    """Scan forward from ``lower_bound`` until the series has flattened out.

    A step from index ``x`` to ``x + 1`` counts as "flat" when the absolute
    change in value is below ``tolerance``. Scanning stops once
    ``counterTolerance`` flat steps have been seen (they need not be
    consecutive) or when ``upper_bound`` is reached.

    Parameters
    ----------
    dataCol : sequence or callable
        The series to scan. Either an indexable object (``dataCol[i]``) or a
        callable returning the value at an integer index (``dataCol(i)``).
        Previously this argument was ignored and the values were always read
        through the module-level ``equation`` helper.
    lower_bound, upper_bound : int
        Index range to scan; indices up to and including ``upper_bound`` are read.
    tolerance : float, default 0.25
        Maximum absolute change for a step to count as flat.
    counterTolerance : int, default 4
        Number of flat steps required before stopping.

    Returns
    -------
    int
        The index reached when scanning stopped, i.e. one past the step that
        produced the final flat count, or ``upper_bound`` if the count was
        never reached.
    """
    # Accept both call styles so existing callers that pass a lookup function
    # keep working while lists/arrays can be passed directly.
    value_at = dataCol if callable(dataCol) else dataCol.__getitem__

    x = lower_bound
    counter = 0
    while x < upper_bound and counter < counterTolerance:
        if abs(value_at(int(x + 1)) - value_at(int(x))) < tolerance:
            counter += 1
        x += 1

    return x


def interactive_plot(Prototype,lowBound1,highBound1,lowBound2,highBound2):
    """Plot one prototype's CO2 trace and print the stacked-model SOC estimate.

    Downloads the measurement workbook, reads the sheet named after
    ``Prototype``, rescales the CO2 column so its first reading equals 429,
    drops zero readings, and plots the trace with the four boundary markers.
    When both windows are valid (low < high) it finds a saturation point in
    each window, derives the CO2 difference between them, feeds that together
    with average temperature/moisture and fixed depth/crop/harvest values to
    the fitted base models, and prints the meta-model's prediction.

    Parameters
    ----------
    Prototype : int
        Sheet name (as a number) inside the workbook.
    lowBound1, highBound1 : int
        First window in which to search for saturation.
    lowBound2, highBound2 : int
        Second window in which to search for saturation.

    Notes
    -----
    Relies on notebook-level objects fitted in earlier cells: ``rf``, ``lr``,
    ``model1``, ``model3`` … ``model10`` and the Keras ``model``.
    """
    # equation() indexes the notebook-level dataCol. Without declaring it global
    # here, the assignment below only created a local and the saturation search
    # silently used whatever dataCol another cell had left behind.
    global dList, dataCol

    sheet_id = '1IEFPaOT6y0N92Vu1ItKzUhrDpizdPs9j6ooBm9iXh14'
    # NOTE(review): the workbook is downloaded again on every widget change.
    xls = pd.ExcelFile(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx")
    sheetNum=Prototype
    data = pd.read_excel(xls, str(sheetNum),header = 0)

    # Rescale so the first CO2 reading equals the 429 reference value.
    initiData=data['CO2'].iloc[0]
    referenceData = 429
    updateData = referenceData/initiData
    data["CO2"] = data["CO2"]*updateData
    data = data[data.CO2 != 0]
    data = data.fillna(0)

    # NOTE(review): dList holds index labels while dataCol is positional; if
    # zero rows were dropped above the two no longer line up — confirm.
    dList=data.index.tolist()
    dataCol=data['CO2'].tolist()

    fig = plt.figure(figsize=(18, 10))  # Adjust the figsize as needed
    plt.plot(dList,dataCol)
    # Low bounds are drawn in blue, high bounds in green.
    plt.axvline(x=lowBound1, color = 'b', linestyle='--',)
    plt.axvline(x=highBound1, color = 'g', linestyle='--',)
    plt.axvline(x=lowBound2, color = 'b', linestyle='--',)
    plt.axvline(x=highBound2, color = 'g', linestyle='--',)

    # Averages span from the start of the first window to the end of the second.
    tempAverage = data['Temperature'].loc[lowBound1:highBound2].mean()
    moistAverage = data['Moisture'].loc[lowBound1:highBound2].mean()

    # Fixed inputs for the model features that are not measured here.
    depth = 15
    crop = 0
    harvest = 0
    # Magnitude of the linear trend over the whole trace.
    slope_intercept = np.polyfit(dList,dataCol,1)
    slope =abs(slope_intercept[0])

    plt.title('Prototype '+str(sheetNum)+ ' CO2 Measurements', fontsize=16)
    plt.ylabel('CO2 Concentration (ppm)', fontsize=16)
    plt.xlabel('Time (minutes)', fontsize=16)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

    if(lowBound1<highBound1 and lowBound2<highBound2):
        print("First Low Boundary: ",lowBound1)
        print("First High Boundary: ",highBound1)
        saturation_point = find_saturation_point(equation, lowBound1, highBound1)
        # print("Saturation Point:", saturation_point)
        # print("Saturation Y-Value:", dataCol[int(saturation_point)])
        print("Second Low Boundary: ",lowBound2)
        print("Second High Boundary: ",highBound2)
        saturation_point2 = find_saturation_point(equation, lowBound2, highBound2)
        # print("Saturation Point:", saturation_point2)
        # print("Saturation Y-Value:", dataCol[int(saturation_point2)])

        # CO2 difference between the two saturation levels.
        PO = dataCol[int(saturation_point2)]-dataCol[int(saturation_point)]
        print("Photo-Oxidated CO2: ", PO)

        # Single-row feature frame for the base models.
        # NOTE(review): column names/order are assumed to match the training
        # CSV — verify.
        d = {'PO':[PO], 'Depth': [depth], 'Moist': [moistAverage], 'PO_Rate': [slope,] , 'Harvest': [harvest]
             , 'Crop': [crop], 'Temp':[tempAverage]}
        df = pd.DataFrame(data=d)

        # One column per base model, in the same order used to train the
        # meta-model (LGBM stays disabled, as in the training cell).
        base_models = [
            ('rf', rf),
            ('lr', lr),
            ('XGB', model1),
            ('Cat', model3),
            ('SGD', model4),
            ('Kernel', model5),
            ('EN', model6),
            ('Bay', model7),
            ('GBR', model8),
            ('SVR', model9),
            ('DTR', model10),
        ]
        smL = pd.DataFrame()
        for column, estimator in base_models:
            smL[column] = estimator.predict(df)

        ypred = model.predict(smL)
        # str(row)[1:-1] strips the brackets from the single prediction row.
        print("\033[1mSOC: \033[0m",str(*[str(row)[1:-1] for row in ypred])+"%")
        print("Average Temperature",str(round(tempAverage,2))+"°C")
        print("Average Moisture",str(round(moistAverage,2))+"%")

    else:
        print("Lower Boundary: ",lowBound1)
        print("Higher Boundary: ",highBound1)
        print("lowBound must be less than highBound")

    
# Wire the plot function to five integer text boxes.
# The previous call passed min=min(dList)/max=max(dList) to each box, which
# (a) raised NameError on a fresh kernel because dList is only created later,
# and (b) had no visible effect: the recorded widget repr shows plain IntText
# boxes without bounds (IntText does not appear to declare min/max;
# BoundedIntText does). Valid ranges depend on the selected sheet, so the
# boxes are left unbounded and interactive_plot validates low < high itself.
interact(
    interactive_plot,
    Prototype=widgets.IntText(),
    lowBound1=widgets.IntText(),
    highBound1=widgets.IntText(),
    lowBound2=widgets.IntText(),
    highBound2=widgets.IntText(),
)

#interact(interactive_plot, lowBound1=(min=min(dList),max=max(dList))
#         ,highBound1=(min=min(dList),max=max(dList))
#         ,lowBound2=(min=min(dList),max=max(dList))
#         ,highBound2=(min=min(dList),max=max(dList)))

interactive(children=(IntText(value=0, description='Prototype'), IntText(value=0, description='lowBound1'), In…

<function __main__.interactive_plot(Prototype, lowBound1, highBound1, lowBound2, highBound2)>

In [47]:
# Load sheet '7' of the measurement workbook at notebook level.
# `xls` was previously only ever created as a local inside interactive_plot,
# so this cell raised NameError on a fresh kernel; open the workbook here.
sheet_id = '1IEFPaOT6y0N92Vu1ItKzUhrDpizdPs9j6ooBm9iXh14'
xls = pd.ExcelFile(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx")

data = pd.read_excel(xls, '7', header=0)
# Drop zero CO2 readings, then blank out any remaining missing values.
data = data[data.CO2 != 0]
data = data.fillna(0)

# Index labels and raw CO2 values of the remaining rows.
# NOTE(review): unlike interactive_plot, no rescaling to the 429 reference is
# applied here.
dList = data.index.tolist()
dataCol = data['CO2'].tolist()

In [48]:
# Ad-hoc spot check of one CO2 reading; depends on whichever dataCol is
# currently in the kernel and raises IndexError if it has fewer than 1101 entries.
dataCol[1100]

404

In [45]:
# Display the current `data` frame (pandas truncates long frames when rendering).
# NOTE(review): the recorded output shows Prototype 8 rows, so it appears to
# come from a different load than the sheet '7' cell above — confirm.
data

Unnamed: 0,Address,Date,Prototype,CO2,Temperature,Moisture,#VALUE!,Constant,CO2 Average,Temp Average,Moist Average
0,e00fce68dcaefb9db946ce06,2022-10-28 13:53:06.425,8,569,23.639999,12,437.614565,1.300231,#VALUE!,18.01729,63.938931
1,e00fce68dcaefb9db946ce06,2022-10-28 13:54:59.525,8,566,23.750000,12,435.307282,0.000000,0,0.00000,0.000000
2,e00fce68dcaefb9db946ce06,2022-10-28 13:55:56.083,8,563,24.600000,12,433.000000,0.000000,0,0.00000,0.000000
3,e00fce68dcaefb9db946ce06,2022-10-28 13:56:52.972,8,566,23.959999,12,435.307282,0.000000,0,0.00000,0.000000
4,e00fce68dcaefb9db946ce06,2022-10-28 13:57:49.545,8,562,23.430000,12,432.230906,0.000000,0,0.00000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
507,e00fce68dcaefb9db946ce06,2022-11-23 12:10:59.102,8,851,5.220000,47,0.000000,0.000000,0,0.00000,0.000000
508,e00fce68dcaefb9db946ce06,2022-11-23 12:11:56.647,8,851,5.110000,47,0.000000,0.000000,0,0.00000,0.000000
509,e00fce68dcaefb9db946ce06,2022-11-23 12:12:51.960,8,866,4.690000,47,0.000000,0.000000,0,0.00000,0.000000
510,e00fce68dcaefb9db946ce06,2022-11-23 12:15:41.142,8,825,4.480000,47,0.000000,0.000000,0,0.00000,0.000000
