In [0]:
%tensorflow_version 2.x
import tensorflow

In [0]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV, train_test_split
# Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
# Read Attributes/Features from file
df_ip = pd.read_csv("/content/drive/My Drive/Regression Crops/swir_ds.csv")
print(df_ip.iloc[:,1:])

     Img_name Var_name  Band_001  ...  Band_254  Band_255  Band_256
0       38367     mean  33494.16  ...    907.07    882.74    866.28
1       38368     mean  30631.00  ...    662.62    654.40    633.60
2       38369     mean  30501.70  ...    812.08    797.25    793.31
3       38370     mean  31935.69  ...    894.70    877.91    849.85
4       38371     mean  31334.30  ...    839.69    830.51    789.81
..        ...      ...       ...  ...       ...       ...       ...
450     38847     mean  36645.46  ...    956.51    920.53    893.65
451     38848     mean  33674.15  ...    839.43    806.54    783.64
452     38849     mean  32276.04  ...    966.73    929.47    906.54
453     38850     mean  32733.53  ...    846.66    814.37    791.89
454     38851     mean  29857.77  ...    734.84    728.57    713.01

[455 rows x 258 columns]


In [20]:
# Read Targets Class from file
df_out = pd.read_csv("/content/drive/My Drive/Regression Crops/tgt_sugar.csv", float_precision='round_trip')
print(df_out)

     Barcode         Print Info  ...  1,1,1-Kestopentaose Total Fructan
0      38368    Cabaret-R1_L1P3  ...               0.0690        7.2098
1      38369     Hassan-R1_L1P4  ...               0.2214        8.3110
2      38370     Goldie-R1_L1P5  ...               1.1188       14.6028
3      38371    Foxtrot-R1_L1P6  ...               0.1631        3.2343
4      38372    check 3-R1_L1P7  ...               0.1852        6.9215
..       ...                ...  ...                  ...           ...
471    38864  Foxtrot-R3_L21P19  ...               0.2155       13.3492
472    38865     Drum-R3_L21P20  ...               0.2383        5.6861
473    38868  Chariot-R3_L21P23  ...               0.4065       21.3713
474    38870     Alis-R3_L21P25  ...               0.0514        5.0629
475    38859  check 3-R9_L21P14  ...               0.0679        5.7750

[476 rows x 17 columns]


In [21]:
# Joining Attributes & Targets
result = pd.merge(df_ip,
                  df_out[['Barcode','Print Info','Variety ID','Glucose','Fructose','Sucrose','Raffinose','1-Kestose','Maltose','Nystose','1,1,1-Kestopentaose','Total Fructan']],
                  left_on='Img_name',
                  right_on='Barcode',
                  how='inner')
print(result.head())

   Index  Img_name Var_name  ...  Nystose  1,1,1-Kestopentaose  Total Fructan
0      1     38367     mean  ...   0.4549               0.4934         7.4516
1      3     38368     mean  ...   0.8382               0.0690         7.2098
2      5     38369     mean  ...   1.3786               0.2214         8.3110
3      7     38370     mean  ...   2.2351               1.1188        14.6028
4      9     38371     mean  ...   0.2620               0.1631         3.2343

[5 rows x 271 columns]


In [0]:
y_all = result[['Img_name', 'Var_name', 'Barcode', 'Glucose','Fructose','Sucrose','Raffinose','1-Kestose','Maltose','Nystose','1,1,1-Kestopentaose',
                'Total Fructan','Band_001', 'Band_002', 'Band_003', 'Band_004', 'Band_005', 'Band_006', 'Band_007','Band_008', 'Band_009',
     'Band_010', 'Band_011', 'Band_012', 'Band_013', 'Band_014', 'Band_015', 'Band_016', 'Band_017', 'Band_018', 'Band_019', 'Band_020',
     'Band_021', 'Band_022', 'Band_023', 'Band_024', 'Band_025', 'Band_026', 'Band_027', 'Band_028', 'Band_029','Band_030', 'Band_031',
     'Band_032', 'Band_033', 'Band_034', 'Band_035', 'Band_036', 'Band_037', 'Band_038', 'Band_039', 'Band_040','Band_041', 'Band_042',
     'Band_043', 'Band_044', 'Band_045', 'Band_046', 'Band_047', 'Band_048', 'Band_049', 'Band_050', 'Band_051','Band_052', 'Band_053',
     'Band_054', 'Band_055', 'Band_056', 'Band_057', 'Band_058', 'Band_059', 'Band_060', 'Band_061', 'Band_062','Band_063', 'Band_064',
     'Band_065', 'Band_066', 'Band_067', 'Band_068', 'Band_069', 'Band_070', 'Band_071', 'Band_072', 'Band_073','Band_074', 'Band_075',
     'Band_076', 'Band_077', 'Band_078', 'Band_079', 'Band_080', 'Band_081', 'Band_082', 'Band_083', 'Band_084','Band_085', 'Band_086',
     'Band_087', 'Band_088', 'Band_089', 'Band_090', 'Band_091', 'Band_092', 'Band_093', 'Band_094', 'Band_095','Band_096', 'Band_097',
     'Band_098', 'Band_099', 'Band_100', 'Band_101', 'Band_102', 'Band_103', 'Band_104', 'Band_105', 'Band_106','Band_107', 'Band_108',
     'Band_109', 'Band_110', 'Band_111', 'Band_112', 'Band_113', 'Band_114', 'Band_115', 'Band_116', 'Band_117','Band_118', 'Band_119',
     'Band_120', 'Band_121', 'Band_122', 'Band_123', 'Band_124', 'Band_125', 'Band_126', 'Band_127', 'Band_128','Band_129', 'Band_130',
     'Band_131', 'Band_132', 'Band_133', 'Band_134', 'Band_135', 'Band_136', 'Band_137', 'Band_138', 'Band_139','Band_140', 'Band_141',
     'Band_142', 'Band_143', 'Band_144', 'Band_145', 'Band_146', 'Band_147', 'Band_148', 'Band_149', 'Band_150','Band_151', 'Band_152',
     'Band_153', 'Band_154', 'Band_155', 'Band_156', 'Band_157', 'Band_158', 'Band_159', 'Band_160', 'Band_161','Band_162', 'Band_163',
     'Band_164', 'Band_165', 'Band_166', 'Band_167', 'Band_168', 'Band_169', 'Band_170', 'Band_171', 'Band_172','Band_173', 'Band_174',
     'Band_175', 'Band_176', 'Band_177', 'Band_178', 'Band_179', 'Band_180', 'Band_181', 'Band_182', 'Band_183','Band_184', 'Band_185',
     'Band_186', 'Band_187', 'Band_188', 'Band_189', 'Band_190', 'Band_191', 'Band_192', 'Band_193', 'Band_194','Band_195', 'Band_196',
     'Band_197', 'Band_198', 'Band_199', 'Band_200', 'Band_201', 'Band_202', 'Band_203', 'Band_204', 'Band_205','Band_206', 'Band_207',
     'Band_208', 'Band_209', 'Band_210', 'Band_211', 'Band_212', 'Band_213', 'Band_214', 'Band_215', 'Band_216','Band_217', 'Band_218',
     'Band_219', 'Band_220', 'Band_221', 'Band_222', 'Band_223', 'Band_224', 'Band_225', 'Band_226', 'Band_227','Band_228', 'Band_229',
     'Band_230', 'Band_231', 'Band_232', 'Band_233', 'Band_234', 'Band_235', 'Band_236', 'Band_237', 'Band_238','Band_239', 'Band_240',
     'Band_241', 'Band_242', 'Band_243', 'Band_244', 'Band_245', 'Band_246', 'Band_247', 'Band_248', 'Band_249','Band_250', 'Band_251',
     'Band_252', 'Band_253', 'Band_254', 'Band_255', 'Band_256']]

In [0]:
def mse_score(tgt, mdl, iput):
    y_pred = mdl.predict(iput)
    mse = mean_squared_error(tgt, y_pred)  #Train error = 0.01248 (mse)
    return mse

In [0]:
def sugar(i):
        switcher={
                3:  'Glucose',
                4:  'Fructose',
                5:  'Sucrose',
                6:  'Raffinose',
                7:  '1-Kestose',
                8:  'Maltose',
                9:  'Nystose',
                10: 'Kestopentaose',
                11: 'Total Fructan'
             }
        return switcher.get(i,"Invalid day of week")

In [0]:
# Select the input features
X = y_all.iloc[:, 12:].values

In [54]:
# Select Target Sugar (from position 3, 11)
for c in range(3, 13, 1):
  y = y_all.iloc[:, c].values
  # standardize and train/test split
  X = preprocessing.scale( y_all.iloc[:, 12:])
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=10)
  #Fitting Random Forest Regression model
  regressor = RandomForestRegressor(random_state = 0, criterion="mae")
  params = {"n_estimators":[90, 100, 120, 150]}
  clf = GridSearchCV(regressor, n_jobs=-1,  param_grid=params, cv=10, verbose=10)
  clf.fit(X_train, y_train)
  mse_test = []
  mse_train = []
  mse_test.append(mse_score(tgt = y_test, mdl= clf, iput= X_test))
  mse_train.append(mse_score(tgt = y_train, mdl= clf, iput= X_train))
  ridge_train_pred = []
  ridge_test_pred = []
  # prediction
  ridge_train_pred.append(clf.predict(X_train))
  ridge_test_pred.append(clf.predict(X_test))
  # R-squared of training set
  ridge_r_squared_train = [r2_score(y_train, p) for p in ridge_train_pred]
  # R-squared of test set
  ridge_r_squared_test = [r2_score(y_test, p) for p in ridge_test_pred]
  print('R2 for '+str(sugar(i=c)))
  print(ridge_r_squared_train)
  print(ridge_r_squared_test)
  print('MSE for '+str(sugar(i=c)))
  print(mse_train)
  print(mse_test)
                         

Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.1min finished


R2 for Glucose
[0.8426150128899254]
[-0.06625074817324883]
MSE for Glucose
[0.01381787308358843]
[0.0764636111129044]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   50.7s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.1min finished


R2 for Fructose
[0.8392410790456521]
[-0.1896470686448477]
MSE for Fructose
[0.02224780778710811]
[0.13442930174460668]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   24.9s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   50.7s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.1min finished


R2 for Sucrose
[0.8415398108306604]
[-0.1511073155329712]
MSE for Sucrose
[0.8602074871719246]
[6.053181418551167]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   22.8s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   46.7s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.9min finished


R2 for Raffinose
[0.8509970060804186]
[-0.016422338120871904]
MSE for Raffinose
[0.2979695644958606]
[1.7000738168673675]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   26.5s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   59.8s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.5min finished


R2 for 1-Kestose
[0.8411992649377539]
[-0.07934133388863307]
MSE for 1-Kestose
[0.11706652910038944]
[0.8850431811027664]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   24.7s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   51.0s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.1min finished


R2 for Maltose
[0.8603268166186931]
[-0.022928289985659545]
MSE for Maltose
[0.011297862800268735]
[0.07668524788231051]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   22.9s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   50.6s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.1min finished


R2 for Nystose
[0.8446645460556693]
[-0.09291853284564411]
MSE for Nystose
[0.11251938216946294]
[0.9120544958060918]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   23.6s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   48.8s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.0min finished


R2 for Kestopentaose
[0.8437320374753126]
[-0.20784114052989544]
MSE for Kestopentaose
[0.010870153217759079]
[0.09272684121292242]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   23.6s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  2.2min finished


R2 for Total Fructan
[0.8351814503746505]
[-0.14367030098763212]
MSE for Total Fructan
[2.861604693964262]
[21.089215465444468]
Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   16.1s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   32.1s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  1.3min finished


R2 for Invalid day of week
[0.9996866148465618]
[0.9942951962786213]
MSE for Invalid day of week
[1873.8677569502906]
[30734.23820550051]
