## Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## Import Data

In [None]:
# Load the merged spectral feature table and the sugar measurement table.
dataset_path = "Prepared_Datasets/merged_VNIR_SWIR_add_light_intensity.xlsx"
target_path = "Sugar_Quantities.xlsx"

dataset = pd.read_excel(dataset_path)
target = pd.read_excel(target_path)

In [None]:
# Show the first three rows of the feature table as a quick sanity check.
dataset.head(3)

In [None]:
# Replace the spreadsheet headers with fixed, code-friendly column names:
# six identifier columns followed by the nine sugar measurements.
target_columns = [
    'Barcode',
    'Print_Info',
    'Variety_ID',
    'Variety',
    'Batch',
    'Sample',
    'Glucose',
    'Fructose',
    'Sucrose',
    'Raffinose',
    'Kestose',
    'Maltose',
    'Nystose',
    'Kestopentaose',
    'Total_Fructan',
]
target.columns = target_columns

In [None]:
# g = sns.pairplot(target[['Glucose','Fructose','Sucrose', 'Raffinose', 'Kestose', 'Maltose', 'Nystose', 'Kestopentaose']])

In [None]:
# Pairwise correlation between the sugar measurements.
# The columns are selected by name: the previous zero-based positions 7..14
# started at Fructose and therefore silently left Glucose (position 6) out.
sugar_columns = ['Glucose', 'Fructose', 'Sucrose', 'Raffinose', 'Kestose',
                 'Maltose', 'Nystose', 'Kestopentaose', 'Total_Fructan']
cor2 = target[sugar_columns].corr()

In [None]:
# Draw the sugar correlation matrix as an annotated red-scale heatmap.
sns.heatmap(data=cor2, cmap=plt.cm.Reds, annot=True)
plt.show()

### Adding helper columns to the target dataset

#### Adding the number of samples per variety (`Variety_counts`)

In [None]:
# Number of rows per Variety_ID. Series.value_counts() replaces the deprecated
# top-level pd.value_counts() helper.
counts = target['Variety_ID'].value_counts()
# Column vectors (n, 1) of the variety ids and of their counts, ready to be
# placed side by side.
counts_index = counts.index.to_numpy().reshape(-1, 1)
counts_values = counts.to_numpy().reshape(-1, 1)

In [None]:
# Put the id column and the count column side by side -> shape (n, 2).
counts_data = np.hstack((counts_index, counts_values))

In [None]:
# Wrap the (id, count) pairs in a two-column frame so they can be merged.
counts_df = pd.DataFrame(
    counts_data,
    columns=['Variety_ID', 'Variety_counts'],
)

In [None]:
# Attach the per-variety row count to every target row (outer join on Variety_ID).
target = target.merge(
    counts_df,
    how='outer',
    left_on='Variety_ID',
    right_on='Variety_ID',
)

In [None]:
# target.head(2)

#### Adding a per-variety sample number (`Variety_number`), counted from the last row of each variety

In [None]:
# Reserve an (initially empty-string) Variety_number column at position 16.
target.insert(16, "Variety_number", "")

In [None]:
# Variety_number = how many rows with the same Variety_ID exist from this row
# to the end of the table (this row included), so the last row of each variety
# gets 1, the one before it 2, and so on.
#
# Computed with a reverse cumulative count per group instead of the previous
# nested loop: the loop was O(n^2) and wrote through chained indexing
# (target.Variety_number[r] = ...), which raises SettingWithCopyWarning and
# does not modify `target` at all when pandas copy-on-write is enabled.
remaining_in_variety = (
    target.groupby("Variety_ID", sort=False).cumcount(ascending=False) + 1
)
# Rows without a Variety_ID never matched anything in the loop and got 0.
target["Variety_number"] = (
    remaining_in_variety.where(target["Variety_ID"].notna(), 0).astype(int)
)

In [None]:
# Show the first three target rows, now including the helper columns.
target.head(3)

### Prepare the dataset with different scalings to find out which one works best

#### Min-max scaler: scale the dataset to the range [0, 1]

In [None]:
# Rescale every column except the first one of `dataset` to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split further down - confirm that this is intended.
scaler = MinMaxScaler()
scaled_dataset = dataset.iloc[:, 1:]
min_max_scaled_dataset = scaler.fit(scaled_dataset).transform(scaled_dataset)

In [None]:
# Turn the scaled array back into a frame shaped like `dataset`:
# image name first, then the scaled columns, under the original headers.
scaled_frame = pd.DataFrame(min_max_scaled_dataset)
scaled_frame.insert(0, "img", dataset.Img_name)
scaled_frame.columns = dataset.columns
min_max_scaled_dataset = scaled_frame

In [None]:
# Join the scaled features with their sugar measurements (image name == barcode).
min_max_scaled_dataset = min_max_scaled_dataset.merge(
    target,
    how="inner",
    left_on="Img_name",
    right_on="Barcode",
)
min_max_scaled_dataset.head(3)

#### Unscaled dataset

In [None]:
# Join the raw (unscaled) features with their sugar measurements.
not_scaled_dataset = dataset.merge(
    target,
    how="inner",
    left_on="Img_name",
    right_on="Barcode",
)
not_scaled_dataset.head(3)

## Datasets

### Split based on Variety_ID

In [None]:
# Rows with Variety_number == 1 form the test set; rows with a larger
# Variety_number form the training set. Same rule for both scalings.
variety_number_raw = not_scaled_dataset["Variety_number"]
dataset_test_no_scale = not_scaled_dataset[variety_number_raw == 1]
dataset_train_no_scale = not_scaled_dataset[variety_number_raw > 1]

variety_number_scaled = min_max_scaled_dataset["Variety_number"]
dataset_test_min_max = min_max_scaled_dataset[variety_number_scaled == 1]
dataset_train_min_max = min_max_scaled_dataset[variety_number_scaled > 1]

In [None]:
# min_max_scaled_dataset.Variety_number.unique()
# min_max_scaled_dataset.sort_values(by = ['Variety_ID'], axis=0, ascending=True, inplace=True, kind='quicksort', na_position='last')

In [None]:
# ************************** no scaled training and test split based on varieties ***************************
X_train_no_scale = dataset_train_no_scale.iloc[:,1:417]
y_train_no_scale = dataset_train_no_scale.iloc[:,263:272]

X_test_no_scale = dataset_test_no_scale.iloc[:,1:417]
y_test_no_scale = dataset_test_no_scale.iloc[:,423:432]

# *************************** min max training and test split based on varieties ****************************
X_train_min_max = dataset_train_min_max.iloc[:,1:417]
y_train_min_max = dataset_train_min_max.iloc[:,423:432]

X_test_min_max = dataset_test_min_max.iloc[:,1:417]
y_test_min_max = dataset_test_min_max.iloc[:,423:432]

# Algorithms

## Lasso

In [None]:
# List the target column names as a reminder of which sugars can be modelled.
target.columns

In [None]:
# One target series per sugar, first for the training rows ...
y_train_min_max_Glucose = y_train_min_max["Glucose"]
y_train_min_max_Fructose = y_train_min_max["Fructose"]
y_train_min_max_Sucrose = y_train_min_max["Sucrose"]
y_train_min_max_Raffinose = y_train_min_max["Raffinose"]
y_train_min_max_Kestose = y_train_min_max["Kestose"]
y_train_min_max_Maltose = y_train_min_max["Maltose"]
y_train_min_max_Nystose = y_train_min_max["Nystose"]
y_train_min_max_Kestopentaose = y_train_min_max["Kestopentaose"]
y_train_min_max_Total_Fructan = y_train_min_max["Total_Fructan"]

# ... and then for the held-out test rows.
y_test_min_max_Glucose = y_test_min_max["Glucose"]
y_test_min_max_Fructose = y_test_min_max["Fructose"]
y_test_min_max_Sucrose = y_test_min_max["Sucrose"]
y_test_min_max_Raffinose = y_test_min_max["Raffinose"]
y_test_min_max_Kestose = y_test_min_max["Kestose"]
y_test_min_max_Maltose = y_test_min_max["Maltose"]
y_test_min_max_Nystose = y_test_min_max["Nystose"]
y_test_min_max_Kestopentaose = y_test_min_max["Kestopentaose"]
y_test_min_max_Total_Fructan = y_test_min_max["Total_Fructan"]

In [None]:
# One independent Lasso regressor per sugar, all created with the same settings.
lasso_settings = {"max_iter": 100000, "tol": 0.001}

lasso_min_max_Glucose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Fructose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Sucrose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Raffinose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Kestose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Maltose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Nystose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Kestopentaose = linear_model.Lasso(**lasso_settings)
lasso_min_max_Total_Fructan = linear_model.Lasso(**lasso_settings)

In [None]:
# Fit every regressor on the shared min-max training features, each against
# its own sugar series.
for regressor, sugar_values in (
    (lasso_min_max_Glucose, y_train_min_max_Glucose),
    (lasso_min_max_Fructose, y_train_min_max_Fructose),
    (lasso_min_max_Sucrose, y_train_min_max_Sucrose),
    (lasso_min_max_Raffinose, y_train_min_max_Raffinose),
    (lasso_min_max_Kestose, y_train_min_max_Kestose),
    (lasso_min_max_Maltose, y_train_min_max_Maltose),
    (lasso_min_max_Nystose, y_train_min_max_Nystose),
    (lasso_min_max_Kestopentaose, y_train_min_max_Kestopentaose),
    (lasso_min_max_Total_Fructan, y_train_min_max_Total_Fructan),
):
    regressor.fit(X_train_min_max, sugar_values)

In [None]:
# Predict each sugar for the held-out test features.
prediction_min_max_Glucose = lasso_min_max_Glucose.predict(X=X_test_min_max)
prediction_min_max_Fructose = lasso_min_max_Fructose.predict(X=X_test_min_max)
prediction_min_max_Sucrose = lasso_min_max_Sucrose.predict(X=X_test_min_max)
prediction_min_max_Raffinose = lasso_min_max_Raffinose.predict(X=X_test_min_max)
prediction_min_max_Kestose = lasso_min_max_Kestose.predict(X=X_test_min_max)
prediction_min_max_Maltose = lasso_min_max_Maltose.predict(X=X_test_min_max)
prediction_min_max_Nystose = lasso_min_max_Nystose.predict(X=X_test_min_max)
prediction_min_max_Kestopentaose = lasso_min_max_Kestopentaose.predict(X=X_test_min_max)
prediction_min_max_Total_Fructan = lasso_min_max_Total_Fructan.predict(X=X_test_min_max)

In [None]:
# Test-set score of each regressor as returned by its .score() method
# (stored under the historical "variance_*" names).
variance_min_max_Glucose = lasso_min_max_Glucose.score(
    X=X_test_min_max, y=y_test_min_max_Glucose)
variance_min_max_Fructose = lasso_min_max_Fructose.score(
    X=X_test_min_max, y=y_test_min_max_Fructose)
variance_min_max_Sucrose = lasso_min_max_Sucrose.score(
    X=X_test_min_max, y=y_test_min_max_Sucrose)
variance_min_max_Raffinose = lasso_min_max_Raffinose.score(
    X=X_test_min_max, y=y_test_min_max_Raffinose)
variance_min_max_Kestose = lasso_min_max_Kestose.score(
    X=X_test_min_max, y=y_test_min_max_Kestose)
variance_min_max_Maltose = lasso_min_max_Maltose.score(
    X=X_test_min_max, y=y_test_min_max_Maltose)
variance_min_max_Nystose = lasso_min_max_Nystose.score(
    X=X_test_min_max, y=y_test_min_max_Nystose)
variance_min_max_Kestopentaose = lasso_min_max_Kestopentaose.score(
    X=X_test_min_max, y=y_test_min_max_Kestopentaose)
variance_min_max_Total_Fructan = lasso_min_max_Total_Fructan.score(
    X=X_test_min_max, y=y_test_min_max_Total_Fructan)

In [None]:
# Print the nine test scores, one per line, in sugar order.
for sugar_score in (
    variance_min_max_Glucose,
    variance_min_max_Fructose,
    variance_min_max_Sucrose,
    variance_min_max_Raffinose,
    variance_min_max_Kestose,
    variance_min_max_Maltose,
    variance_min_max_Nystose,
    variance_min_max_Kestopentaose,
    variance_min_max_Total_Fructan,
):
    print(sugar_score)

In [None]:
# r2_score_min_max_Glucose = r2_score(y_test_min_max_Glucose, prediction_min_max_Glucose)
# r2_score_min_max_Fructose = r2_score(y_test_min_max_Fructose, prediction_min_max_Fructose)
# r2_score_min_max_Sucrose = r2_score(y_test_min_max_Sucrose, prediction_min_max_Sucrose)
# r2_score_min_max_Raffinose = r2_score(y_test_min_max_Raffinose, prediction_min_max_Raffinose)
# r2_score_min_max_Kestose = r2_score(y_test_min_max_Kestose, prediction_min_max_Kestose)
# r2_score_min_max_Maltose = r2_score(y_test_min_max_Maltose, prediction_min_max_Maltose)
# r2_score_min_max_Nystose = r2_score(y_test_min_max_Nystose, prediction_min_max_Nystose)
# r2_score_min_max_Kestopentaose = r2_score(y_test_min_max_Kestopentaose, prediction_min_max_Kestopentaose)
# r2_score_min_max_Total_Fructan = r2_score(y_test_min_max_Total_Fructan, prediction_min_max_Total_Fructan)

In [None]:
# print(r2_score_min_max_Glucose)
# print(r2_score_min_max_Fructose)
# print(r2_score_min_max_Sucrose)
# print(r2_score_min_max_Raffinose)
# print(r2_score_min_max_Kestose)
# print(r2_score_min_max_Maltose)
# print(r2_score_min_max_Nystose)
# print(r2_score_min_max_Kestopentaose)
# print(r2_score_min_max_Total_Fructan)

In [None]:
# Mean absolute error between measured and predicted values, per sugar.
mean_absolute_error_min_max_Glucose = mean_absolute_error(
    y_true=y_test_min_max_Glucose, y_pred=prediction_min_max_Glucose)
mean_absolute_error_min_max_Fructose = mean_absolute_error(
    y_true=y_test_min_max_Fructose, y_pred=prediction_min_max_Fructose)
mean_absolute_error_min_max_Sucrose = mean_absolute_error(
    y_true=y_test_min_max_Sucrose, y_pred=prediction_min_max_Sucrose)
mean_absolute_error_min_max_Raffinose = mean_absolute_error(
    y_true=y_test_min_max_Raffinose, y_pred=prediction_min_max_Raffinose)
mean_absolute_error_min_max_Kestose = mean_absolute_error(
    y_true=y_test_min_max_Kestose, y_pred=prediction_min_max_Kestose)
mean_absolute_error_min_max_Maltose = mean_absolute_error(
    y_true=y_test_min_max_Maltose, y_pred=prediction_min_max_Maltose)
mean_absolute_error_min_max_Nystose = mean_absolute_error(
    y_true=y_test_min_max_Nystose, y_pred=prediction_min_max_Nystose)
mean_absolute_error_min_max_Kestopentaose = mean_absolute_error(
    y_true=y_test_min_max_Kestopentaose, y_pred=prediction_min_max_Kestopentaose)
mean_absolute_error_min_max_Total_Fructan = mean_absolute_error(
    y_true=y_test_min_max_Total_Fructan, y_pred=prediction_min_max_Total_Fructan)

In [None]:
# Print the nine mean absolute errors, one per line, in sugar order.
for sugar_mae in (
    mean_absolute_error_min_max_Glucose,
    mean_absolute_error_min_max_Fructose,
    mean_absolute_error_min_max_Sucrose,
    mean_absolute_error_min_max_Raffinose,
    mean_absolute_error_min_max_Kestose,
    mean_absolute_error_min_max_Maltose,
    mean_absolute_error_min_max_Nystose,
    mean_absolute_error_min_max_Kestopentaose,
    mean_absolute_error_min_max_Total_Fructan,
):
    print(sugar_mae)

In [None]:
# Re-extract the per-sugar test series from y_test_min_max so this cell can be
# run right before the squared-error computation.
y_test_min_max_Glucose = y_test_min_max["Glucose"]
y_test_min_max_Fructose = y_test_min_max["Fructose"]
y_test_min_max_Sucrose = y_test_min_max["Sucrose"]
y_test_min_max_Raffinose = y_test_min_max["Raffinose"]
y_test_min_max_Kestose = y_test_min_max["Kestose"]
y_test_min_max_Maltose = y_test_min_max["Maltose"]
y_test_min_max_Nystose = y_test_min_max["Nystose"]
y_test_min_max_Kestopentaose = y_test_min_max["Kestopentaose"]
y_test_min_max_Total_Fructan = y_test_min_max["Total_Fructan"]

In [None]:
# Mean squared error between measured and predicted values, per sugar.
mean_squared_error_Glucose = mean_squared_error(
    y_true=y_test_min_max_Glucose, y_pred=prediction_min_max_Glucose)
mean_squared_error_Fructose = mean_squared_error(
    y_true=y_test_min_max_Fructose, y_pred=prediction_min_max_Fructose)
mean_squared_error_Sucrose = mean_squared_error(
    y_true=y_test_min_max_Sucrose, y_pred=prediction_min_max_Sucrose)
mean_squared_error_Raffinose = mean_squared_error(
    y_true=y_test_min_max_Raffinose, y_pred=prediction_min_max_Raffinose)
mean_squared_error_Kestose = mean_squared_error(
    y_true=y_test_min_max_Kestose, y_pred=prediction_min_max_Kestose)
mean_squared_error_Maltose = mean_squared_error(
    y_true=y_test_min_max_Maltose, y_pred=prediction_min_max_Maltose)
mean_squared_error_Nystose = mean_squared_error(
    y_true=y_test_min_max_Nystose, y_pred=prediction_min_max_Nystose)
mean_squared_error_Kestopentaose = mean_squared_error(
    y_true=y_test_min_max_Kestopentaose, y_pred=prediction_min_max_Kestopentaose)
mean_squared_error_Total_Fructan = mean_squared_error(
    y_true=y_test_min_max_Total_Fructan, y_pred=prediction_min_max_Total_Fructan)

In [None]:
# Print the nine mean squared errors, one per line, in sugar order.
for sugar_mse in (
    mean_squared_error_Glucose,
    mean_squared_error_Fructose,
    mean_squared_error_Sucrose,
    mean_squared_error_Raffinose,
    mean_squared_error_Kestose,
    mean_squared_error_Maltose,
    mean_squared_error_Nystose,
    mean_squared_error_Kestopentaose,
    mean_squared_error_Total_Fructan,
):
    print(sugar_mse)

##### visualization

In [None]:
# Actual vs. predicted Glucose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Glucose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Glucose, prediction_min_max_Glucose)

In [None]:
# Actual vs. predicted Fructose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Fructose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Fructose, prediction_min_max_Fructose)

In [None]:
# Actual vs. predicted Sucrose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Sucrose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Sucrose, prediction_min_max_Sucrose)

In [None]:
# Actual vs. predicted Raffinose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Raffinose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Raffinose, prediction_min_max_Raffinose)

In [None]:
# Actual vs. predicted Kestose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Kestose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Kestose, prediction_min_max_Kestose)

In [None]:
# Actual vs. predicted Maltose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Maltose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Maltose, prediction_min_max_Maltose)

In [None]:
# Actual vs. predicted Nystose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Nystose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Nystose, prediction_min_max_Nystose)

In [None]:
# Actual vs. predicted Kestopentaose on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Kestopentaose")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Kestopentaose, prediction_min_max_Kestopentaose)

In [None]:
# Actual vs. predicted Total_Fructan on the test set (5x5 inch scatter plot).
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title("Lasso Regression \n Total_Fructan")
ax.set_xlabel("actual")
ax.set_ylabel("predicted")
ax.scatter(y_test_min_max_Total_Fructan, prediction_min_max_Total_Fructan)

In [None]:
# slope = elasticNet.coef_
# slope.max()

In [None]:
# intercept = elasticNet.intercept_
# print(intercept)