# Capstone - Delivery-3
This dataset is part of the [Farming Systems Project](https://www.ars.usda.gov/northeast-area/beltsville-md-barc/beltsville-agricultural-research-center/sustainable-agricultural-systems-laboratory/docs/farming-systems-project/) at USDA, Beltsville MD.  This data is not available online on the USDA
 website but can be found on my [GitHub](https://github.com/mmtokay/DATA606/tree/master/dataset)
* Julian Day 
* Month
* Day
* Date
* avgtTempC - average temperature in C
* maxTempC - maximum temperature in C
* minTempC - minimum temperature in C
* maxHumPct - maximum humidity in %
* minHumPct - minimum humidity in %
* avgRadWm-2 - average radiation in w/m2
* meanWindMs-1 - mean wind in m/s
* PrecipitationMm - precipitation/snow me# Model A

In [1]:
import io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import warnings
import time

from datetime import datetime, timedelta
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression, RidgeClassifier
from sklearn.metrics import *
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, RobustScaler, Normalizer, MinMaxScaler, StandardScaler, Binarizer
from sklearn.tree import DecisionTreeRegressor, export_graphviz
from sklearn.utils import shuffle
from time import time

warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def modelEvaluation(test_y, y_pred):
    # Mean absolute error regression loss (Best is 0)
    mae = mean_absolute_error(test_y, y_pred)
    print("Mean absolute error regression loss (Best is 0) = {:.5f}".format(mae))
    
    # Mean squared error
    mse = mean_squared_error(test_y, y_pred)
    print("Mean squared error (Best is 0) = {:.5f}".format(mse))
    
    # Median absolute error regression loss
    maerl = median_absolute_error(test_y, y_pred)
    print("Median absolute error regression loss (Best is 0) = {:.5f}".format(maerl))
    
    # Coefficient of determination (Best is 1)
    r2 = r2_score(y_pred, test_y)
    print("Coefficient of determination (Best is 1) = {:.5f}".format(r2))

# Soybean - 15 weeks

In [3]:
dataSoy15w = pd.read_csv('soyFeatures15w.csv')
corrSoy15w = dataSoy15w.corr()
corrSoy15w.style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,avgTemp1,maxTemp1,minTemp1,maxHum1,minHum1,meanWind1,Precip1,avgTemp2,maxTemp2,minTemp2,maxHum2,minHum2,meanWind2,Precip2,avgTemp3,maxTemp3,minTemp3,maxHum3,minHum3,meanWind3,Precip3,avgTemp4,maxTemp4,minTemp4,maxHum4,minHum4,meanWind4,Precip4,avgTemp5,maxTemp5,minTemp5,maxHum5,minHum5,meanWind5,Precip5,avgTemp6,maxTemp6,minTemp6,maxHum6,minHum6,meanWind6,Precip6,avgTemp7,maxTemp7,minTemp7,maxHum7,minHum7,meanWind7,Precip7,avgTemp8,maxTemp8,minTemp8,maxHum8,minHum8,meanWind8,Precip8,avgTemp9,maxTemp9,minTemp9,maxHum9,minHum9,meanWind9,Precip9,avgTemp10,maxTemp10,minTemp10,maxHum10,minHum10,meanWind10,Precip10,avgTemp11,maxTemp11,minTemp11,maxHum11,minHum11,meanWind11,Precip11,avgTemp12,maxTemp12,minTemp12,maxHum12,minHum12,meanWind12,Precip12,avgTemp13,maxTemp13,minTemp13,maxHum13,minHum13,meanWind13,Precip13,avgTemp14,maxTemp14,minTemp14,maxHum14,minHum14,meanWind14,Precip14,avgTemp15,maxTemp15,minTemp15,maxHum15,minHum15,meanWind15,Precip15,GDD,SystemNameType,GrainYield
avgTemp1,1.0,-0.025,-0.069,0.12,0.012,0.95,0.035,1.0,-0.092,-0.11,0.15,0.014,0.96,-0.09,0.97,-0.14,0.032,0.15,0.11,0.93,-0.13,0.97,-0.12,0.18,0.13,0.062,0.93,0.3,0.97,-0.19,-0.0077,0.15,0.054,0.91,-0.11,0.97,-0.24,-0.0094,0.14,0.14,0.93,0.045,0.98,-0.056,-0.045,0.086,-0.1,0.92,-0.19,0.97,-0.25,-0.05,0.098,0.11,0.91,0.089,0.97,-0.19,-0.013,0.049,0.072,0.97,0.16,0.96,-0.12,-0.14,0.045,0.037,0.96,0.089,0.94,-0.24,-0.053,0.036,0.024,0.98,-0.072,0.94,-0.24,-0.12,0.02,0.089,0.99,0.11,0.91,-0.14,-0.14,0.029,0.021,0.95,-0.088,0.87,-0.23,-0.099,0.0028,0.065,0.95,-0.032,0.9,-0.1,-0.08,0.0066,0.073,0.94,-0.0037,-0.55,-0.074,0.23
maxTemp1,-0.025,1.0,0.71,-0.086,0.17,-0.12,-0.00047,-0.053,0.48,0.48,-0.038,0.11,-0.1,-0.058,-0.076,0.3,0.25,-0.044,-0.083,-0.11,-0.065,-0.079,0.29,0.41,-0.036,0.093,-0.11,-0.073,-0.086,0.18,0.65,-0.038,0.085,-0.096,0.067,-0.11,0.15,0.2,-0.034,-0.033,-0.12,-0.15,-0.12,-0.028,0.058,-0.012,0.2,-0.15,0.043,-0.12,-0.14,0.27,-0.019,0.25,-0.13,0.045,-0.12,-0.13,-0.1,-0.014,-0.036,-0.12,0.13,-0.14,-0.16,-0.0091,-0.01,0.088,-0.11,0.14,-0.17,-0.45,-0.51,-0.02,0.019,-0.1,0.014,-0.16,-0.5,-0.35,-0.026,0.14,-0.068,0.39,-0.18,-0.48,-0.54,-0.0063,-0.056,-0.12,0.099,-0.2,-0.33,-0.43,-0.037,0.039,-0.097,0.17,-0.2,-0.56,-0.42,-0.055,0.018,-0.12,0.18,0.21,-0.059,-0.066
minTemp1,-0.069,0.71,1.0,-0.32,0.29,-0.17,0.25,-0.093,0.41,0.6,-0.27,0.16,-0.15,-0.02,-0.12,0.41,0.56,-0.29,-0.18,-0.16,-0.13,-0.13,0.3,0.53,-0.24,0.024,-0.18,0.037,-0.14,-0.085,0.64,-0.26,-0.029,-0.16,0.054,-0.16,-0.0099,0.18,-0.2,-0.15,-0.17,-0.08,-0.16,-0.074,0.18,-0.14,0.081,-0.22,-0.0098,-0.17,-0.22,0.26,-0.16,0.069,-0.2,-0.002,-0.18,-0.24,-0.17,-0.11,-0.098,-0.18,0.15,-0.19,-0.36,-0.34,-0.12,-0.076,-0.18,-0.063,-0.22,-0.49,-0.6,-0.094,-0.1,-0.13,-0.035,-0.22,-0.59,-0.46,-0.095,-0.034,-0.095,0.36,-0.25,-0.7,-0.7,-0.055,-0.099,-0.14,0.29,-0.27,-0.61,-0.69,-0.088,-0.039,-0.13,0.27,-0.27,-0.64,-0.68,-0.14,-0.21,-0.15,-0.023,0.3,0.032,-0.042
maxHum1,0.12,-0.086,-0.32,1.0,0.51,0.094,-0.1,0.13,-0.14,-0.057,0.92,0.6,0.11,-0.011,0.12,-0.14,-0.3,0.91,0.64,0.11,-0.21,0.12,-0.17,-0.26,0.84,0.49,0.12,-0.28,0.13,-0.063,-0.11,0.89,0.67,0.11,0.24,0.14,-0.11,-0.066,0.78,0.61,0.11,0.08,0.13,-0.28,-0.31,0.71,0.53,0.13,0.14,0.14,-0.26,-0.16,0.72,0.62,0.12,-0.073,0.14,-0.32,-0.19,0.65,0.61,0.12,0.24,0.15,0.018,0.077,0.65,0.67,0.14,-0.058,0.15,0.0093,-0.037,0.59,0.56,0.12,-0.079,0.15,-0.058,-0.11,0.59,0.44,0.11,-0.12,0.17,0.12,0.1,0.54,0.51,0.11,-0.28,0.16,0.051,0.18,0.54,0.54,0.12,-0.087,0.16,0.0074,0.078,0.62,0.43,0.12,-0.13,-0.65,-0.072,-0.084
minHum1,0.012,0.17,0.29,0.51,1.0,-0.067,0.28,-0.0023,-0.2,0.3,0.59,0.83,-0.042,0.0082,-0.023,-0.019,0.14,0.59,0.59,-0.062,-0.19,-0.029,0.043,0.15,0.52,0.35,-0.058,-0.22,-0.033,-0.2,0.26,0.56,0.69,-0.063,0.38,-0.038,-0.18,0.14,0.52,0.68,-0.076,0.18,-0.041,-0.28,-0.17,0.4,0.67,-0.078,0.16,-0.042,-0.43,-0.064,0.41,0.57,-0.08,-0.14,-0.045,-0.46,-0.31,0.26,0.54,-0.048,0.22,-0.047,-0.28,-0.24,0.24,0.6,-0.037,-0.092,-0.063,-0.23,-0.27,0.2,0.51,-0.016,-0.055,-0.061,-0.27,-0.29,0.13,0.4,0.0059,0.063,-0.071,-0.25,-0.22,0.13,0.42,-0.021,-0.12,-0.095,-0.28,-0.21,0.057,0.54,-0.019,-0.052,-0.096,-0.37,-0.35,0.079,0.27,-0.041,-0.28,-0.29,0.041,-0.068
meanWind1,0.95,-0.12,-0.17,0.094,-0.067,1.0,0.026,0.96,-0.14,-0.22,0.1,-0.067,0.99,-0.11,0.98,-0.14,-0.019,0.1,0.063,0.99,-0.084,0.98,-0.18,0.11,0.094,-0.028,0.99,0.39,0.97,-0.19,-0.11,0.098,-0.029,0.97,-0.12,0.98,-0.29,-0.07,0.093,0.093,0.97,0.065,0.98,0.0036,-0.0064,0.053,-0.15,0.97,-0.18,0.98,-0.17,-0.03,0.062,0.057,0.96,0.14,0.98,-0.19,0.024,0.024,0.069,0.98,0.13,0.98,-0.12,-0.087,0.017,-0.019,0.96,0.094,0.97,-0.18,0.056,0.015,-0.012,0.95,-0.08,0.97,-0.15,-0.037,0.0023,0.054,0.96,0.1,0.95,-0.054,-0.038,0.008,-0.019,0.93,-0.087,0.94,-0.14,0.007,-0.011,0.018,0.93,-0.048,0.95,-0.011,-0.0073,-0.006,0.099,0.94,0.015,-0.51,-0.11,0.28
Precip1,0.035,-0.00047,0.25,-0.1,0.28,0.026,1.0,0.029,-0.11,0.33,-0.19,0.19,0.022,0.29,0.026,0.11,0.28,-0.21,-0.066,0.014,0.099,0.019,0.0044,0.35,-0.23,-0.012,0.0074,0.13,0.013,-0.18,0.37,-0.22,0.024,0.018,0.063,-0.0017,-0.1,0.43,-0.22,0.046,-0.0062,0.22,-0.0016,-0.11,0.29,-0.18,0.11,-0.021,0.11,-0.005,-0.3,0.13,-0.17,0.035,-0.012,0.12,-0.012,-0.29,-0.066,-0.093,-0.05,0.0033,0.04,-0.017,-0.22,-0.18,-0.1,-0.15,-0.012,-0.037,-0.031,-0.19,-0.14,-0.083,-0.11,0.012,-0.28,-0.028,0.033,-0.016,-0.022,-0.13,0.038,-0.24,-0.044,-0.11,-0.098,-0.052,-0.16,0.012,-0.099,-0.058,-0.017,-0.068,0.017,-0.012,0.0055,0.1,-0.052,0.061,-0.13,0.039,-0.35,-0.013,-0.21,0.24,-0.044,-0.016
avgTemp2,1.0,-0.053,-0.093,0.13,-0.0023,0.96,0.029,1.0,-0.079,-0.11,0.15,-0.0005,0.97,-0.11,0.98,-0.14,0.025,0.15,0.098,0.95,-0.14,0.98,-0.12,0.17,0.13,0.054,0.94,0.31,0.98,-0.19,-0.028,0.14,0.044,0.92,-0.11,0.98,-0.24,-0.0063,0.13,0.13,0.95,0.042,0.98,-0.044,-0.041,0.087,-0.11,0.94,-0.2,0.98,-0.24,-0.062,0.099,0.095,0.93,0.097,0.98,-0.19,-0.0076,0.051,0.065,0.98,0.14,0.97,-0.11,-0.13,0.047,0.027,0.97,0.072,0.95,-0.22,-0.028,0.039,0.023,0.98,-0.083,0.96,-0.22,-0.11,0.024,0.073,0.99,0.097,0.92,-0.13,-0.13,0.031,0.0024,0.96,-0.096,0.89,-0.21,-0.085,0.0072,0.056,0.95,-0.039,0.91,-0.085,-0.068,0.012,0.074,0.95,-0.0042,-0.56,-0.091,0.22
maxTemp2,-0.092,0.48,0.41,-0.14,-0.2,-0.14,-0.11,-0.079,1.0,0.53,-0.27,-0.24,-0.12,-0.14,-0.12,0.39,0.25,-0.27,-0.36,-0.13,-0.014,-0.13,0.24,0.2,-0.26,-0.11,-0.14,-0.13,-0.13,0.34,0.35,-0.25,-0.28,-0.16,-0.0097,-0.12,0.21,0.17,-0.27,-0.37,-0.14,-0.14,-0.11,0.096,0.24,-0.16,-0.092,-0.14,-0.19,-0.13,0.11,0.14,-0.16,-0.12,-0.14,-0.051,-0.12,0.093,-0.025,-0.024,-0.28,-0.13,-0.032,-0.13,-0.022,-0.053,-0.03,-0.23,-0.12,-0.24,-0.14,-0.091,-0.26,-0.00027,-0.19,-0.099,-0.071,-0.15,-0.3,-0.33,0.085,-0.25,-0.12,0.21,-0.17,-0.38,-0.46,0.052,-0.43,-0.11,0.17,-0.16,-0.22,-0.36,0.13,-0.24,-0.076,0.22,-0.16,-0.28,-0.32,0.15,-0.18,-0.049,0.36,0.16,-0.12,-0.14
minTemp2,-0.11,0.48,0.6,-0.057,0.3,-0.22,0.33,-0.11,0.53,1.0,-0.092,0.32,-0.19,0.26,-0.17,0.27,0.52,-0.12,-0.056,-0.22,-0.011,-0.19,0.2,0.36,-0.092,-0.045,-0.24,-0.23,-0.19,-0.019,0.52,-0.081,0.041,-0.24,0.062,-0.21,-0.018,0.26,-0.064,-0.078,-0.23,-0.0065,-0.2,-0.16,0.34,0.031,0.19,-0.26,-0.082,-0.22,-0.25,0.21,0.031,0.12,-0.25,0.1,-0.22,-0.22,-0.12,0.11,-0.067,-0.2,0.21,-0.23,-0.39,-0.21,0.12,0.1,-0.2,0.0031,-0.27,-0.39,-0.34,0.15,0.22,-0.15,0.037,-0.26,-0.39,-0.28,0.18,0.095,-0.13,0.1,-0.3,-0.55,-0.55,0.16,-0.08,-0.16,0.089,-0.33,-0.52,-0.56,0.2,-0.012,-0.15,0.061,-0.32,-0.44,-0.49,0.2,-0.24,-0.18,-0.14,0.2,-0.066,0.061


In [4]:
# Splitting data set
train_X, test_X = train_test_split(dataSoy15w.drop('GrainYield', axis=1), random_state=1)
train_y, test_y = train_test_split(dataSoy15w['GrainYield'], random_state=1)

# Apply Robust Scaler
scaler = RobustScaler()
train_scaler_X = scaler.fit_transform(train_X)
test_scaler_X = scaler.transform(test_X)

**Lasso**

In [5]:
lm = linear_model.Lasso(alpha=0.6)
lm.fit(train_scaler_X,train_y)

y_pred = lm.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=lm.coef_,index=dataSoy15w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 391.32579
Mean squared error (Best is 0) = 257187.41626
Median absolute error regression loss (Best is 0) = 288.72673
Coefficient of determination (Best is 1) = 0.80107
minHum13          931.614770
Precip10          512.885748
maxTemp2          470.311016
SystemNameType    327.484267
minTemp7          318.504855
minTemp12         285.562759
minHum12          279.407371
minHum10          263.988825
maxTemp13         230.653833
minHum9           220.025983
dtype: float64
Precip12    -122.167471
minHum1     -131.744831
minHum15    -191.433067
minHum7     -202.347322
minHum8     -263.170250
maxHum15    -285.869980
maxTemp15   -319.678019
minHum4     -321.507626
maxHum1     -370.527645
maxTemp7    -848.338020
dtype: float64




**Decision Tree Regressor**

In [6]:
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_X,train_y)

y_pred = tree_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

mportant_features = pd.Series(data=tree_model.feature_importances_,index=dataSoy15w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 362.51290
Mean squared error (Best is 0) = 226064.05134
Median absolute error regression loss (Best is 0) = 267.07778
Coefficient of determination (Best is 1) = 0.83261
minHum13          931.614770
Precip10          512.885748
maxTemp2          470.311016
SystemNameType    327.484267
minTemp7          318.504855
minTemp12         285.562759
minHum12          279.407371
minHum10          263.988825
maxTemp13         230.653833
minHum9           220.025983
dtype: float64
Precip12    -122.167471
minHum1     -131.744831
minHum15    -191.433067
minHum7     -202.347322
minHum8     -263.170250
maxHum15    -285.869980
maxTemp15   -319.678019
minHum4     -321.507626
maxHum1     -370.527645
maxTemp7    -848.338020
dtype: float64


**Random Forest Regressor**

In [7]:
rf_model = RandomForestRegressor()
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy15w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 371.24957
Mean squared error (Best is 0) = 242497.11364
Median absolute error regression loss (Best is 0) = 270.45111
Coefficient of determination (Best is 1) = 0.81410
minTemp12         0.474400
maxTemp7          0.127043
minTemp5          0.060960
Precip10          0.035006
maxTemp14         0.032928
minTemp1          0.025239
SystemNameType    0.019784
meanWind13        0.019146
minTemp7          0.015479
maxTemp8          0.013108
dtype: float64
avgTemp4     1.075909e-05
meanWind8    1.003307e-05
Precip1      8.948224e-06
maxHum12     4.040753e-06
GDD          1.715720e-06
maxHum4      2.942185e-07
maxHum9      0.000000e+00
minHum14     0.000000e+00
Precip6      0.000000e+00
maxTemp13    0.000000e+00
dtype: float64


In [8]:
max_depth = 35
n_est = 500
rf_model = RandomForestRegressor(n_estimators=n_est,max_depth=max_depth,random_state=0)
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy15w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 362.15682
Mean squared error (Best is 0) = 228130.64745
Median absolute error regression loss (Best is 0) = 263.96131
Coefficient of determination (Best is 1) = 0.83011
minTemp12         0.465249
maxTemp7          0.104908
minTemp5          0.086511
Precip10          0.042782
minTemp1          0.026931
SystemNameType    0.017790
maxTemp14         0.014271
minTemp13         0.012091
maxTemp4          0.011620
meanWind13        0.011515
dtype: float64
minHum11     0.000727
maxHum6      0.000723
avgTemp3     0.000706
maxHum9      0.000701
maxTemp10    0.000695
maxHum4      0.000683
maxHum1      0.000679
meanWind5    0.000662
maxHum8      0.000495
maxHum3      0.000491
dtype: float64


# Soybean - 14 weeks

In [9]:
dataSoy14w = pd.read_csv('soyFeatures14w.csv')
corrSoy14w = dataSoy14w.corr()
corrSoy14w.style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,avgTemp1,maxTemp1,minTemp1,maxHum1,minHum1,meanWind1,Precip1,avgTemp2,maxTemp2,minTemp2,maxHum2,minHum2,meanWind2,Precip2,avgTemp3,maxTemp3,minTemp3,maxHum3,minHum3,meanWind3,Precip3,avgTemp4,maxTemp4,minTemp4,maxHum4,minHum4,meanWind4,Precip4,avgTemp5,maxTemp5,minTemp5,maxHum5,minHum5,meanWind5,Precip5,avgTemp6,maxTemp6,minTemp6,maxHum6,minHum6,meanWind6,Precip6,avgTemp7,maxTemp7,minTemp7,maxHum7,minHum7,meanWind7,Precip7,avgTemp8,maxTemp8,minTemp8,maxHum8,minHum8,meanWind8,Precip8,avgTemp9,maxTemp9,minTemp9,maxHum9,minHum9,meanWind9,Precip9,avgTemp10,maxTemp10,minTemp10,maxHum10,minHum10,meanWind10,Precip10,avgTemp11,maxTemp11,minTemp11,maxHum11,minHum11,meanWind11,Precip11,avgTemp12,maxTemp12,minTemp12,maxHum12,minHum12,meanWind12,Precip12,avgTemp13,maxTemp13,minTemp13,maxHum13,minHum13,meanWind13,Precip13,avgTemp14,maxTemp14,minTemp14,maxHum14,minHum14,meanWind14,Precip14,GDD,SystemNameType,GrainYield
avgTemp1,1.0,-0.025,-0.069,0.12,0.012,0.95,0.035,1.0,-0.092,-0.11,0.15,0.014,0.96,-0.09,0.97,-0.14,0.032,0.15,0.11,0.93,-0.13,0.97,-0.12,0.18,0.13,0.062,0.93,0.3,0.97,-0.19,-0.0077,0.15,0.054,0.91,-0.11,0.97,-0.24,-0.0094,0.14,0.14,0.93,0.045,0.98,-0.056,-0.045,0.086,-0.1,0.92,-0.19,0.97,-0.25,-0.05,0.098,0.11,0.91,0.089,0.97,-0.19,-0.013,0.049,0.072,0.97,0.16,0.96,-0.12,-0.14,0.045,0.037,0.96,0.089,0.94,-0.24,-0.053,0.036,0.024,0.98,-0.072,0.94,-0.24,-0.12,0.02,0.089,0.99,0.11,0.91,-0.14,-0.14,0.029,0.021,0.95,-0.088,0.87,-0.23,-0.099,0.0028,0.065,0.95,-0.032,-0.55,-0.074,0.23
maxTemp1,-0.025,1.0,0.71,-0.086,0.17,-0.12,-0.00047,-0.053,0.48,0.48,-0.038,0.11,-0.1,-0.058,-0.076,0.3,0.25,-0.044,-0.083,-0.11,-0.065,-0.079,0.29,0.41,-0.036,0.093,-0.11,-0.073,-0.086,0.18,0.65,-0.038,0.085,-0.096,0.067,-0.11,0.15,0.2,-0.034,-0.033,-0.12,-0.15,-0.12,-0.028,0.058,-0.012,0.2,-0.15,0.043,-0.12,-0.14,0.27,-0.019,0.25,-0.13,0.045,-0.12,-0.13,-0.1,-0.014,-0.036,-0.12,0.13,-0.14,-0.16,-0.0091,-0.01,0.088,-0.11,0.14,-0.17,-0.45,-0.51,-0.02,0.019,-0.1,0.014,-0.16,-0.5,-0.35,-0.026,0.14,-0.068,0.39,-0.18,-0.48,-0.54,-0.0063,-0.056,-0.12,0.099,-0.2,-0.33,-0.43,-0.037,0.039,-0.097,0.17,0.21,-0.059,-0.066
minTemp1,-0.069,0.71,1.0,-0.32,0.29,-0.17,0.25,-0.093,0.41,0.6,-0.27,0.16,-0.15,-0.02,-0.12,0.41,0.56,-0.29,-0.18,-0.16,-0.13,-0.13,0.3,0.53,-0.24,0.024,-0.18,0.037,-0.14,-0.085,0.64,-0.26,-0.029,-0.16,0.054,-0.16,-0.0099,0.18,-0.2,-0.15,-0.17,-0.08,-0.16,-0.074,0.18,-0.14,0.081,-0.22,-0.0098,-0.17,-0.22,0.26,-0.16,0.069,-0.2,-0.002,-0.18,-0.24,-0.17,-0.11,-0.098,-0.18,0.15,-0.19,-0.36,-0.34,-0.12,-0.076,-0.18,-0.063,-0.22,-0.49,-0.6,-0.094,-0.1,-0.13,-0.035,-0.22,-0.59,-0.46,-0.095,-0.034,-0.095,0.36,-0.25,-0.7,-0.7,-0.055,-0.099,-0.14,0.29,-0.27,-0.61,-0.69,-0.088,-0.039,-0.13,0.27,0.31,0.032,-0.042
maxHum1,0.12,-0.086,-0.32,1.0,0.51,0.094,-0.1,0.13,-0.14,-0.057,0.92,0.6,0.11,-0.011,0.12,-0.14,-0.3,0.91,0.64,0.11,-0.21,0.12,-0.17,-0.26,0.84,0.49,0.12,-0.28,0.13,-0.063,-0.11,0.89,0.67,0.11,0.24,0.14,-0.11,-0.066,0.78,0.61,0.11,0.08,0.13,-0.28,-0.31,0.71,0.53,0.13,0.14,0.14,-0.26,-0.16,0.72,0.62,0.12,-0.073,0.14,-0.32,-0.19,0.65,0.61,0.12,0.24,0.15,0.018,0.077,0.65,0.67,0.14,-0.058,0.15,0.0093,-0.037,0.59,0.56,0.12,-0.079,0.15,-0.058,-0.11,0.59,0.44,0.11,-0.12,0.17,0.12,0.1,0.54,0.51,0.11,-0.28,0.16,0.051,0.18,0.54,0.54,0.12,-0.087,-0.65,-0.072,-0.084
minHum1,0.012,0.17,0.29,0.51,1.0,-0.067,0.28,-0.0023,-0.2,0.3,0.59,0.83,-0.042,0.0082,-0.023,-0.019,0.14,0.59,0.59,-0.062,-0.19,-0.029,0.043,0.15,0.52,0.35,-0.058,-0.22,-0.033,-0.2,0.26,0.56,0.69,-0.063,0.38,-0.038,-0.18,0.14,0.52,0.68,-0.076,0.18,-0.041,-0.28,-0.17,0.4,0.67,-0.078,0.16,-0.042,-0.43,-0.064,0.41,0.57,-0.08,-0.14,-0.045,-0.46,-0.31,0.26,0.54,-0.048,0.22,-0.047,-0.28,-0.24,0.24,0.6,-0.037,-0.092,-0.063,-0.23,-0.27,0.2,0.51,-0.016,-0.055,-0.061,-0.27,-0.29,0.13,0.4,0.0059,0.063,-0.071,-0.25,-0.22,0.13,0.42,-0.021,-0.12,-0.095,-0.28,-0.21,0.057,0.54,-0.019,-0.052,-0.29,0.041,-0.068
meanWind1,0.95,-0.12,-0.17,0.094,-0.067,1.0,0.026,0.96,-0.14,-0.22,0.1,-0.067,0.99,-0.11,0.98,-0.14,-0.019,0.1,0.063,0.99,-0.084,0.98,-0.18,0.11,0.094,-0.028,0.99,0.39,0.97,-0.19,-0.11,0.098,-0.029,0.97,-0.12,0.98,-0.29,-0.07,0.093,0.093,0.97,0.065,0.98,0.0036,-0.0064,0.053,-0.15,0.97,-0.18,0.98,-0.17,-0.03,0.062,0.057,0.96,0.14,0.98,-0.19,0.024,0.024,0.069,0.98,0.13,0.98,-0.12,-0.087,0.017,-0.019,0.96,0.094,0.97,-0.18,0.056,0.015,-0.012,0.95,-0.08,0.97,-0.15,-0.037,0.0023,0.054,0.96,0.1,0.95,-0.054,-0.038,0.008,-0.019,0.93,-0.087,0.94,-0.14,0.007,-0.011,0.018,0.93,-0.048,-0.51,-0.11,0.28
Precip1,0.035,-0.00047,0.25,-0.1,0.28,0.026,1.0,0.029,-0.11,0.33,-0.19,0.19,0.022,0.29,0.026,0.11,0.28,-0.21,-0.066,0.014,0.099,0.019,0.0044,0.35,-0.23,-0.012,0.0074,0.13,0.013,-0.18,0.37,-0.22,0.024,0.018,0.063,-0.0017,-0.1,0.43,-0.22,0.046,-0.0062,0.22,-0.0016,-0.11,0.29,-0.18,0.11,-0.021,0.11,-0.005,-0.3,0.13,-0.17,0.035,-0.012,0.12,-0.012,-0.29,-0.066,-0.093,-0.05,0.0033,0.04,-0.017,-0.22,-0.18,-0.1,-0.15,-0.012,-0.037,-0.031,-0.19,-0.14,-0.083,-0.11,0.012,-0.28,-0.028,0.033,-0.016,-0.022,-0.13,0.038,-0.24,-0.044,-0.11,-0.098,-0.052,-0.16,0.012,-0.099,-0.058,-0.017,-0.068,0.017,-0.012,0.0055,0.1,0.24,-0.044,-0.016
avgTemp2,1.0,-0.053,-0.093,0.13,-0.0023,0.96,0.029,1.0,-0.079,-0.11,0.15,-0.0005,0.97,-0.11,0.98,-0.14,0.025,0.15,0.098,0.95,-0.14,0.98,-0.12,0.17,0.13,0.054,0.94,0.31,0.98,-0.19,-0.028,0.14,0.044,0.92,-0.11,0.98,-0.24,-0.0063,0.13,0.13,0.95,0.042,0.98,-0.044,-0.041,0.087,-0.11,0.94,-0.2,0.98,-0.24,-0.062,0.099,0.095,0.93,0.097,0.98,-0.19,-0.0076,0.051,0.065,0.98,0.14,0.97,-0.11,-0.13,0.047,0.027,0.97,0.072,0.95,-0.22,-0.028,0.039,0.023,0.98,-0.083,0.96,-0.22,-0.11,0.024,0.073,0.99,0.097,0.92,-0.13,-0.13,0.031,0.0024,0.96,-0.096,0.89,-0.21,-0.085,0.0072,0.056,0.95,-0.039,-0.56,-0.091,0.22
maxTemp2,-0.092,0.48,0.41,-0.14,-0.2,-0.14,-0.11,-0.079,1.0,0.53,-0.27,-0.24,-0.12,-0.14,-0.12,0.39,0.25,-0.27,-0.36,-0.13,-0.014,-0.13,0.24,0.2,-0.26,-0.11,-0.14,-0.13,-0.13,0.34,0.35,-0.25,-0.28,-0.16,-0.0097,-0.12,0.21,0.17,-0.27,-0.37,-0.14,-0.14,-0.11,0.096,0.24,-0.16,-0.092,-0.14,-0.19,-0.13,0.11,0.14,-0.16,-0.12,-0.14,-0.051,-0.12,0.093,-0.025,-0.024,-0.28,-0.13,-0.032,-0.13,-0.022,-0.053,-0.03,-0.23,-0.12,-0.24,-0.14,-0.091,-0.26,-0.00027,-0.19,-0.099,-0.071,-0.15,-0.3,-0.33,0.085,-0.25,-0.12,0.21,-0.17,-0.38,-0.46,0.052,-0.43,-0.11,0.17,-0.16,-0.22,-0.36,0.13,-0.24,-0.076,0.22,0.17,-0.12,-0.14
minTemp2,-0.11,0.48,0.6,-0.057,0.3,-0.22,0.33,-0.11,0.53,1.0,-0.092,0.32,-0.19,0.26,-0.17,0.27,0.52,-0.12,-0.056,-0.22,-0.011,-0.19,0.2,0.36,-0.092,-0.045,-0.24,-0.23,-0.19,-0.019,0.52,-0.081,0.041,-0.24,0.062,-0.21,-0.018,0.26,-0.064,-0.078,-0.23,-0.0065,-0.2,-0.16,0.34,0.031,0.19,-0.26,-0.082,-0.22,-0.25,0.21,0.031,0.12,-0.25,0.1,-0.22,-0.22,-0.12,0.11,-0.067,-0.2,0.21,-0.23,-0.39,-0.21,0.12,0.1,-0.2,0.0031,-0.27,-0.39,-0.34,0.15,0.22,-0.15,0.037,-0.26,-0.39,-0.28,0.18,0.095,-0.13,0.1,-0.3,-0.55,-0.55,0.16,-0.08,-0.16,0.089,-0.33,-0.52,-0.56,0.2,-0.012,-0.15,0.061,0.2,-0.066,0.061


In [10]:
# Splitting data set
train_X, test_X = train_test_split(dataSoy14w.drop('GrainYield', axis=1), random_state=1)
train_y, test_y = train_test_split(dataSoy14w['GrainYield'], random_state=1)

# Apply Robust Scaler
scaler = RobustScaler()
train_scaler_X = scaler.fit_transform(train_X)
test_scaler_X = scaler.transform(test_X)

**Lasso**

In [11]:
lm = linear_model.Lasso(alpha=0.6)
lm.fit(train_scaler_X,train_y)

y_pred = lm.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=lm.coef_,index=dataSoy14w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 390.89912
Mean squared error (Best is 0) = 253732.24411
Median absolute error regression loss (Best is 0) = 287.81818
Coefficient of determination (Best is 1) = 0.80354
minHum13          792.908200
maxTemp2          479.196186
minHum9           474.560664
minTemp13         419.655378
minHum2           403.936166
minHum12          386.642566
Precip10          378.552783
SystemNameType    317.432448
minTemp7          270.382635
maxTemp3          239.402515
dtype: float64
maxTemp8    -132.005082
minHum5     -133.258074
minHum7     -147.084719
maxTemp14   -167.729345
minTemp9    -244.120128
minHum8     -262.167214
minHum4     -341.496656
minHum1     -441.646256
maxTemp7    -638.903471
maxHum1     -927.862511
dtype: float64




**Decision Tree Regressor**

In [12]:
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_X,train_y)

y_pred = tree_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

mportant_features = pd.Series(data=tree_model.feature_importances_,index=dataSoy14w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 365.44649
Mean squared error (Best is 0) = 234428.06372
Median absolute error regression loss (Best is 0) = 262.75000
Coefficient of determination (Best is 1) = 0.82588
minHum13          792.908200
maxTemp2          479.196186
minHum9           474.560664
minTemp13         419.655378
minHum2           403.936166
minHum12          386.642566
Precip10          378.552783
SystemNameType    317.432448
minTemp7          270.382635
maxTemp3          239.402515
dtype: float64
maxTemp8    -132.005082
minHum5     -133.258074
minHum7     -147.084719
maxTemp14   -167.729345
minTemp9    -244.120128
minHum8     -262.167214
minHum4     -341.496656
minHum1     -441.646256
maxTemp7    -638.903471
maxHum1     -927.862511
dtype: float64


**Random Forest Regressor**

In [13]:
rf_model = RandomForestRegressor()
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy14w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 358.55076
Mean squared error (Best is 0) = 224661.31477
Median absolute error regression loss (Best is 0) = 284.61338
Coefficient of determination (Best is 1) = 0.83497
minTemp12         0.471747
maxTemp7          0.102974
minTemp5          0.066201
Precip10          0.050307
maxTemp14         0.026128
minTemp1          0.025429
SystemNameType    0.022902
minTemp13         0.018017
minTemp7          0.016534
minHum14          0.014015
dtype: float64
maxHum5      2.196989e-05
avgTemp3     1.117876e-05
maxHum3      1.056859e-05
maxHum12     7.704289e-06
meanWind6    1.909989e-06
maxTemp12    1.164689e-06
maxHum9      1.640632e-07
minHum5      0.000000e+00
Precip3      0.000000e+00
maxHum2      0.000000e+00
dtype: float64


In [14]:
max_depth = 35
n_est = 500
rf_model = RandomForestRegressor(n_estimators=n_est,max_depth=max_depth,random_state=0)
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy14w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 362.64800
Mean squared error (Best is 0) = 228766.69154
Median absolute error regression loss (Best is 0) = 263.96131
Coefficient of determination (Best is 1) = 0.82962
minTemp12         0.465115
maxTemp7          0.104755
minTemp5          0.086764
Precip10          0.044209
minTemp1          0.027005
SystemNameType    0.017793
maxTemp14         0.015063
minTemp13         0.012376
maxTemp4          0.011785
minTemp7          0.011348
dtype: float64
maxHum3      0.000784
maxHum1      0.000707
maxHum6      0.000693
maxHum4      0.000680
avgTemp12    0.000666
maxTemp10    0.000643
minHum10     0.000576
maxHum5      0.000568
maxHum8      0.000554
maxHum7      0.000530
dtype: float64


# Soybean - 13 weeks

In [15]:
dataSoy13w = pd.read_csv('soyFeatures13w.csv')
corrSoy13w = dataSoy13w.corr()
corrSoy13w.style.background_gradient(cmap='coolwarm').set_precision(2)

Unnamed: 0,avgTemp1,maxTemp1,minTemp1,maxHum1,minHum1,meanWind1,Precip1,avgTemp2,maxTemp2,minTemp2,maxHum2,minHum2,meanWind2,Precip2,avgTemp3,maxTemp3,minTemp3,maxHum3,minHum3,meanWind3,Precip3,avgTemp4,maxTemp4,minTemp4,maxHum4,minHum4,meanWind4,Precip4,avgTemp5,maxTemp5,minTemp5,maxHum5,minHum5,meanWind5,Precip5,avgTemp6,maxTemp6,minTemp6,maxHum6,minHum6,meanWind6,Precip6,avgTemp7,maxTemp7,minTemp7,maxHum7,minHum7,meanWind7,Precip7,avgTemp8,maxTemp8,minTemp8,maxHum8,minHum8,meanWind8,Precip8,avgTemp9,maxTemp9,minTemp9,maxHum9,minHum9,meanWind9,Precip9,avgTemp10,maxTemp10,minTemp10,maxHum10,minHum10,meanWind10,Precip10,avgTemp11,maxTemp11,minTemp11,maxHum11,minHum11,meanWind11,Precip11,avgTemp12,maxTemp12,minTemp12,maxHum12,minHum12,meanWind12,Precip12,avgTemp13,maxTemp13,minTemp13,maxHum13,minHum13,meanWind13,Precip13,GDD,SystemNameType,GrainYield
avgTemp1,1.0,-0.025,-0.069,0.12,0.012,0.95,0.035,1.0,-0.092,-0.11,0.15,0.014,0.96,-0.09,0.97,-0.14,0.032,0.15,0.11,0.93,-0.13,0.97,-0.12,0.18,0.13,0.062,0.93,0.3,0.97,-0.19,-0.0077,0.15,0.054,0.91,-0.11,0.97,-0.24,-0.0094,0.14,0.14,0.93,0.045,0.98,-0.056,-0.045,0.086,-0.1,0.92,-0.19,0.97,-0.25,-0.05,0.098,0.11,0.91,0.089,0.97,-0.19,-0.013,0.049,0.072,0.97,0.16,0.96,-0.12,-0.14,0.045,0.037,0.96,0.089,0.94,-0.24,-0.053,0.036,0.024,0.98,-0.072,0.94,-0.24,-0.12,0.02,0.089,0.99,0.11,0.91,-0.14,-0.14,0.029,0.021,0.95,-0.088,-0.55,-0.074,0.23
maxTemp1,-0.025,1.0,0.71,-0.086,0.17,-0.12,-0.00047,-0.053,0.48,0.48,-0.038,0.11,-0.1,-0.058,-0.076,0.3,0.25,-0.044,-0.083,-0.11,-0.065,-0.079,0.29,0.41,-0.036,0.093,-0.11,-0.073,-0.086,0.18,0.65,-0.038,0.085,-0.096,0.067,-0.11,0.15,0.2,-0.034,-0.033,-0.12,-0.15,-0.12,-0.028,0.058,-0.012,0.2,-0.15,0.043,-0.12,-0.14,0.27,-0.019,0.25,-0.13,0.045,-0.12,-0.13,-0.1,-0.014,-0.036,-0.12,0.13,-0.14,-0.16,-0.0091,-0.01,0.088,-0.11,0.14,-0.17,-0.45,-0.51,-0.02,0.019,-0.1,0.014,-0.16,-0.5,-0.35,-0.026,0.14,-0.068,0.39,-0.18,-0.48,-0.54,-0.0063,-0.056,-0.12,0.099,0.21,-0.059,-0.066
minTemp1,-0.069,0.71,1.0,-0.32,0.29,-0.17,0.25,-0.093,0.41,0.6,-0.27,0.16,-0.15,-0.02,-0.12,0.41,0.56,-0.29,-0.18,-0.16,-0.13,-0.13,0.3,0.53,-0.24,0.024,-0.18,0.037,-0.14,-0.085,0.64,-0.26,-0.029,-0.16,0.054,-0.16,-0.0099,0.18,-0.2,-0.15,-0.17,-0.08,-0.16,-0.074,0.18,-0.14,0.081,-0.22,-0.0098,-0.17,-0.22,0.26,-0.16,0.069,-0.2,-0.002,-0.18,-0.24,-0.17,-0.11,-0.098,-0.18,0.15,-0.19,-0.36,-0.34,-0.12,-0.076,-0.18,-0.063,-0.22,-0.49,-0.6,-0.094,-0.1,-0.13,-0.035,-0.22,-0.59,-0.46,-0.095,-0.034,-0.095,0.36,-0.25,-0.7,-0.7,-0.055,-0.099,-0.14,0.29,0.31,0.032,-0.042
maxHum1,0.12,-0.086,-0.32,1.0,0.51,0.094,-0.1,0.13,-0.14,-0.057,0.92,0.6,0.11,-0.011,0.12,-0.14,-0.3,0.91,0.64,0.11,-0.21,0.12,-0.17,-0.26,0.84,0.49,0.12,-0.28,0.13,-0.063,-0.11,0.89,0.67,0.11,0.24,0.14,-0.11,-0.066,0.78,0.61,0.11,0.08,0.13,-0.28,-0.31,0.71,0.53,0.13,0.14,0.14,-0.26,-0.16,0.72,0.62,0.12,-0.073,0.14,-0.32,-0.19,0.65,0.61,0.12,0.24,0.15,0.018,0.077,0.65,0.67,0.14,-0.058,0.15,0.0093,-0.037,0.59,0.56,0.12,-0.079,0.15,-0.058,-0.11,0.59,0.44,0.11,-0.12,0.17,0.12,0.1,0.54,0.51,0.11,-0.28,-0.65,-0.072,-0.084
minHum1,0.012,0.17,0.29,0.51,1.0,-0.067,0.28,-0.0023,-0.2,0.3,0.59,0.83,-0.042,0.0082,-0.023,-0.019,0.14,0.59,0.59,-0.062,-0.19,-0.029,0.043,0.15,0.52,0.35,-0.058,-0.22,-0.033,-0.2,0.26,0.56,0.69,-0.063,0.38,-0.038,-0.18,0.14,0.52,0.68,-0.076,0.18,-0.041,-0.28,-0.17,0.4,0.67,-0.078,0.16,-0.042,-0.43,-0.064,0.41,0.57,-0.08,-0.14,-0.045,-0.46,-0.31,0.26,0.54,-0.048,0.22,-0.047,-0.28,-0.24,0.24,0.6,-0.037,-0.092,-0.063,-0.23,-0.27,0.2,0.51,-0.016,-0.055,-0.061,-0.27,-0.29,0.13,0.4,0.0059,0.063,-0.071,-0.25,-0.22,0.13,0.42,-0.021,-0.12,-0.29,0.041,-0.068
meanWind1,0.95,-0.12,-0.17,0.094,-0.067,1.0,0.026,0.96,-0.14,-0.22,0.1,-0.067,0.99,-0.11,0.98,-0.14,-0.019,0.1,0.063,0.99,-0.084,0.98,-0.18,0.11,0.094,-0.028,0.99,0.39,0.97,-0.19,-0.11,0.098,-0.029,0.97,-0.12,0.98,-0.29,-0.07,0.093,0.093,0.97,0.065,0.98,0.0036,-0.0064,0.053,-0.15,0.97,-0.18,0.98,-0.17,-0.03,0.062,0.057,0.96,0.14,0.98,-0.19,0.024,0.024,0.069,0.98,0.13,0.98,-0.12,-0.087,0.017,-0.019,0.96,0.094,0.97,-0.18,0.056,0.015,-0.012,0.95,-0.08,0.97,-0.15,-0.037,0.0023,0.054,0.96,0.1,0.95,-0.054,-0.038,0.008,-0.019,0.93,-0.087,-0.51,-0.11,0.28
Precip1,0.035,-0.00047,0.25,-0.1,0.28,0.026,1.0,0.029,-0.11,0.33,-0.19,0.19,0.022,0.29,0.026,0.11,0.28,-0.21,-0.066,0.014,0.099,0.019,0.0044,0.35,-0.23,-0.012,0.0074,0.13,0.013,-0.18,0.37,-0.22,0.024,0.018,0.063,-0.0017,-0.1,0.43,-0.22,0.046,-0.0062,0.22,-0.0016,-0.11,0.29,-0.18,0.11,-0.021,0.11,-0.005,-0.3,0.13,-0.17,0.035,-0.012,0.12,-0.012,-0.29,-0.066,-0.093,-0.05,0.0033,0.04,-0.017,-0.22,-0.18,-0.1,-0.15,-0.012,-0.037,-0.031,-0.19,-0.14,-0.083,-0.11,0.012,-0.28,-0.028,0.033,-0.016,-0.022,-0.13,0.038,-0.24,-0.044,-0.11,-0.098,-0.052,-0.16,0.012,-0.099,0.24,-0.044,-0.016
avgTemp2,1.0,-0.053,-0.093,0.13,-0.0023,0.96,0.029,1.0,-0.079,-0.11,0.15,-0.0005,0.97,-0.11,0.98,-0.14,0.025,0.15,0.098,0.95,-0.14,0.98,-0.12,0.17,0.13,0.054,0.94,0.31,0.98,-0.19,-0.028,0.14,0.044,0.92,-0.11,0.98,-0.24,-0.0063,0.13,0.13,0.95,0.042,0.98,-0.044,-0.041,0.087,-0.11,0.94,-0.2,0.98,-0.24,-0.062,0.099,0.095,0.93,0.097,0.98,-0.19,-0.0076,0.051,0.065,0.98,0.14,0.97,-0.11,-0.13,0.047,0.027,0.97,0.072,0.95,-0.22,-0.028,0.039,0.023,0.98,-0.083,0.96,-0.22,-0.11,0.024,0.073,0.99,0.097,0.92,-0.13,-0.13,0.031,0.0024,0.96,-0.096,-0.56,-0.091,0.22
maxTemp2,-0.092,0.48,0.41,-0.14,-0.2,-0.14,-0.11,-0.079,1.0,0.53,-0.27,-0.24,-0.12,-0.14,-0.12,0.39,0.25,-0.27,-0.36,-0.13,-0.014,-0.13,0.24,0.2,-0.26,-0.11,-0.14,-0.13,-0.13,0.34,0.35,-0.25,-0.28,-0.16,-0.0097,-0.12,0.21,0.17,-0.27,-0.37,-0.14,-0.14,-0.11,0.096,0.24,-0.16,-0.092,-0.14,-0.19,-0.13,0.11,0.14,-0.16,-0.12,-0.14,-0.051,-0.12,0.093,-0.025,-0.024,-0.28,-0.13,-0.032,-0.13,-0.022,-0.053,-0.03,-0.23,-0.12,-0.24,-0.14,-0.091,-0.26,-0.00027,-0.19,-0.099,-0.071,-0.15,-0.3,-0.33,0.085,-0.25,-0.12,0.21,-0.17,-0.38,-0.46,0.052,-0.43,-0.11,0.17,0.17,-0.12,-0.14
minTemp2,-0.11,0.48,0.6,-0.057,0.3,-0.22,0.33,-0.11,0.53,1.0,-0.092,0.32,-0.19,0.26,-0.17,0.27,0.52,-0.12,-0.056,-0.22,-0.011,-0.19,0.2,0.36,-0.092,-0.045,-0.24,-0.23,-0.19,-0.019,0.52,-0.081,0.041,-0.24,0.062,-0.21,-0.018,0.26,-0.064,-0.078,-0.23,-0.0065,-0.2,-0.16,0.34,0.031,0.19,-0.26,-0.082,-0.22,-0.25,0.21,0.031,0.12,-0.25,0.1,-0.22,-0.22,-0.12,0.11,-0.067,-0.2,0.21,-0.23,-0.39,-0.21,0.12,0.1,-0.2,0.0031,-0.27,-0.39,-0.34,0.15,0.22,-0.15,0.037,-0.26,-0.39,-0.28,0.18,0.095,-0.13,0.1,-0.3,-0.55,-0.55,0.16,-0.08,-0.16,0.089,0.2,-0.066,0.061


In [16]:
# Splitting data set
train_X, test_X = train_test_split(dataSoy13w.drop('GrainYield', axis=1), random_state=1)
train_y, test_y = train_test_split(dataSoy13w['GrainYield'], random_state=1)

# Apply Robust Scaler
scaler = RobustScaler()
train_scaler_X = scaler.fit_transform(train_X)
test_scaler_X = scaler.transform(test_X)

**Lasso**

In [17]:
lm = linear_model.Lasso(alpha=0.6)
lm.fit(train_scaler_X,train_y)

y_pred = lm.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=lm.coef_,index=dataSoy13w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 389.87393
Mean squared error (Best is 0) = 252001.47550
Median absolute error regression loss (Best is 0) = 294.09851
Coefficient of determination (Best is 1) = 0.80447
minHum13          887.653159
minTemp13         459.505181
maxTemp2          448.715699
minHum2           441.714933
Precip10          436.093006
minHum9           409.058208
minHum12          390.080021
maxTemp3          382.492398
SystemNameType    322.838292
minTemp7          246.006172
dtype: float64
Precip11     -136.419277
minHum5      -151.860087
minTemp10    -169.524439
maxTemp8     -186.589643
minTemp4     -203.527754
minHum4      -240.166007
minHum7      -258.081018
minHum1      -399.522606
maxTemp7     -673.728597
maxHum1     -1031.319931
dtype: float64




**Decision Tree Regressor**

In [18]:
tree_model = DecisionTreeRegressor()
tree_model.fit(train_scaler_X,train_y)

y_pred = tree_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

mportant_features = pd.Series(data=tree_model.feature_importances_,index=dataSoy13w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 361.58738
Mean squared error (Best is 0) = 225529.12061
Median absolute error regression loss (Best is 0) = 262.75000
Coefficient of determination (Best is 1) = 0.83244
minHum13          887.653159
minTemp13         459.505181
maxTemp2          448.715699
minHum2           441.714933
Precip10          436.093006
minHum9           409.058208
minHum12          390.080021
maxTemp3          382.492398
SystemNameType    322.838292
minTemp7          246.006172
dtype: float64
Precip11     -136.419277
minHum5      -151.860087
minTemp10    -169.524439
maxTemp8     -186.589643
minTemp4     -203.527754
minHum4      -240.166007
minHum7      -258.081018
minHum1      -399.522606
maxTemp7     -673.728597
maxHum1     -1031.319931
dtype: float64


**Random Forest Regressor**

In [19]:
rf_model = RandomForestRegressor()
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy13w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 366.67453
Mean squared error (Best is 0) = 234401.73314
Median absolute error regression loss (Best is 0) = 252.27839
Coefficient of determination (Best is 1) = 0.82666
minTemp12         0.477338
minTemp5          0.112309
maxTemp7          0.107469
Precip10          0.027875
meanWind9         0.025533
minTemp1          0.021164
SystemNameType    0.019899
Precip13          0.019133
minHum5           0.018257
minTemp13         0.016102
dtype: float64
maxHum6      0.000094
maxHum7      0.000091
minTemp6     0.000061
Precip1      0.000060
minHum10     0.000048
Precip12     0.000036
avgTemp13    0.000036
minHum3      0.000014
maxTemp3     0.000003
maxHum4      0.000001
dtype: float64


In [20]:
max_depth = 35
n_est = 500
rf_model = RandomForestRegressor(n_estimators=n_est,max_depth=max_depth,random_state=0)
rf_model.fit(train_scaler_X,train_y)

y_pred = rf_model.predict(test_scaler_X)

modelEvaluation(test_y, y_pred)

important_features = pd.Series(data=rf_model.feature_importances_,index=dataSoy13w.drop('GrainYield', axis=1).columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features[:10])
print(important_features[-10:])

Mean absolute error regression loss (Best is 0) = 362.66471
Mean squared error (Best is 0) = 230049.62658
Median absolute error regression loss (Best is 0) = 263.96131
Coefficient of determination (Best is 1) = 0.82870
minTemp12         0.465697
maxTemp7          0.105868
minTemp5          0.089433
Precip10          0.047392
minTemp1          0.027211
SystemNameType    0.017850
minTemp13         0.017707
meanWind13        0.013905
maxTemp4          0.012952
minTemp7          0.012174
dtype: float64
minHum3      0.000883
maxHum9      0.000869
minHum10     0.000829
minHum4      0.000820
maxHum3      0.000742
maxHum4      0.000722
maxHum7      0.000681
maxHum2      0.000678
maxTemp10    0.000643
maxHum12     0.000640
dtype: float64
