**Loading packages and cleaning**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error

In [2]:
aafc_data=pd.read_csv("aafc_data.txt",index_col=None)
aafc_data.head()

Unnamed: 0.1,Unnamed: 0,TWP_ID,ECODISTRICT_ID,YEAR,YieldKgAcre,SumPcpn18_20,SumPcpn19_21,SumPcpn20_22,SumPcpn21_23,SumPcpn22_24,...,SoilMoisture29_31,SoilMoisture30_32,SoilMoisture31_33,SoilMoisture32_34,SoilMoisture33_35,SoilMoisture34_36,SoilMoisture35_37,SoilMoisture36_38,SoilMoisture37_39,SoilMoisture38_40
0,0,00101E1,852.0,2010,867.766846,53.6,111.1,109.7,117.9,46.4,...,16.960125,18.766207,17.186998,15.461519,19.738222,22.958089,27.206203,26.480087,28.678156,26.308484
1,1,00101W1,852.0,2010,673.685028,57.2,114.7,110.5,114.0,46.2,...,16.32852,17.926029,16.787544,14.779726,20.245149,23.608204,28.56099,27.324254,29.079177,26.927224
2,2,00101W2,796.0,2010,824.303864,39.0,96.4,109.8,101.2,111.4,...,13.117879,12.869142,12.831834,14.126196,16.385776,18.650751,20.287069,20.514132,19.564788,16.681692
3,3,00102E1,853.0,2010,1006.708496,37.5,158.2,157.8,161.4,46.9,...,17.060778,18.699156,17.345822,15.998957,20.091525,22.761273,26.33743,25.559602,27.611729,25.575794
4,4,00102W1,852.0,2010,869.040283,57.2,114.7,110.5,114.0,46.2,...,16.050993,17.55686,16.612026,14.48015,20.467884,23.893858,29.156274,27.695178,29.255386,27.199097


In [82]:
aafc_data.groupby('ECODISTRICT_ID')['TWP_ID'].nunique()

ECODISTRICT_ID
375.0     24
379.0      1
647.0      1
652.0      3
657.0      5
659.0      1
660.0      4
661.0     11
662.0      1
668.0      1
669.0      9
672.0      6
677.0      5
680.0     20
682.0     10
685.0     15
686.0      2
687.0     34
689.0     24
690.0     18
691.0      5
693.0     23
694.0     22
695.0     16
696.0     39
697.0     17
698.0     16
699.0      6
700.0     15
701.0     31
702.0     25
704.0     20
705.0     31
706.0     27
707.0     48
709.0     83
710.0     16
711.0     23
714.0     24
715.0     19
716.0     12
717.0     32
718.0      3
720.0      2
723.0     41
724.0     45
726.0     33
729.0     86
733.0     39
734.0     14
735.0     25
736.0     43
739.0     11
741.0     26
742.0      7
743.0      5
745.0    100
747.0     19
748.0    109
749.0     67
751.0     21
752.0    101
753.0     86
754.0     36
755.0     30
756.0     57
757.0     20
758.0     34
759.0      5
760.0     78
761.0      7
762.0     14
763.0     42
764.0     14
765.0     54
766.0     

In [83]:
pd.set_option('display.max_rows', None)
aafc_data.groupby(['ECODISTRICT_ID']).size()

ECODISTRICT_ID
375.0     264
379.0      11
647.0      11
652.0      33
657.0      55
659.0      11
660.0      44
661.0     119
662.0      11
668.0      11
669.0      99
672.0      66
677.0      55
680.0     220
682.0     110
685.0     165
686.0      22
687.0     374
689.0     264
690.0     198
691.0      55
693.0     253
694.0     242
695.0     176
696.0     429
697.0     187
698.0     176
699.0      66
700.0     165
701.0     341
702.0     275
704.0     220
705.0     341
706.0     297
707.0     528
709.0     913
710.0     176
711.0     253
714.0     264
715.0     209
716.0     132
717.0     352
718.0      33
720.0      22
723.0     451
724.0     495
726.0     363
729.0     946
733.0     429
734.0     154
735.0     275
736.0     473
739.0     121
741.0     286
742.0      77
743.0      55
745.0    1100
747.0     209
748.0    1199
749.0     737
751.0     231
752.0    1111
753.0     946
754.0     396
755.0     330
756.0     627
757.0     220
758.0     374
759.0      55
760.0     858
761.0

# LASSO for Ecodistrict 748

In [4]:
aafc_data_748=aafc_data[aafc_data['ECODISTRICT_ID']==748.0]

In [5]:
# Labels are the values we want to predict
labels = np.array(aafc_data_748['YieldKgAcre'])
# Remove the labels from the features
# axis 1 refers to the columns
features= aafc_data_748.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)
# Saving feature names for later use
feature_list = list(features.columns)
# Convert to numpy array
features = np.array(features)

In [6]:
# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)

In [7]:
scaler = StandardScaler().fit(train_features)

In [8]:
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [9]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.13700269991838376)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.13700269991838376)

In [10]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 84.25
R squared test set 82.26


In [11]:
# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 77.64
MSE test set 84.29


In [12]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 65.8 degrees.


In [13]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 90.3 %.


# LASSO for Ecodistrict 826

In [14]:
aafc_data_826=aafc_data[aafc_data['ECODISTRICT_ID']==826.0]

labels = np.array(aafc_data_826['YieldKgAcre'])

features= aafc_data_826.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [15]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [16]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.1364611722810638)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.1364611722810638)

In [17]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 76.43
R squared test set 68.61


In [18]:

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 100.08
MSE test set 112.91


In [19]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 83.58 degrees.


In [20]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 86.38 %.


# LASSO for Ecodistrict 752

In [21]:
aafc_data_752=aafc_data[aafc_data['ECODISTRICT_ID']==752.0]

labels = np.array(aafc_data_752['YieldKgAcre'])

features= aafc_data_752.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [22]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [23]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.19329979313638276)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.19329979313638276)

In [24]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 84.81
R squared test set 81.22


In [26]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 81.46
MSE test set 90.96


In [27]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 71.35 degrees.


In [28]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 90.17 %.


# LASSO for Ecodistrict 745

In [29]:
aafc_data_745=aafc_data[aafc_data['ECODISTRICT_ID']==745.0]

labels = np.array(aafc_data_745['YieldKgAcre'])

features= aafc_data_745.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [30]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [32]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.29520255449486854)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.29520255449486854)

In [33]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 88.82
R squared test set 87.02


In [36]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train, squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred, squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 75.47
MSE test set 86.12


In [37]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 65.93 degrees.


In [38]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 91.95 %.


# LASSO for Ecodistrict 808

In [39]:
aafc_data_808=aafc_data[aafc_data['ECODISTRICT_ID']==808.0]

labels = np.array(aafc_data_808['YieldKgAcre'])

features= aafc_data_808.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [40]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [41]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.3922599255959099)


Lasso(alpha=0.3922599255959099)

In [42]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 69.21
R squared test set 67.26


In [44]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 111.02
MSE test set 119.34


In [45]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 94.65 degrees.


In [46]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 86.83 %.


# LASSO for Ecodistrict 792

In [47]:
aafc_data_792=aafc_data[aafc_data['ECODISTRICT_ID']==792.0]

labels = np.array(aafc_data_792['YieldKgAcre'])

features= aafc_data_792.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [48]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [49]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.2741273855697071)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.2741273855697071)

In [50]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 78.87
R squared test set 72.17


In [51]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 94.39
MSE test set 106.76


In [52]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 83.5 degrees.


In [53]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 89.21 %.


# LASSO for Ecodistrict 849

In [54]:
aafc_data_849=aafc_data[aafc_data['ECODISTRICT_ID']==849.0]

labels = np.array(aafc_data_849['YieldKgAcre'])

features= aafc_data_849.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [55]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [56]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.18960732797695926)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.18960732797695926)

In [57]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 90.06
R squared test set 86.2


In [58]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 80.64
MSE test set 97.19


In [59]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 73.91 degrees.


In [60]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 89.21 %.


# LASSO for Ecodistrict 729

In [61]:
aafc_data_729=aafc_data[aafc_data['ECODISTRICT_ID']==729.0]

labels = np.array(aafc_data_729['YieldKgAcre'])

features= aafc_data_729.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [62]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [63]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.11212055741708106)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.11212055741708106)

In [64]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 81.52
R squared test set 66.29


In [65]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 75.81
MSE test set 85.2


In [66]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 65.27 degrees.


In [67]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 92.86 %.


# LASSO for Ecodistrict 753

In [68]:
aafc_data_753=aafc_data[aafc_data['ECODISTRICT_ID']==753.0]

labels = np.array(aafc_data_753['YieldKgAcre'])

features= aafc_data_753.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [69]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [70]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.13534582567858963)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.13534582567858963)

In [71]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 89.44
R squared test set 82.58


In [72]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 58.3
MSE test set 69.73


In [73]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 55.03 degrees.


In [74]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 93.86 %.


# LASSO for Ecodistrict 709

In [75]:
aafc_data_709=aafc_data[aafc_data['ECODISTRICT_ID']==709.0]

labels = np.array(aafc_data_709['YieldKgAcre'])

features= aafc_data_709.drop(['YieldKgAcre','YEAR','TWP_ID','Unnamed: 0','ECODISTRICT_ID'], axis = 1)

feature_list = list(features.columns)

features = np.array(features)

In [76]:

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.25, random_state = 42)
scaler = StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)

test_features = scaler.transform(test_features)

In [77]:
model = LassoCV(cv=10, random_state=0, max_iter=10000)
model.fit(train_features, train_labels)
lasso_best = Lasso(alpha=model.alpha_)
print(lasso_best)
lasso_best.fit(train_features, train_labels)

Lasso(alpha=0.16865185704287242)


  model = cd_fast.enet_coordinate_descent(


Lasso(alpha=0.16865185704287242)

In [78]:
print('R squared training set', round(lasso_best.score(train_features, train_labels)*100, 2))
print('R squared test set', round(lasso_best.score(test_features, test_labels)*100, 2))


R squared training set 88.65
R squared test set 82.8


In [79]:
from sklearn.metrics import mean_squared_error

# Training data
pred_train = lasso_best.predict(train_features)
mse_train = mean_squared_error(train_labels, pred_train,squared=False)
print('MSE training set', round(mse_train, 2))

# Test data
pred = lasso_best.predict(test_features)
mse_test =mean_squared_error(test_labels, pred,squared=False)
print('MSE test set', round(mse_test, 2))

MSE training set 81.82
MSE test set 103.32


In [80]:

# Calculate the absolute errors
errors = abs(pred - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 77.41 degrees.


In [81]:
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / test_labels)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 90.63 %.
