In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import statistics
from math import sqrt
import sklearn
from sklearn import metrics
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


In [None]:
#data on atmospheric carbon dioxide concentrations 
co2 = pd.read_excel("co2ppm.xlsx")

y1 = co2['co2_ppm'].loc[:40]
y2 = co2['co2_ppm'].loc[40:]

plt.plot(co2['co2_ppm'].loc[:40], label="Past Data")
plt.plot(co2['co2_ppm'].loc[40:], label="Projected Data", color='red')
plt.xticks(np.arange(50), range(1980, 2030), rotation=90)
ax=plt.gca()
[label.set_visible(False) for (index,label) in enumerate(ax.xaxis.get_ticklabels()) if index % 2 != 0]
plt.legend(loc="upper left")
plt.ylabel("Atmospheric CO2 Concentrations (ppm)")
annual_co2 = co2.set_index('Year')
print(annual_co2)

In [None]:
#data on soybean yields 
soy_yield = pd.read_excel("annual_midwest_soybean_yield.xlsx")

#soy_yield.plot(x ='Year', y='Soybean Yield (bushels/acre)', title = "Annual Soybean Yield in the US Midwest", kind = 'line')
#annual_soy_yield = soy_yield.set_index('Year')

tick_spacing = 2
fig, ax = plt.subplots(1,1)
ax.plot(soy_yield['Year'], soy_yield['Soybean Yield (bushels/acre)'], color='green')
ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
plt.xlabel('Year')
tick_spacing = 3
ax.yaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
plt.ylabel('Soybean Yield (bushels/acre)')
plt.xticks(rotation=90)
ax.set_title("Annual Soybean Yield in the US Midwest")
plt.grid()
plt.show()

In [None]:
temps_precip = pd.read_excel("avg_temps_precip.xlsx")
temps_precip
#group the different months together in order of year
months = temps_precip.sort_values(['Year'],ascending=True).groupby('Month').plot(x='Year', kind='line')

In [None]:
climate_yearly = temps_precip.groupby('Year')
for key, item in climate_yearly:
    print(climate_yearly.get_group(key), "\n\n")

In [23]:
projected_temps_precip = pd.read_excel("projected_temp_precip.xlsx")
projected_temps_precip

Unnamed: 0,Year,Projected Annual Temperature Increase (celsius),Projected Annual Precipitation Increase Spring(%),Projected Annual Precipitation Increase Summer (%),Projected Annual Precipitation Increase Fall (%)
0,2020,0.06,1,0.625,0.2
1,2021,0.12,2,1.25,0.4
2,2022,0.18,3,1.875,0.6
3,2023,0.24,4,2.5,0.8
4,2024,0.3,5,3.125,1.0
5,2025,0.36,6,4.375,1.2
6,2026,0.42,7,5.0,1.4
7,2027,0.6,10,5.625,1.6
8,2028,0.66,11,6.25,1.8


In [2]:
all_data = pd.read_excel("all_data.xlsx")

np_all_data = all_data.to_numpy()
X = np_all_data[:278, :4] 
y = np_all_data[:278, 5:]

X_projected = np_all_data[278:, :4]

X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
regressor = LinearRegression()  
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,25.8,28.637925
1,35.6,36.880764
2,27.5,29.732152
3,39.5,43.976502
4,46.5,49.474697
5,44.0,45.040497
6,52.6,48.330597
7,34.0,34.128534
8,44.6,46.73323
9,34.2,31.925117


In [3]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))  
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 12.3574703745019
Root Mean Squared Error: 3.515319384423256
Mean Absolute Error: 2.6936967046073135


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = regressor.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [5]:
#with outliers removed 
#this line taken from: https://stackoverflow.com/questions/23199796/detect-and-exclude-outliers-in-pandas-data-frame
reg_data = all_data[all_data.apply(lambda x: np.abs(x - x.mean()) / x.std() < 3).all(axis=1)]
reg_data

np_reg_data = reg_data.to_numpy()
X = np_reg_data[:277, :4] 
y = np_reg_data[:277, 5:]

X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
regressor = LinearRegression()  
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,48.0,40.106304
1,27.5,29.563836
2,42.0,42.858884
3,37.5,34.856523
4,32.5,35.814285
5,35.6,36.860831
6,52.6,47.810737
7,32.5,32.473782
8,32.5,35.235632
9,49.0,38.376389


In [6]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 16.268039916477477
Root Mean Squared Error: 4.033365829735443
Mean Absolute Error: 2.781750723892427


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = regressor.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [7]:
#ridge regression
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=0.01) 
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
ridge.fit(X_train, y_train)
y_pred = ridge.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,38.0,39.481205
1,46.9,50.213504
2,44.0,45.099955
3,39.5,44.065794
4,42.0,45.775269
5,42.0,43.217716
6,49.5,48.331619
7,43.0,42.7379
8,25.8,28.149467
9,48.0,48.273459


In [8]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 15.87674284104554
Root Mean Squared Error: 3.9845630677711124
Mean Absolute Error: 2.7670255152053196


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = ridge.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [14]:
#lasso regression
from sklearn.linear_model import Lasso

lasso = Lasso()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
lasso.fit(X_train,y_train)
y_pred = lasso.predict(X_test)
coeff_used = np.sum(lasso.coef_!=0)
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df


Mean Squared Error: 14.47606390654438
Root Mean Squared Error: 3.8047422917386116
Mean Absolute Error: 2.8830532563472007


Unnamed: 0,Actual,Predicted
0,27.5,29.906981
1,25.8,29.129235
2,46.5,48.813947
3,46.9,49.184169
4,39.5,43.357891
5,48.0,47.453735
6,30.0,32.798855
7,33.6,31.515067
8,42.3,36.567009
9,43.7,41.58327


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = lasso.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [11]:
#ElasticNet 
from sklearn.linear_model import ElasticNet

In [12]:
enet = ElasticNet(alpha=0.25, l1_ratio=0.7)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
enet.fit(X_train, y_train)
y_pred = enet.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,42.0,45.678742
1,36.2,39.167819
2,40.0,46.007475
3,43.0,42.565729
4,26.9,31.29184
5,42.0,45.252378
6,26.9,31.064236
7,46.5,49.846148
8,47.8,47.252802
9,34.7,34.600709


In [13]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 14.61317388478071
Root Mean Squared Error: 3.822718127822232
Mean Absolute Error: 2.6407500629793264


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = enet.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [15]:
enet = ElasticNet(alpha=0.5, l1_ratio=0.7)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
enet.fit(X_train, y_train)
y_pred = enet.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,44.6,46.176689
1,43.0,42.524957
2,25.8,28.658675
3,39.5,39.956943
4,36.2,38.790545
5,27.5,29.785347
6,36.2,38.839369
7,43.7,41.456232
8,26.9,30.87883
9,49.0,38.389967


In [16]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 13.87096243315793
Root Mean Squared Error: 3.7243740995176533
Mean Absolute Error: 2.716061543059147


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [None]:
y_projected = enet.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df

In [21]:
enet = ElasticNet(alpha=0.75, l1_ratio=0.7)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.2)
enet.fit(X_train, y_train)
y_pred = enet.predict(X_test)
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df

Unnamed: 0,Actual,Predicted
0,48.0,47.211697
1,48.0,47.325907
2,49.5,48.360787
3,26.9,30.956082
4,32.5,35.65543
5,34.2,31.957559
6,32.5,33.575517
7,37.5,35.192806
8,33.6,31.519206
9,48.0,47.314486


In [None]:
#graph the results
plt.plot(y_test, marker='', color='blue', linewidth=2, label='actual')
plt.plot(y_pred, marker='', color='red', linewidth=2, label='predicted')
plt.legend()
plt.figure(figsize=(50,20))
plt.show()

In [22]:
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) 
print('Root Mean Squared Error:', sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

Mean Squared Error: 9.908161529078754
Root Mean Squared Error: 3.147723229427701
Mean Absolute Error: 2.268331672957128


In [None]:
y_projected = enet.predict(X_projected)
df = pd.DataFrame({'Projected': y_projected.flatten()})
df