In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Relative path written with forward slashes so it resolves on Windows, macOS
# and Linux alike; the previous backslash-separated literal was only a valid
# path on Windows.
df = pd.read_csv("data/model_input.csv")

# Keep only the columns used for modelling. "Unnamed: 0" is presumably the
# index column written by an earlier export -- TODO confirm.
dataset = df.drop(columns=["Unnamed: 0", "Min Temp", "Max Temp", "percentile_25", "percentile_75"])
dataset

Unnamed: 0,Mean Temp,RAINFALL,month_year,median
0,22.178833,0.085958,2013-04,0.455720
1,26.819516,0.413366,2013-05,0.665620
2,25.359833,31.586529,2013-06,0.226039
3,25.052258,20.653476,2013-07,0.249820
4,24.369032,24.446940,2013-08,0.267682
...,...,...,...,...
112,25.292838,13.266669,2022-08,0.238900
113,23.823002,11.473897,2022-09,0.244405
114,20.420344,3.875470,2022-10,0.419238
115,16.548313,0.000000,2022-11,0.518314


In [3]:
# Parse the "YYYY-MM" labels into timestamps, then expose calendar year and
# month as separate numeric columns.
dataset = dataset.assign(month_year=pd.to_datetime(dataset["month_year"]))
dataset = dataset.assign(
    year=dataset["month_year"].dt.year,
    month=dataset["month_year"].dt.month,
)

# The regression frame carries every column except the raw timestamp.
reg_dataset = dataset.drop(columns="month_year")
reg_dataset

Unnamed: 0,Mean Temp,RAINFALL,median,year,month
0,22.178833,0.085958,0.455720,2013,4
1,26.819516,0.413366,0.665620,2013,5
2,25.359833,31.586529,0.226039,2013,6
3,25.052258,20.653476,0.249820,2013,7
4,24.369032,24.446940,0.267682,2013,8
...,...,...,...,...,...
112,25.292838,13.266669,0.238900,2022,8
113,23.823002,11.473897,0.244405,2022,9
114,20.420344,3.875470,0.419238,2022,10
115,16.548313,0.000000,0.518314,2022,11


In [4]:
# Collapse (year, month) into one running month counter: the offsets 2013 and
# 3 make April 2013 map to 1, and each later month adds one. The year/month
# columns are removed once the counter exists.
reg_dataset = reg_dataset.assign(
    timeindex=(reg_dataset["year"] - 2013) * 12 + reg_dataset["month"] - 3
).drop(columns=["year", "month"])

In [5]:
# Pull the target column out of the frame and re-attach it under its
# descriptive name; re-attaching appends it as the last column, which is
# where the later positional X / y split expects the target to be.
median = reg_dataset.pop("median")
reg_dataset["Median NDSI"] = median
reg_dataset

Unnamed: 0,Mean Temp,RAINFALL,timeindex,Median NDSI
0,22.178833,0.085958,1,0.455720
1,26.819516,0.413366,2,0.665620
2,25.359833,31.586529,3,0.226039
3,25.052258,20.653476,4,0.249820
4,24.369032,24.446940,5,0.267682
...,...,...,...,...
112,25.292838,13.266669,113,0.238900
113,23.823002,11.473897,114,0.244405
114,20.420344,3.875470,115,0.419238
115,16.548313,0.000000,116,0.518314


In [6]:
def multipleLR(X, y, test_size=0.3, random_state=0):
    """Fit a multiple linear regression and print its hold-out metrics.

    The data is split once with ``train_test_split``, a ``LinearRegression``
    is fitted on the training part, and RMSE and R2 are computed on the
    held-out part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    test_size : float or int, default=0.3
        Forwarded to ``train_test_split``; size of the held-out part.
    random_state : int, default=0
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # RMSE is reported instead of MSE so the error is in the target's units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print("RMSE: ", rmse)
    print("R2: ", r2)

In [7]:
def polynomialRegression(X, y, degree, test_size=0.3, random_state=0):
    """Fit a polynomial regression of the given degree and print hold-out metrics.

    The features are expanded with ``PolynomialFeatures`` and a
    ``LinearRegression`` is fitted on the expanded training features. RMSE
    and R2 are computed on the held-out part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    degree : int
        Degree passed to ``PolynomialFeatures``.
    test_size : float or int, default=0.3
        Forwarded to ``train_test_split``; size of the held-out part.
    random_state : int, default=0
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # The expansion is fitted on the training part only and merely applied to
    # the test part, so nothing about the held-out rows enters the fit.
    poly = PolynomialFeatures(degree=degree)
    X_poly_train = poly.fit_transform(X_train)
    X_poly_test = poly.transform(X_test)

    poly_regressor = LinearRegression()
    poly_regressor.fit(X_poly_train, y_train)
    y_pred = poly_regressor.predict(X_poly_test)

    # RMSE is reported instead of MSE so the error is in the target's units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"Polynomial Degree: {degree}")
    print("RMSE: ", rmse)
    print("R2: ", r2)

In [8]:
def DTRegression(X, y, test_size=0.3, random_state=0):
    """Fit a decision-tree regressor and print its hold-out metrics.

    The data is split once with ``train_test_split``, a
    ``DecisionTreeRegressor`` is fitted on the training part, and RMSE and
    R2 are computed on the held-out part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    test_size : float or int, default=0.3
        Forwarded to ``train_test_split``; size of the held-out part.
    random_state : int, default=0
        Seed used for both the split and the tree, so a run is repeatable.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    regressor = DecisionTreeRegressor(random_state=random_state)
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # RMSE is reported instead of MSE so the error is in the target's units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print("RMSE: ", rmse)
    print("R2: ", r2)

In [9]:
def RFRegression(X, y, n_estimators=10, test_size=0.3, random_state=0):
    """Fit a random-forest regressor and print its hold-out metrics.

    The data is split once with ``train_test_split``, a
    ``RandomForestRegressor`` is fitted on the training part, and RMSE and
    R2 are computed on the held-out part.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values, one per row of ``X``.
    n_estimators : int, default=10
        Number of trees passed to ``RandomForestRegressor``.
    test_size : float or int, default=0.3
        Forwarded to ``train_test_split``; size of the held-out part.
    random_state : int, default=0
        Seed used for both the split and the forest, so a run is repeatable.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    regressor = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # RMSE is reported instead of MSE so the error is in the target's units.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print("RMSE: ", rmse)
    print("R2: ", r2)

In [10]:
# Positional split: every column but the last is a feature, the last column
# is the target.
# NOTE(review): timeindex shows the rows are consecutive months, and the model
# helpers split them with train_test_split -- confirm that a shuffled split is
# intended for this series.
X, y = reg_dataset.iloc[:, :-1].values, reg_dataset.iloc[:, -1].values
print(X, y, sep="\n")

[[2.21788334e+01 8.59575709e-02 1.00000000e+00]
 [2.68195160e+01 4.13366435e-01 2.00000000e+00]
 [2.53598333e+01 3.15865286e+01 3.00000000e+00]
 [2.50522580e+01 2.06534755e+01 4.00000000e+00]
 [2.43690322e+01 2.44469404e+01 5.00000000e+00]
 [2.39613334e+01 7.31920172e+00 6.00000000e+00]
 [2.10993548e+01 1.03277233e+00 7.00000000e+00]
 [1.57763332e+01 4.08671377e-01 8.00000000e+00]
 [1.22835484e+01 2.27593876e-01 9.00000000e+00]
 [1.01951563e+01 3.34616596e+00 1.00000000e+01]
 [1.14715847e+01 3.92721932e+00 1.10000000e+01]
 [1.62364870e+01 2.17313609e+00 1.20000000e+01]
 [2.12365510e+01 8.25622966e-01 1.30000000e+01]
 [2.47275136e+01 1.08771570e+00 1.40000000e+01]
 [2.78287476e+01 2.76074192e+00 1.50000000e+01]
 [2.54392737e+01 1.50794725e+01 1.60000000e+01]
 [2.54883951e+01 1.95885474e+01 1.70000000e+01]
 [2.37061987e+01 4.22894520e+00 1.80000000e+01]
 [2.06250943e+01 2.80761354e+00 1.90000000e+01]
 [1.66046698e+01 0.00000000e+00 2.00000000e+01]
 [1.14790482e+01 9.41396663e-01 2.100000

In [11]:
# Baseline: plain linear regression on all features.
multipleLR(X=X, y=y)

RMSE:  0.2924386847297578
R2:  0.12283038574886185


In [12]:
# Same features with a degree-2 polynomial expansion.
polynomialRegression(X=X, y=y, degree=2)

Polynomial Degree: 2
RMSE:  0.2895762963690349
R2:  0.13991781210790655


In [13]:
# Single decision tree on the same features.
DTRegression(X=X, y=y)

RMSE:  0.4336580046190202
R2:  -0.928895364647701


In [14]:
# Random forest on the same features.
RFRegression(X=X, y=y)

RMSE:  0.30354800144396643
R2:  0.05491974292827162
