In [1]:
import datetime
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import mean
from sklearn import metrics
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

In [None]:
def date_2_year(date):
    """Return the ``year`` attribute of ``date``.

    Parameters
    ----------
    date : object exposing a ``year`` attribute
        For example ``datetime.date`` or ``datetime.datetime``.

    Returns
    -------
    The value of ``date.year``.
    """
    year = date.year
    return year

def datetojd(stddate):
    """Convert a date to its Julian day, i.e. the day number within the year.

    Parameters
    ----------
    stddate : object exposing ``timetuple()``
        For example ``datetime.date`` or ``datetime.datetime``.

    Returns
    -------
    The ``tm_yday`` field of ``stddate.timetuple()`` (1 for 1 January).
    """
    return stddate.timetuple().tm_yday

In [None]:
# Load the dataset, derive calendar columns, and build scaled train/test arrays

# Define the working directory path here
wd = "/path/to/working/directory"

# os.path.join supplies the path separator when `wd` has no trailing slash;
# plain string concatenation would glue the file name onto the directory name.
dataset = pd.read_csv(os.path.join(wd, "Pusa_data.csv"))
dataset['Date'] = pd.to_datetime(dataset['Date'])
dataset.insert(loc=1, column="Year", value=dataset['Date'].dt.year)
# Day of the year through the vectorised .dt accessor (same mechanism as
# "Year" above). Missing dates become NaN here and are removed by the dropna()
# below, rather than failing inside a per-element .timetuple() call.
dataset.insert(loc=2, column="Jday", value=dataset['Date'].dt.dayofyear)

# Drop the rows having NaN values
dataset = dataset.dropna()

# Reset the indices so they are contiguous after the drop
dataset = dataset.reset_index(drop=True)

# Split train and test sets by calendar year
train = dataset[dataset["Year"] < 2015]   # Training set: years before 2015 (2010 to 2014 per the original note)
test = dataset[dataset["Year"] >= 2015]   # Test set: 2015 onwards (2015 to 2017 per the original note)

# Predictor columns, listed once so train and test cannot drift apart
feature_cols = ['T_min', 'T_max', 'T_mean', 'Ra', 'Rs']
X_train = train[feature_cols].values
X_test = test[feature_cols].values
# Double brackets keep the targets 2-D (one column); later cells index them as [:, 0]
y_train = train[['ETo']].values
y_test = test[['ETo']].values

# Fit the scaler on the training features only, then reuse it for the test features
sc_X = MinMaxScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

######################################################################################################
# GBR: grid search over the gradient-boosting hyper-parameters

param_grid = dict(
    n_estimators=[100, 200, 300, 400, 500, 1000, 1500, 2000],
    learning_rate=[0.01, 0.02, 0.03, 0.04, 0.05],
    max_depth=[3, 5, 7, 9],
    min_samples_split=[2, 4, 6, 8, 10],  # [5, 10,20, 25,30,40,50]
)

# 5-fold cross-validated search scored by negative mean absolute error,
# run on 5 parallel workers; the base estimator is seeded for repeatability.
regressor = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=123),
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    n_jobs=5,
    cv=5,
    verbose=True,
)
# The targets are stored as a single-column 2-D array; fit() gets them flattened
regressor.fit(X_train, y_train.ravel())

# Predictions for the test and train sets, wrapped as single-column frames
y_pred = pd.DataFrame({"Predicted": regressor.predict(X_test)})
y_pred_train = pd.DataFrame({"Predicted_Train": regressor.predict(X_train)})

# Matching observed values; column 0 is the only column of each target array
y_test = pd.DataFrame({"Observed": y_test[:, 0]})
y_train = pd.DataFrame({"Observed_Train": y_train[:, 0]})

# Observed-vs-predicted scatter plots for the test and train sets

def _scatter_observed_vs_predicted(observed, predicted, color, title):
    """Draw one observed-vs-predicted scatter plot annotated with its R-squared.

    Parameters
    ----------
    observed, predicted : array-like
        Observed and predicted values; each is flattened to a 1-D float array.
    color : str
        Marker fill colour.
    title : str
        Title shown above the axes, so the two plots can be told apart.

    Returns
    -------
    tuple
        ``(fig, r_squared)``: the figure and the ``r2_score`` of the inputs.
    """
    obs = np.asarray(observed, dtype=float).ravel()
    pred = np.asarray(predicted, dtype=float).ravel()
    r_squared = r2_score(obs, pred)

    fig, ax = plt.subplots(figsize=(4.5, 4.5))
    ax.scatter(obs, pred,
               c=color,
               linewidths=0.5,
               marker="o",
               edgecolor="black",
               s=50)
    ax.set_xlabel('Observed PET')
    ax.set_ylabel('Predicted PET')
    ax.set_title(title)
    # Anchor the label from the data actually plotted, as plain floats.
    # Previously the train plot reused the test-set extremes, and the
    # coordinates were one-element pandas Series rather than scalars.
    ax.text(0.7 * obs.max(), 1.1 * pred.min(), 'R-sq = %0.3f' % r_squared)
    # No plt.draw() after show(): the figure has already been rendered, and a
    # further draw call may leave a stray empty figure behind.
    plt.show()
    return fig, r_squared


# Test Plot
fig1, r_squared = _scatter_observed_vs_predicted(y_test, y_pred, "purple", "Test set")

# Train plot
fig1, r_squared = _scatter_observed_vs_predicted(y_train, y_pred_train, "forestgreen", "Train set")

# Export Observed and Predicted dataset
# Observed and predicted columns are placed side by side, one frame per split
df1 = pd.concat([y_test, y_pred], axis=1)
df2 = pd.concat([y_train, y_pred_train], axis=1)
# os.path.join supplies the path separator when `wd` has no trailing slash;
# plain string concatenation would glue the file name onto the directory name.
df1.to_csv(os.path.join(wd, "Radiation_based_GBR_Test.csv"), index=False, header=True)
df2.to_csv(os.path.join(wd, "Radiation_based_GBR_Train.csv"), index=False, header=True)

# Best parameter combination found by the grid search
print(regressor.best_params_)