In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import mean_absolute_error,mean_squared_error,accuracy_score
import matplotlib.pyplot as plt
from math import sqrt
import seaborn as sns

from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

In [2]:
def calculateR2(y_test,y_pred):
    v = ((y_test - y_test.mean())**2).sum()
    u = ((y_test - y_pred)**2).sum()
    return 1-u/v

def calculateMetric(y_pred,y_test):
    print("MAE=",mean_absolute_error(y_test,y_pred))
    print("RMSE=",sqrt(mean_squared_error(y_test,y_pred)))
    print("R2 Score",calculateR2(y_test,y_pred))

def AdaBoostRegression(y_train,y_test,X_train_scaled,X_test_scaled,X_train,X_test): 
  # svr = SVR(kernel="linear")
    rgr = AdaBoostRegressor(n_estimators=100,learning_rate=0.1)
    rgr.fit(X_train_scaled,y_train)
    print("Training Data");
    y_pred = rgr.predict(X_train_scaled)
    accuracy(y_pred,X_train)
    calculateMetric(y_pred,y_train)
    y_pred = rgr.predict(X_test_scaled)
    print("Testing Data");
    accuracy(y_pred,X_test)
    calculateMetric(y_pred,y_test)
    return rgr


def accuracy(y_pred,data):
    correct=0
    i=0
    for index,row in data.iterrows():
        if(y_pred[i]>=row['block_min'] and y_pred[i]<=row['past_max']):
            correct+=1
        i+=1
    acc = correct/data.shape[0]
    print("Accuracy", acc)
    return acc 

In [3]:
data = pd.read_csv("../Data/unscaled.csv")
del data['Unnamed: 0']
scaler = StandardScaler()
y = data['block_min']
X = data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
X_train_scaled = X_train.copy()
X_train_scaled = X_train_scaled.drop(['block_min'], axis=1)
X_test_scaled = X_test.copy()
X_test_scaled = X_test_scaled.drop(['block_min'], axis=1)
X_train_scaled[X_train_scaled.columns[:]] = scaler.fit_transform(X_train_scaled[X_train_scaled.columns[:]])
X_test_scaled[X_test_scaled.columns[:]] = scaler.fit_transform(X_test_scaled[X_test_scaled.columns[:]])

In [4]:
adaBoost = AdaBoostRegression(y_train,y_test,X_train_scaled,X_test_scaled,X_train,X_test)

Training Data
Accuracy 0.570738146739812
MAE= 3641920109.331841
RMSE= 4481166807.194535
R2 Score 0.6554527945910209
Testing Data
Accuracy 0.5713044501362102
MAE= 3662863017.4820356
RMSE= 4699540548.529071
R2 Score 0.6048365853260302


## Saving the model

In [5]:
import pickle
fileName = "../weights/ADABoost.sav"
pickle.dump(adaBoost,open(fileName,"wb"))

In [7]:
loaded_model = pickle.load(open(fileName, 'rb'))
y_pred = loaded_model.predict(X_test_scaled)

In [8]:
print("Testing Data");
accuracy(y_pred,X_test)
calculateMetric(y_pred,y_test)

Testing Data
Accuracy 0.5713044501362102
MAE= 3662863017.4820356
RMSE= 4699540548.529071
R2 Score 0.6048365853260302
