In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
import math

In [12]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [13]:
nocluster_df = pd.read_csv('clustering_dataset.csv',low_memory=False)

## Linear Regression

In [14]:
df = nocluster_df[nocluster_df['int_rate'].notnull()]
x_set = df.drop(['int_rate'], axis=1)
y_set = df['int_rate']
x_train, x_test, y_train, y_test = train_test_split(x_set, y_set, random_state=0)
linearRegr = LinearRegression()
linearRegr.fit(x_train, y_train)
lr_predictions = linearRegr.predict(x_test)
print("MAE : ", mean_absolute_error(y_test, lr_predictions))
print("RMSE : ", math.sqrt(mean_squared_error(y_test, lr_predictions)))
print("MAPE : ", mean_absolute_percentage_error(y_test, lr_predictions))
print("Training score : ", linearRegr.score(x_train, y_train))
print("Testing score : ", linearRegr.score(x_test, y_test))

MAE :  0.618970479787
RMSE :  0.8518170072798339
MAPE :  4.78651760599
Training score :  0.960154068593
Testing score :  0.96008086952


## Random Forest

In [15]:
df = nocluster_df[nocluster_df['int_rate'].notnull()]
x_set = df.drop(['int_rate'], axis=1)
y_set = df['int_rate']
x_train, x_test, y_train, y_test = train_test_split(x_set, y_set, random_state=0)
x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

rfr = RandomForestRegressor(n_jobs = 2)
rfr.fit(x_train, y_train)
rfr_predictions = rfr.predict(x_test)
print("MAE : ", mean_absolute_error(y_test, rfr_predictions))
print("RMSE : ", math.sqrt(mean_squared_error(y_test, rfr_predictions)))
print("MAPE : ", mean_absolute_percentage_error(y_test, rfr_predictions))

MAE :  0.645193359153
RMSE :  0.9014211504800198
MAPE :  4.98595689646


## Neural Network

In [None]:
df = nocluster_df[nocluster_df['int_rate'].notnull()]
x_set = df.drop(['int_rate'], axis=1)
y_set = df['int_rate']
x_train, x_test, y_train, y_test = train_test_split(x_set, y_set, random_state=0)
x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                               max_iter=150, shuffle=True, random_state=1)
mlp.fit(x_train, y_train)
mlp_predictions = mlp.predict(x_test)
print("MAE : ", mean_absolute_error(y_test, mlp_predictions))
print("RMSE : ", math.sqrt(mean_squared_error(y_test, mlp_predictions)))
print("MAPE : ", mean_absolute_percentage_error(y_test, mlp_predictions))
print("Training score : ", mlp.score(x_train, y_train))
print("Testing score : ", mlp.score(x_test, y_test))

MAE :  0.61396271141
RMSE :  0.8463640001851587
MAPE :  4.74632735112
Training score :  0.960984000172
Testing score :  0.960590327655


## KNN

In [None]:
df = nocluster_df[nocluster_df['int_rate'].notnull()]
x_set = df.drop(['int_rate'], axis=1)
y_set = df['int_rate']
x_train, x_test, y_train, y_test = train_test_split(x_set, y_set, random_state=0)
x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)
    
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(x_train,y_train.values.ravel())
knn_predictions = knn.predict(x_test)
    
print("MAE : ", mean_absolute_error(y_test, knn_predictions))
print("RMSE : ", math.sqrt(mean_squared_error(y_test, knn_predictions)))
print("MAPE : ", mean_absolute_percentage_error(y_test, knn_predictions))
print("Training score : ", knn.score(x_train, y_train))
print("Testing score : ", knn.score(x_test, y_test))