In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.datasets import load_boston
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [31]:
# dataset preparation
D=load_boston()
data_X=D.data
data_X=pd.DataFrame(data_X,columns=D.feature_names)
#print(data_X)
data_y=D.target
split_size=5

X_train, X_test, y_train, y_test =train_test_split(data_X, data_y, test_size=0.2,shuffle=True) 
X_train, X_test, y_train, y_test =np.asarray(X_train),np.asarray(X_test),np.asarray(y_train),np.asarray(y_test)
k_fold=KFold(n_splits=split_size,shuffle=True)

TRAIN_INDICES=[]
VALIDATION_INDICES=[]

for train_indices, validation_indices in k_fold.split(X_train):
    TRAIN_INDICES.append(train_indices)
    VALIDATION_INDICES.append(validation_indices)


In [32]:
# squared sum error function
def E(t,y):
    return np.sum((t-y)**2)/2

a=np.array([1,2,3])
b=np.array([4,4,4])
E(a,b)

7.0

In [37]:
# Linear Regression
LR=LinearRegression()

# Linear Regression has no hyper parameter. So no need for cross validation 


LR.fit(X_train,y_train)
    
train_pred=LR.predict(X_train)
train_error=E(y_train,train_pred)
train_score=LR.score(X_train,y_train)
    
test_pred=LR.predict(X_test)
test_error=E(y_test,test_pred)
test_score=LR.score(X_test,y_test)
    
print("train error: ",train_error)
print(" test error: ", test_error)
print()
print("train score: ",train_score)
print("test score: ",test_score)
print()

train error:  4647.407439958362
 test error:  931.8602678876899

train score:  0.7395171829330857
test score:  0.722705851012406



In [38]:
# Ridge Regression
RIDGE=Ridge(alpha=10)

# Cross validation is needed for hyper parameter alpha

train_error=0
validation_error=0

train_score=0
validation_score=0

print("Cross Validation for alpha")
for i in range(split_size):
    train=TRAIN_INDICES[i]
    valid=VALIDATION_INDICES[i]
    RIDGE.fit(X_train[train],y_train[train])
    
    train_pred=RIDGE.predict(X_train[train])
    train_error+=E(y_train[train],train_pred)
    train_score+=RIDGE.score(X_train[train],y_train[train])
    
    validation_pred=RIDGE.predict(X_train[valid])
    validation_error+=E(y_train[valid],validation_pred)
    validation_score+=RIDGE.score(X_train[valid],y_train[valid])
    
print("train error in CV: ",train_error/split_size)
print("train score in CV: ",train_score/split_size)
print()
print("validation error in CV: ", validation_error/split_size)
print("validation score in CV: ",validation_score/split_size)
print("End of CV")
print()

train_pred=RIDGE.predict(X_train)
train_error=E(y_train,train_pred)
train_score=RIDGE.score(X_train,y_train)
    
test_pred=RIDGE.predict(X_test)
test_error=E(y_test,test_pred)
test_score=RIDGE.score(X_test,y_test)

print("train error: ",train_error)
print("train score: ",train_score)
print()
print("test error: ",test_error)
print("test_score: ",test_score)

Cross Validation for alpha
train error in CV:  3855.9026265413113
train score in CV:  0.7299450491994953

validation error in CV:  1045.547591657963
validation score in CV:  0.7077458475808622
End of CV

train error:  4877.954503632479
train score:  0.7265952367968379

test error:  952.7108078726245
test_score:  0.71650134488601


In [40]:
# Neural Network Regression
MLPR=MLPRegressor(hidden_layer_sizes=(15,),solver='adam',max_iter=10000,alpha=0.1)

train_error=0
test_error=0

train_score=0
test_score=0

print("Cross Validation for alpha")
for i in range(split_size):
    train=TRAIN_INDICES[i]
    valid=VALIDATION_INDICES[i]
    MLPR.fit(X_train[train],y_train[train])
    
    train_pred=MLPR.predict(X_train[train])
    train_error+=E(y_train[train],train_pred)
    train_score+=MLPR.score(X_train[train],y_train[train])
    
    validation_pred=MLPR.predict(X_train[valid])
    validation_error+=E(y_train[valid],validation_pred)
    validation_score+=MLPR.score(X_train[valid],y_train[valid])
    
print("train error in CV: ",train_error/split_size)
print("train score in CV: ",train_score/split_size)
print()
print("validation error in CV: ", validation_error/split_size)
print("validation score in CV: ",validation_score/split_size)
print("End of CV")
print()

train_pred=MLPR.predict(X_train)
train_error=E(y_train,train_pred)
train_score=MLPR.score(X_train,y_train)
    
test_pred=MLPR.predict(X_test)
test_error=E(y_test,test_pred)
test_score=MLPR.score(X_test,y_test)

print("train error: ",train_error)
print("train score: ",train_score)
print()
print("test error: ",test_error)
print("test_score: ",test_score)


Cross Validation for alpha
train error in CV:  4073.852640304163
train score in CV:  0.7129624578279937

validation error in CV:  3211.5885971853754
validation score in CV:  2.1055108547740478
End of CV

train error:  5221.835691128675
train score:  0.7073210195880865

test error:  1048.4730633837685
test_score:  0.6880053202542402
