In [3]:
# -*- coding: utf-8 -*-
"""
Created on Sat Sep  8 10:32:13 2018

@author: sarveswara rao
"""

import pandas as pd
import numpy as np

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.preprocessing import StandardScaler

import seaborn as sn
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [4]:
def mse(y_test, y_pred):
   # print("\nMSE: %.3f\n" % mean_squared_error(y_test, y_pred))
    return mean_squared_error(y_test, y_pred)

def rmse(y_test, y_pred):
    print("RMSE: %.3f\n" % np.sqrt(mean_squared_error(y_test, y_pred)))  

def mae(y_test, y_pred):
    error = (1/y_test.shape[0])*np.sum(np.absolute(y_test - y_pred), axis = 0)
    print("MAE: %.3f\n" % error)  

def mape(y_test, y_pred):
    error = (100/y_test.shape[0])*np.sum(np.absolute(y_test - y_pred)/y_test, axis = 0)
    print("MAPE: %.3f\n" % error)  

def smape(y_test, y_pred):
    error = (100/y_test.shape[0])*np.sum(np.absolute(y_test - y_pred)/((y_test+y_pred)/2), axis = 0)
    print("SMAPE: %.3f\n" % error)  

def r2(y_test, y_pred):
    print("R2: %.3f\n" % r2_score(y_test,y_pred))

def ridge_regression(X, y):
    
    test = SelectKBest(score_func = f_regression, k=6).fit(X, y)
    X_new = test.transform(X)
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X_new)
    
    # Changing the ransom_state = different value will changes the results okay.....
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size = .3, random_state = 10)

    reg  = linear_model.RidgeCV(
                    alphas = [10, 5, 2, 3, 1, 0.1, 0.3, 0.6, 0.9, 0.01, 0.05],
                    cv = 5)
    reg.fit(X_train, y_train)

    print("Best alpha: {}\n".format(reg.alpha_))
    
    print("R2: %.3f" % reg.score(X_test, y_test))
    print("MSE: %.3f" % mse(y_test ,reg.predict(X_test)))
    

In [5]:

# Load the diabetes dataset
dataset = pd.read_excel('../dataset/energy-efficiency-dataset.xlsx')

# taking the first 8 columns as features
X = pd.DataFrame(data=dataset.iloc[:, 0:8])

# Y1 corresponds to heating load 
y1 = pd.DataFrame(data=dataset.iloc[:, 8])
y2 = pd.DataFrame(data=dataset.iloc[:, 9])

y1 = np.ravel(y1)
y2 = np.ravel(y2)

print("\n----------------For Heating Load----------------\n")
ridge_regression(X, y1)

print("\n----------------For Cooling Load----------------\n")
ridge_regression(X, y2)
print("\n------------------------------------------------\n")



----------------For Heating Load----------------

Best alpha: 0.3

R2: 0.921
MSE: 8.056

----------------For Cooling Load----------------

Best alpha: 0.3

R2: 0.901
MSE: 9.073

------------------------------------------------

