## Measuring Regression Performance

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
# Read the ENB2012 spreadsheet into a DataFrame and preview its first five rows.
data_path = 'ENB2012_data.xlsx'
df = pd.read_excel(data_path)
df.head(n=5)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y1,Y2,Unnamed: 10,Unnamed: 11
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33,,
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33,,
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33,,
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33,,
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28,,


In [6]:
# Mapping from the terse spreadsheet headers (X1..X8 inputs, Y1..Y2 outputs)
# to descriptive column names; applied later via DataFrame.rename.
column_names = {
    'X1': 'Relative_Compactness',
    'X2': 'Surface_Area',
    'X3': 'Wall_Area',
    'X4': 'Roof_Area',
    'X5': 'Overall_Height',
    'X6': 'Orientation',
    'X7': 'Glazing_Area',
    'X8': 'Glazing_Area_Distribution',
    'Y1': 'Heating_Load',
    'Y2': 'Cooling_Load',
}

In [12]:
# Give the columns descriptive names and discard the two trailing
# 'Unnamed: 10' / 'Unnamed: 11' columns (they show only NaN in the preview).
# Written as one non-mutating chain with errors='ignore' so the cell can be
# re-run safely: once those columns are gone, a plain drop would raise KeyError.
df = df.rename(columns=column_names).drop(
    columns=['Unnamed: 10', 'Unnamed: 11'], errors='ignore'
)

### Mean Absolute Error (MAE) 


In [13]:

# Bring every column (features and targets alike) onto a common scale with
# min-max scaling before modelling.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later in the notebook, so test rows influence the
# scaling parameters. Consider fitting on the training rows only — confirm
# whether that matters for this exercise.
scaled_values = scaler.fit_transform(df)

# fit_transform returns a plain array, so wrap it back into a DataFrame that
# reuses the original column labels.
normalised_df = pd.DataFrame(scaled_values, columns=df.columns)

# Separate the predictors from the two load columns; only the heating load is
# kept as the regression target here.
target_columns = ['Heating_Load', 'Cooling_Load']
features_df = normalised_df.drop(columns=target_columns)
heating_target = normalised_df['Heating_Load']

In [14]:
# Split the previously separated features and target into training and
# testing sets, holding out 30% of the rows for testing.
# A fixed random_state makes the shuffle, and therefore every metric computed
# below, reproducible from run to run.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    features_df,
    heating_target,
    test_size=0.3,
    random_state=1,
)


In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Fit a linear regression on the training split (fit returns the fitted
# estimator, so construction and fitting are chained).
linear_model = LinearRegression().fit(x_train, y_train)

# Predict the held-out rows; these predictions are reused by the metrics below.
predicted_values = linear_model.predict(x_test)

# MAE: the mean of the absolute differences between actual and predicted values.
mae = mean_absolute_error(y_test, predicted_values)
round(mae, 3)

0.063

### Residual Sum of Squares (RSS) 


In [16]:
# RSS: sum of the squared differences between actual and predicted values.
residuals = y_test - predicted_values
rss = np.sum(np.square(residuals))
round(rss, 3)

1.817

### Root Mean Square Error (RMSE)

In [17]:
from sklearn.metrics import mean_squared_error

# RMSE is the square root of the mean squared error, which puts the error back
# on the same scale as the (normalised) target.
mse = mean_squared_error(y_test, predicted_values)
rmse = np.sqrt(mse)
round(rmse, 3)

0.089

### R-Squared 

In [18]:
from sklearn.metrics import r2_score

# R-squared (coefficient of determination) of the predictions on the test split.
# The result is stored under its own name: assigning it to `r2_score` would
# rebind the imported function to a float, and any later call to r2_score
# would then fail with "float object is not callable".
r2 = r2_score(y_test, predicted_values)
round(r2, 3)

0.894