In [78]:
# Importing necessary libraries
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# Reading the dataset
df = pd.read_excel('energydata_complete.xlsx') 

# Selecting features (independent variables) and target (dependent variable)
# Here, 'T2' is the feature and 'T6' is the target
# Reshaping x to make it a 2D array as required by scikit-learn
features = df['T2'].values.reshape(-1,1)
target = df['T6']

# Splitting the data into training and testing sets
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Creating a Linear Regression model
model = LinearRegression()

# Fitting the model on the training data
model.fit(features_train, target_train)

# Making predictions on the test data
target_pred = model.predict(features_test)

# Calculating evaluation metrics
rmse = np.sqrt(mean_squared_error(target_pred, target_test))
mae = mean_absolute_error(target_pred, target_test)
r2 = r2_score(target_pred, target_test)

# Printing the evaluation metrics
print(f'RMSE: {round(rmse,3)}')
print('MAE: ', mae)



RMSE: 3.633
MAE:  2.826851760546555


In [79]:
# Removing columns 'date' and 'lights' from the DataFrame as they might not be relevant for modeling
df.drop("date", axis=1, inplace=True)
df.drop("lights", axis=1, inplace=True)

# Scaling the features using MinMaxScaler to bring them within a specific range
scaler = MinMaxScaler()

# Separating features and target variable
features_df = df.drop("Appliances", axis=1)
target = df["Appliances"]

# Splitting the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(features_df, target, test_size=0.3, random_state=42)

# Normalizing the features using MinMaxScaler
x_train_normalized = scaler.fit_transform(x_train)
x_test_normalized = scaler.transform(x_test)

# Creating a Linear Regression model
model_2 = LinearRegression()

# Fitting the model on the normalized training data
model_2.fit(x_train_normalized, y_train)


In [80]:
# Making predictions on the training data
y_pred_train = model_2.predict(x_train_normalized)

# Calculating mean absolute error for the training set
mae_train = mean_absolute_error(y_train, y_pred_train)

# Printing the mean absolute error for the training set
print(f"The mean absolute error for the training set is {round(mae_train,3)}")


The mean absolute error for the training set is 53.742


In [81]:
# tasks 19
# Calculating root mean squared error (RMSE) for the training set predictions
rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))

# Printing the RMSE for the training set
print(f"The root mean squared error for the training set is {round(rmse_train,3)}")



The root mean squared error for the training set is 95.216


In [82]:
# Making predictions on the normalized test data
y_pred_test = model_2.predict(x_test_normalized)

# Calculating mean absolute error for the test set
mae_test = mean_absolute_error(y_test, y_pred_test)

# Printing the mean absolute error for the test set
print(f"The mean absolute error for the test set is {round(mae_test,3)}")


The mean absolute error for the test set is 53.643


In [83]:
#task 21
# Calculating root mean squared error (RMSE) for the test set predictions
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))

# Printing the RMSE for the test set
print(f"The root mean squared error for the test set is {round(rmse_test,3)}")


The root mean squared error for the test set is 93.892


In [84]:
#task 23
# Creating a Ridge Regression model
ridge_model = Ridge()

# Fitting the Ridge model on the normalized training data
ridge_model.fit(x_train_normalized, y_train)

# Making predictions on the normalized test data
y_pred_ridge = ridge_model.predict(x_test_normalized)

# Calculating root mean squared error (RMSE) for the test set predictions using Ridge regression
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

# Printing the RMSE for the test set using Ridge regression
print(f"The root mean squared error for the test set using Ridge regression is {round(rmse_ridge,3)}")


The root mean squared error for the test set using Ridge regression is 93.709


In [85]:
#task 24
# Creating a Lasso Regression model
lasso_model = Lasso()

# Fitting the Lasso model on the training data
lasso_model.fit(x_train, y_train)

# Defining a function to get the weights of the features from the trained model
def get_weights_df(model, feat, col_name):  
    # Extracting feature weights and sorting them
    weights = pd.Series(model.coef_, index=feat.columns).sort_values()
    
    # Creating a DataFrame to store feature names and their corresponding weights
    weights_df = pd.DataFrame({'Features': weights.index, col_name: weights.values})
    
    # Rounding the weights to 3 decimal places
    weights_df[col_name] = weights_df[col_name].round(3)
    return weights_df

# Getting the weights of the features using the defined function
lasso_weights_df = get_weights_df(lasso_model, x_train, 'Lasso_weight')

# Printing the DataFrame containing feature names and their Lasso regression weights
print(lasso_weights_df)


       Features  Lasso_weight
0            T9       -16.392
1            T2       -10.757
2          RH_2       -10.147
3          RH_8        -5.458
4         T_out        -3.696
5          RH_7        -1.724
6            T5        -1.589
7          RH_9        -1.550
8        RH_out        -0.018
9            T1        -0.000
10    Tdewpoint         0.000
11           T7        -0.000
12         RH_4         0.000
13           T4        -0.000
14          rv2         0.000
15          rv1         0.024
16  Press_mm_hg         0.087
17   Visibility         0.177
18         RH_5         0.251
19         RH_6         0.304
20    Windspeed         2.192
21         RH_3         3.658
22           T6         4.817
23           T8         6.598
24         RH_1        14.840
25           T3        21.215


In [86]:
#task 25
# Making predictions on the test data using the trained Lasso regression model
y_pred_lasso = lasso_model.predict(x_test)

# Calculating root mean squared error (RMSE) for the test set predictions using Lasso regression
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))

# Printing the RMSE for the test set using Lasso regression
print(f"The root mean squared error for the test set using Lasso regression is {round(rmse_lasso, 3)}")


The root mean squared error for the test set using Lasso regression is 93.892
