#  Importing libraries

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import seaborn as sns

# Loading Data


In [11]:
dataset = pd.read_csv('load_data.csv1.csv')
dataset = dataset.rename(columns={
    'X1':'L(t-3)',
    'X2':'L(t-2)',
    'X3':'L(t-1)',
    'Y' :'L(t)'   
})
dataset.tail()

Unnamed: 0,L(t-3),L(t-2),L(t-1),L(t)
2174,5949.9396,5658.444,5315.508,5294.62008
2175,5658.444,5315.508,5294.62008,5045.52384
2176,5315.508,5294.62008,5045.52384,4626.5184
2177,5294.62008,5045.52384,4626.5184,4567.90752
2178,5045.52384,4626.5184,4567.90752,4723.63164


In [12]:
dataset.isnull().sum()

L(t-3)    0
L(t-2)    0
L(t-1)    0
L(t)      0
dtype: int64

# Detecting Outliers

In [14]:
outliers = []
def detect_outliers_zscore(dataset):
    thres = 3
    mean = np.mean(dataset)
    std = np.std(dataset)
    # print(mean, std)
    for i in dataset:
        z_score = (i-mean)/std
        if (np.abs(z_score) > thres):
            outliers.append(i)
    return outliers

sample_outliers_X1 = detect_outliers_zscore(dataset['L(t-3)'])
sample_outliers_X2 = detect_outliers_zscore(dataset['L(t-2)'])
sample_outliers_X3 = detect_outliers_zscore(dataset['L(t-1)'])
sample_outliers_Y = detect_outliers_zscore(dataset['L(t)'])

print(f"Outliers in '{dataset.columns[0]}' column from Z-scores method: {len(sample_outliers_X1)}")
print(f"Outliers in '{dataset.columns[1]}' column from Z-scores method: {len(sample_outliers_X2)}")
print(f"Outliers in '{dataset.columns[2]}' column from Z-scores method: {len(sample_outliers_X3)}")

print(f"Outliers in '{dataset.columns[3]}' column from Z-scores method: {len(sample_outliers_Y)}")

Outliers in 'L(t-3)' column from Z-scores method: 0
Outliers in 'L(t-2)' column from Z-scores method: 0
Outliers in 'L(t-1)' column from Z-scores method: 0
Outliers in 'L(t)' column from Z-scores method: 0


# Data split

In [15]:
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(dataset, train_size = 0.9, test_size = 0.1, random_state = 100)

# Data Normalising

In [17]:
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

scaler = MinMaxScaler()

# Applying scaler()
num_vars = ['L(t-3)', 'L(t-2)', 'L(t-1)', 'L(t)']
df_train[num_vars] = scaler.fit_transform(df_train[num_vars])
df_test[num_vars] = scaler.fit_transform(df_test[num_vars])

df_train



Unnamed: 0,L(t-3),L(t-2),L(t-1),L(t)
1383,0.435537,0.429973,0.349947,0.308408
1087,0.467062,0.445294,0.520955,0.467604
704,0.628770,0.919517,0.935209,0.935209
1800,0.404839,0.353970,0.322302,0.301275
775,0.206157,0.497275,0.507118,0.317994
...,...,...,...,...
350,0.734359,0.956264,0.960315,0.938347
1930,0.721320,0.594648,0.249465,0.331802
79,0.734359,0.698240,0.555476,0.564891
1859,0.753188,0.506519,0.297338,0.332801


In [18]:
# Dividing the training data set into X and Y
y_train = df_train.pop('L(t)')
X_train = df_train
y_test = df_test.pop('L(t)')
X_test = df_test

# Model Training

In [19]:
from sklearn.linear_model import LinearRegression
lreg = LinearRegression()
lreg.fit(X_train, y_train)

LinearRegression()

In [20]:
print(f"""
Weights are {lreg.coef_}
Bias is {lreg.intercept_}      
""")


Weights are [-0.21215469  0.02217096  0.87359063]
Bias is 0.1529435018870301      



# Error Metrics

In [21]:
import math
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error


y_pred_test =  lreg.predict(X_test)
y_pred_train =  lreg.predict(X_train)

#Training Accuracies
rmse = math.sqrt(mean_squared_error(y_train, y_pred_train)) 
print('Root mean square error is :', rmse) 
mse = (mean_squared_error(y_train, y_pred_train)) 
print('Mean square error is :', mse) 
mae=mean_absolute_error(y_train, y_pred_train)
print('Mean absolute error is :', mae)

Root mean square error is : 0.11862031708461862
Mean square error is : 0.014070779625255466
Mean absolute error is : 0.08204097934613061


In [22]:
# Testing Errors
rmse = math.sqrt(mean_squared_error(y_test, y_pred_test)) 
print('Root mean square error is :', rmse) 
mse = (mean_squared_error(y_test, y_pred_test)) 
print('Mean square error is :', mse) 
mae=mean_absolute_error(y_test, y_pred_test)
print('Mean absolute error is :', mae)

Root mean square error is : 0.10253704702246756
Mean square error is : 0.010513846012087723
Mean absolute error is : 0.07532215746484065


In [23]:
print("Training set score:",lreg.score(X_train,y_train))
print("Test set score:",lreg.score(X_test,y_test))

Training set score: 0.62844538741859
Test set score: 0.7493282151276978


# Deploying

In [24]:
import pickle
# Save the model
filename = 'mlrmodel.model'
pickle.dump(lreg, open(filename, 'wb'))

In [25]:
load1 = input("Enter load at t-3 hours")
load2 = input("Enter load at t-2 hours")
load3 = input("Enter load at t-1 hours")


out = lreg.predict([[load1, load2, load2]])
print(f"Predicted Load is {out}")

Enter load at t-3 hours222
Enter load at t-2 hours2222
Enter load at t-1 hours222
Predicted Load is [1943.43686955]
