In [17]:
import numpy as np
import pandas as pd
import math
from numpy.linalg import inv
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
from sklearn.linear_model import RidgeCV
import joblib
import datetime   







In [18]:
#Reading the train data from the csv file
df=pd.read_csv("train.csv")
#Converting the dataframe into numpy aray
a=np.array(df)


In [19]:
#Fetching the date_time feature for preprocessing
date_time=df['date_time']
date=[]
time=[]

#Splitting the date column, saving date values into a date array and time to a time array
for i in date_time:
    m=i.split(" ")
    date.append(m[0])
    time.append(m[1])

    

timea=[]
times=[]
datea=[]


#Splitting the time
for i in time:
   m=i.split(":")
   times.append(int(m[0]))


#performing onehot encoding for differentiating between different range of time
for i in times:
    if(8<=i<=20):
        timea.append(1)
    else:
        timea.append(0)


#performing onehot encoding for differentiating between weekdays and weekends
for i in date:
    date1=str(i)
    #print(date1)
    day_name= ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']
    day = datetime.datetime.strptime(date1, '%Y-%m-%d').weekday()
    if(day_name[day] in ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']):
       datea.append(1)
    else:
       datea.append(0)
    
datea=np.array(datea)
datea=datea.reshape(7111,1)
timea=np.array(timea)
timea=timea.reshape(7111,1)
X=a[: , 1:9] 
Y=a[:,9:]

#Adding the two new features to the dataset
X=np.append(timea,X,axis=1)
X=np.append(datea,X,axis=1)


#Splitting the data for training and testing
X_train, X_test, Y_train, Y_test=train_test_split(X, Y, test_size=0.2,random_state=1)


#splitting the data for the three target varaibles separately
y_carbon=Y_train[:,:1]
y_benzene=Y_train[:,1:2]
y_nitrogen=Y_train[:,2:3]
ytest_carbon=Y_test[:,:1]
ytest_benzene=Y_test[:,1:2]
ytest_nitrogen=Y_test[:,2:3]


#Function for converting negative predicted values to 1 for calculating RMSLE error
def prediction(predictions):
    n = len(predictions)
    for i in range(n):
        if(predictions[i]<0):
            predictions[i]=1
    return predictions

In [10]:

#Training different models for the data
degree=3
regr = LinearRegression()

polyreg=make_pipeline(PolynomialFeatures(degree),LinearRegression())
poly_carb=polyreg.fit(X_train,y_carbon)
poly_benzene=polyreg.fit(X_train,y_benzene)
poly_nitrogen=polyreg.fit(X_train,y_nitrogen)
model=regr.fit(X_train,y_carbon)
model1=regr.fit(X_train,y_benzene)
model2 = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X_train,y_nitrogen)


In [4]:

#Linear Regression

regr = LinearRegression()
print("Linear Regression")

model=regr.fit(X_train,y_carbon)

#Prediction of carbon-monoxide
predictions=model.predict(X_test)

#calculating different error metrics for carbon

print("Crabon-Monoxide: MAE Error: ",mean_absolute_error( ytest_carbon, predictions ))
print("Carbon-Monoxide: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_carbon, prediction(predictions) )) )
print("Crabon-Monoxide: RMSE Error: ",np.sqrt(mean_squared_error( ytest_carbon, predictions )))
print("Crabon-Monoxide: MAPE Error: ",mean_absolute_percentage_error( ytest_carbon, predictions ))
print("Crabon-Monoxide: R-square: ",r2_score( ytest_carbon, predictions ))


#Prediction of benzene
model1=regr.fit(X_train,y_benzene)
predictions1=model1.predict(X_test)


#calculating different error metrics for benzene
print("Benzene: MAE Error: ",mean_absolute_error( ytest_benzene, predictions1))
print("Benzene: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_benzene, prediction(predictions1) )))
print("Benzene: RMSE Error: ",np.sqrt(mean_squared_error( ytest_benzene, predictions1)))
print("Benzene: MAPE Error: ",mean_absolute_percentage_error( ytest_benzene, predictions1 ))
print("Benzene: R-square: ",r2_score( ytest_benzene, predictions1  ))


#Prediction of nitogen using ridgeregression
model2 = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X_train,y_nitrogen)
predictions2=model2.predict(X_test)

#calculating different error metrics for nitogen
print("Nitrogen-Oxide: MAE Error: ",mean_absolute_error(ytest_nitrogen, predictions2 ))
print("Nitrogen-Oxide: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_nitrogen, prediction(predictions2) )))
print("Nitrogen-Oxide: RMSE Error: ",np.sqrt(mean_squared_error( ytest_nitrogen, predictions2 )))
print("Nitrogen-Oxide: MAPE Error: ",mean_absolute_percentage_error( ytest_nitrogen, predictions2 ))
print("Nitrogen-Oxide: R-square: ",r2_score( ytest_nitrogen, predictions2  ))





Linear Regression
Crabon-Monoxide: MAE Error:  0.37339479984478546
Carbon-Monoxide: RMSLE Error:  0.17913654943017446
Crabon-Monoxide: RMSE Error:  0.5696438319919122
Crabon-Monoxide: MAPE Error:  0.2616004290480654
Crabon-Monoxide: R-square:  0.8388906059441648
Benzene: MAE Error:  1.2106383224766228
Benzene: RMSLE Error:  0.22703513106907655
Benzene: RMSE Error:  1.5977933563981848
Benzene: MAPE Error:  0.41650379874271254
Benzene: R-square:  0.9561323734759828
Nitrogen-Oxide: MAE Error:  67.32886799765119
Nitrogen-Oxide: RMSLE Error:  0.9513804132459138
Nitrogen-Oxide: RMSE Error:  100.34884093967258
Nitrogen-Oxide: MAPE Error:  0.47305345066247606
Nitrogen-Oxide: R-square:  0.711598483598191


In [3]:
#Polynomial Regreesion
print("Polynomial Regression")

#Performing polynomial regression for different degree range(2,6)
#Caculating different error metrics for all the gases for different degrees

for i in range(2,6):
    print("Errors for degree:",i)
    degree=i
    polyreg=make_pipeline(PolynomialFeatures(degree),LinearRegression())
    poly_carb=polyreg.fit(X_train,y_carbon)
    predictions=poly_carb.predict(X_test)
    print("Crabon-Monoxide: MAE Error: ",mean_absolute_error( ytest_carbon, predictions ))
    print("Carbon-Monoxide: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_carbon, prediction(predictions) )) )
    print("Crabon-Monoxide: RMSE Error: ",np.sqrt(mean_squared_error( ytest_carbon, predictions )))
    print("Crabon-Monoxide: MAPE Error: ",mean_absolute_percentage_error( ytest_carbon, predictions ))
    print("Crabon-Monoxide: R-square: ",r2_score( ytest_carbon, predictions ))
    poly_benzene=polyreg.fit(X_train,y_benzene)
    predictions1=poly_benzene.predict(X_test)
    print("Benzene: MAE Error: ",mean_absolute_error( ytest_benzene, predictions1))
    print("Benzene: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_benzene, prediction(predictions1) )))
    print("Benzene: RMSE Error: ",np.sqrt(mean_squared_error( ytest_benzene, predictions1)))
    print("Benzene: MAPE Error: ",mean_absolute_percentage_error( ytest_benzene, predictions1 ))
    print("Benzene: R-square: ",r2_score( ytest_benzene, predictions1  ))
    poly_nitrogen=polyreg.fit(X_train,y_nitrogen)
    predictions2=poly_nitrogen.predict(X_test)
    print("Nitrogen-Oxide: MAE Error: ",mean_absolute_error(ytest_nitrogen, predictions2 ))
    print("Nitrogen-Oxide: RMSLE Error: ",np.sqrt(mean_squared_log_error( ytest_nitrogen, prediction(predictions2) )))
    print("Nitrogen-Oxide: RMSE Error: ",np.sqrt(mean_squared_error( ytest_nitrogen, predictions2 )))
    print("Nitrogen-Oxide: MAPE Error: ",mean_absolute_percentage_error( ytest_nitrogen, predictions2 ))
    print("Nitrogen-Oxide: R-square: ",r2_score( ytest_nitrogen, predictions2  ))




Polynomial Regression
MAE errors
Errors for degree: 2
Crabon-Monoxide: MAE Error:  0.2945435604195074
Carbon-Monoxide: RMSLE Error:  0.1393776270193372
Crabon-Monoxide: RMSE Error:  0.45688058267888504
Crabon-Monoxide: MAPE Error:  0.20195740284036703
Crabon-Monoxide: R-square:  0.8963618939620921
Benzene: MAE Error:  0.7704547879617061
Benzene: RMSLE Error:  0.11226549533736074
Benzene: RMSE Error:  1.0994286887454177
Benzene: MAPE Error:  0.2039664649423604
Benzene: R-square:  0.9792299737218823
Nitrogen-Oxide: MAE Error:  51.43266489532495
Nitrogen-Oxide: RMSLE Error:  0.5064166623766195
Nitrogen-Oxide: RMSE Error:  81.76792204906835
Nitrogen-Oxide: MAPE Error:  0.36839582771413715
Nitrogen-Oxide: R-square:  0.8085132444139643
Errors for degree: 3
Crabon-Monoxide: MAE Error:  0.28461555589674875
Carbon-Monoxide: RMSLE Error:  0.13270027934570833
Crabon-Monoxide: RMSE Error:  0.4361396431116706
Crabon-Monoxide: MAPE Error:  0.190334742607961
Crabon-Monoxide: R-square:  0.905557996117

In [20]:
#Linear Model Load 
joblib.dump(model, "./carbon.joblib")
joblib.dump(model1, "./benz.joblib")
joblib.dump(model2, "./nitrogen.joblib")

#Polynomial Model Load
joblib.dump(poly_carb, "./carbonmono.joblib")
joblib.dump(poly_benzene, "./benzene.joblib")
joblib.dump(poly_nitrogen, "./nitrogenoxide.joblib")



['./nitrogenoxide.joblib']

In [21]:
#Linear Prediction training the models
predict_carbon=model.predict(X_test)
predict_benzene=model1.predict(X_test)
predict_nitrogenoxide=model2.predict(X_test)

#predicted values of gases using linear model
print(" Predicted Crabon-monoxide",predict_carbon)
print(" Predicted Benzene",predict_benzene)
print(" Predicted Nitogen-oxide",predict_nitrogenoxide)


#For Polynomial Prediction training the models
poly_carbonmono=poly_carb.predict(X_test)
poly_ben=poly_benzene.predict(X_test)
poly_nitrogenoxide=poly_nitrogen.predict(X_test)

#predicted values of gases using polynomial model
print(" Predicted Crabon-monoxide",poly_carbonmono)
print(" Predicted Benzene",poly_ben)
print(" Predicted Nitogen-oxide",poly_nitrogenoxide)






 Predicted Crabon-monoxide [[-1.15140786]
 [ 3.79536659]
 [12.02919406]
 ...
 [17.45738965]
 [ 5.7204887 ]
 [ 6.74384575]]
 Predicted Benzene [[-1.15140786]
 [ 3.79536659]
 [12.02919406]
 ...
 [17.45738965]
 [ 5.7204887 ]
 [ 6.74384575]]
 Predicted Nitogen-oxide [[272.26883564]
 [ 62.41167378]
 [126.10938542]
 ...
 [237.43396855]
 [  6.37108341]
 [ 76.55785299]]
 Predicted Crabon-monoxide [[480.09381917]
 [  7.36393949]
 [ 67.47439515]
 ...
 [225.65550356]
 [ 53.25415419]
 [ 96.51063563]]
 Predicted Benzene [[480.09381917]
 [  7.36393949]
 [ 67.47439515]
 ...
 [225.65550356]
 [ 53.25415419]
 [ 96.51063563]]
 Predicted Nitogen-oxide [[480.09381917]
 [  7.36393949]
 [ 67.47439515]
 ...
 [225.65550356]
 [ 53.25415419]
 [ 96.51063563]]
