## Ammonia concentration (ppm) prediction model development using Dataset-B2

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the ammonia workbook into a DataFrame and preview its first five rows.
df = pd.read_excel(io='ammonia.xlsx')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,DOC,Salinity (ppt),SDR (cm),Water colour,Water level (cm),TSS (ml/L),Average PH,Average DO (ppm),Running aerator (HP),TDF (kg),ABW (gm),Water temperature (˚C),PH change,Ammonia (ppm),Water exchange (cm)
0,0,1,16,50,4,120,1.0,8.25,5.783333,56,1.7,0.008,29.1,0.3,0.026,0
1,1,2,16,48,5,120,2.0,8.25,5.6,31,2.4,0.022571,28.65,0.1,0.028,0
2,2,3,16,55,4,132,0.5,8.05,5.333333,29,3.4,0.037143,28.1,0.1,0.023,5
3,3,4,16,60,4,128,0.5,8.0,5.466667,44,4.2,0.051714,28.4,0.0,0.018,0
4,4,5,16,45,4,126,0.5,8.0,5.433333,57,5.0,0.066286,28.15,0.2,0.03,0


In [3]:
# One-column table listing the dtype of every column in df.
data = df.dtypes.to_frame(name='DataType')
data

Unnamed: 0,DataType
Unnamed: 0,int64
DOC,int64
Salinity (ppt),int64
SDR (cm),int64
Water colour,int64
Water level (cm),int64
TSS (ml/L),float64
Average PH,float64
Average DO (ppm),float64
Running aerator (HP),int64


In [4]:
# Number of missing values per column (isna is the same check as isnull).
df.isna().sum()

Unnamed: 0                0
DOC                       0
Salinity (ppt)            0
SDR (cm)                  0
Water colour              0
Water level (cm)          0
TSS (ml/L)                0
Average PH                0
Average DO (ppm)          0
Running aerator (HP)      0
TDF (kg)                  0
ABW (gm)                  0
Water temperature (˚C)    0
PH change                 0
Ammonia (ppm)             0
Water exchange (cm)       0
dtype: int64

In [4]:
# Feature matrix: the four predictor columns used by every model below.
x = df.loc[:, ['DOC', 'Salinity (ppt)', 'Average PH', 'ABW (gm)']]
x

Unnamed: 0,DOC,Salinity (ppt),Average PH,ABW (gm)
0,1,16,8.25,0.008000
1,2,16,8.25,0.022571
2,3,16,8.05,0.037143
3,4,16,8.00,0.051714
4,5,16,8.00,0.066286
...,...,...,...,...
395,96,22,7.70,19.100000
396,97,23,7.75,19.200000
397,98,23,7.75,19.300000
398,99,23,7.70,19.400000


In [5]:
# Regression target: the ammonia concentration column.
y = df.loc[:, 'Ammonia (ppm)']
y

0      0.026
1      0.028
2      0.023
3      0.018
4      0.030
       ...  
395    0.054
396    0.049
397    0.044
398    0.039
399    0.054
Name: Ammonia (ppm), Length: 400, dtype: float64

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Standardise the features, but learn the mean/variance from the training rows only.
# Calling fit_transform on the full matrix lets statistics of the held-out rows leak
# into preprocessing before the data is split.
#
# The training rows are identified by splitting the row positions with the SAME
# test_size / random_state that the later train_test_split(x, y, ...) call uses; the
# shuffle depends only on the number of rows and the seed, so both calls select the
# same rows. Keep the two sets of parameters in sync.
train_rows = train_test_split(np.arange(len(x)), test_size=0.2, random_state=42)[0]

# Work on a plain float array so the cell can be re-run after x has been replaced.
x = np.asarray(x, dtype=float)
rs = StandardScaler()
rs.fit(x[train_rows])
x = rs.transform(x)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Ordinary least-squares baseline fitted on the training split.
linreg = LinearRegression()
linreg.fit(X=x_train, y=y_train)

LinearRegression()

In [10]:
# Linear-model predictions for the held-out test features.
y_pred = linreg.predict(X=x_test)

In [11]:
# R^2 of the linear model on the test split.
linreg.score(X=x_test, y=y_test)

0.12894035763116518

In [12]:
# R^2 of the linear model on the training split (compare with the test score).
linreg.score(X=x_train, y=y_train)

0.16288602044654998

In [13]:
from math import sqrt
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)

# Test-split errors of the linear model, one value per line in this order:
# MAE, MSE, RMSE, R^2, MAPE.
print(
    mean_absolute_error(y_test, y_pred),
    mean_squared_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
    r2_score(y_test, y_pred),
    mean_absolute_percentage_error(y_test, y_pred),
    sep='\n',
)

0.053634199156105546
0.007070242779885055
0.08408473571276213
0.12894035763116518
0.8580160134253031


In [14]:
from sklearn.tree import DecisionTreeRegressor

In [15]:
# Single regression tree with unlimited depth and at least two samples per leaf,
# fitted on the training split.
treereg = DecisionTreeRegressor(
    splitter='best',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=2,
    random_state=3,
)
treereg.fit(X=x_train, y=y_train)

DecisionTreeRegressor(min_samples_leaf=2, random_state=3)

In [16]:
# Decision-tree predictions for the held-out test features.
ytree_pred = treereg.predict(X=x_test)

In [17]:
# R^2 of the decision tree on the training split.
treereg.score(X=x_train, y=y_train)

0.9344031656333975

In [18]:
# R^2 of the decision tree on the test split.
treereg.score(X=x_test, y=y_test)

0.305384235072495

In [19]:
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test-split errors of the decision tree, one value per line in this order:
# MAE, MSE, RMSE, R^2, MAPE.
print(
    mean_absolute_error(y_test, ytree_pred),
    mean_squared_error(y_test, ytree_pred),
    np.sqrt(mean_squared_error(y_test, ytree_pred)),
    r2_score(y_test, ytree_pred),
    mean_absolute_percentage_error(y_test, ytree_pred),
    sep='\n',
)

0.03753680555555556
0.005638077874228395
0.0750871352112224
0.305384235072495
0.6836506615207453


In [20]:
from sklearn.ensemble import AdaBoostRegressor

In [21]:
# AdaBoost ensemble with 180 boosting rounds, fitted on the training split.
adareg = AdaBoostRegressor(
    n_estimators=180,
    random_state=3,
)
adareg.fit(X=x_train, y=y_train)

AdaBoostRegressor(n_estimators=180, random_state=3)

In [22]:
# AdaBoost predictions for the held-out test features.
yada_pred = adareg.predict(X=x_test)

In [23]:
# R^2 of the AdaBoost model on the training split.
adareg.score(X=x_train, y=y_train)

0.6311673750605651

In [24]:
# R^2 of the AdaBoost model on the test split.
adareg.score(X=x_test, y=y_test)

0.25105439658540707

In [25]:
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test-split errors of the AdaBoost model, one value per line in this order:
# MAE, MSE, RMSE, R^2, MAPE.
print(
    mean_absolute_error(y_test, yada_pred),
    mean_squared_error(y_test, yada_pred),
    np.sqrt(mean_squared_error(y_test, yada_pred)),
    r2_score(y_test, yada_pred),
    mean_absolute_percentage_error(y_test, yada_pred),
    sep='\n',
)

0.05028275397710882
0.006079063921120698
0.07796835204825543
0.25105439658540707
0.8626133534883722


In [26]:
from sklearn.ensemble import GradientBoostingRegressor

In [27]:
# Gradient-boosted trees fitted on the training split.
# The loss is left at the estimator default (squared error). The previous explicit
# loss='ls' alias was deprecated in scikit-learn 1.0 and removed in 1.2, where passing
# it makes the cell fail; omitting it selects the same loss on old and new versions.
gbrreg = GradientBoostingRegressor(
    n_estimators=180,
    learning_rate=0.5,
    max_depth=10,
    min_samples_leaf=5,
    random_state=3,
)
gbrreg.fit(x_train, y_train)

GradientBoostingRegressor(learning_rate=0.5, max_depth=10, min_samples_leaf=5,
                          n_estimators=180, random_state=3)

In [28]:
# Gradient-boosting predictions for the held-out test features.
ygbr_pred = gbrreg.predict(X=x_test)

In [29]:
# R^2 of the gradient-boosting model on the training split.
gbrreg.score(X=x_train, y=y_train)

0.9995723546776345

In [30]:
# R^2 of the gradient-boosting model on the test split.
gbrreg.score(X=x_test, y=y_test)

0.5040115353533715

In [31]:
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test-split errors of the gradient-boosting model, one value per line in this order:
# MAE, MSE, RMSE, R^2, MAPE.
print(
    mean_absolute_error(y_test, ygbr_pred),
    mean_squared_error(y_test, ygbr_pred),
    np.sqrt(mean_squared_error(y_test, ygbr_pred)),
    r2_score(y_test, ygbr_pred),
    mean_absolute_percentage_error(y_test, ygbr_pred),
    sep='\n',
)

0.039354480271709916
0.004025853903112209
0.06344961704464581
0.5040115353533715
0.6394350932133553


In [32]:
from sklearn.ensemble import RandomForestRegressor

In [33]:
# Random forest of 1000 trees (depth capped at 25, sqrt feature subsampling),
# fitted on the training split.
rf = RandomForestRegressor(
    n_estimators=1000,
    max_depth=25,
    max_features='sqrt',
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
rf.fit(X=x_train, y=y_train)

RandomForestRegressor(max_depth=25, max_features='sqrt', n_estimators=1000,
                      random_state=42)

In [34]:
# Random-forest predictions for the held-out test features.
yrf_pred = rf.predict(X=x_test)

In [35]:
# R^2 of the random forest on the training split.
rf.score(X=x_train, y=y_train)

0.933470727490204

In [36]:
# R^2 of the random forest on the test split.
rf.score(X=x_test, y=y_test)

0.48123851938078055

In [37]:
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test-split errors of the random forest, one value per line in this order:
# MAE, MSE, RMSE, R^2, MAPE.
print(
    mean_absolute_error(y_test, yrf_pred),
    mean_squared_error(y_test, yrf_pred),
    np.sqrt(mean_squared_error(y_test, yrf_pred)),
    r2_score(y_test, yrf_pred),
    mean_absolute_percentage_error(y_test, yrf_pred),
    sep='\n',
)

0.034763033333333325
0.004210698595627812
0.06488989594403595
0.48123851938078055
0.5204634331639427
