In [1]:
# Import Packages
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.linear_model import LinearRegression

import pickle
import os


In [2]:
# Import Data Frame 
data = pd.read_csv('DATASET/220511_monatszahlenmonatszahlen2204_verkehrsunfaelle.csv')
data

Unnamed: 0,MONATSZAHL,AUSPRAEGUNG,JAHR,MONAT,WERT,VORJAHRESWERT,VERAEND_VORMONAT_PROZENT,VERAEND_VORJAHRESMONAT_PROZENT,ZWOELF_MONATE_MITTELWERT
0,Alkoholunfälle,insgesamt,2022,202201,,16.0,,,
1,Alkoholunfälle,insgesamt,2022,202202,,14.0,,,
2,Alkoholunfälle,insgesamt,2022,202203,,24.0,,,
3,Alkoholunfälle,insgesamt,2022,202204,,16.0,,,
4,Alkoholunfälle,insgesamt,2022,202205,,24.0,,,
...,...,...,...,...,...,...,...,...,...
2081,Verkehrsunfälle,Verletzte und Getötete,2000,200008,647.0,,-13.04,,584.0
2082,Verkehrsunfälle,Verletzte und Getötete,2000,200009,675.0,,4.33,,594.0
2083,Verkehrsunfälle,Verletzte und Getötete,2000,200010,615.0,,-8.89,,596.0
2084,Verkehrsunfälle,Verletzte und Getötete,2000,200011,578.0,,-6.02,,594.0


In [3]:
# Print the Data Frame columns
data.columns

Index(['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT', 'WERT', 'VORJAHRESWERT',
       'VERAEND_VORMONAT_PROZENT', 'VERAEND_VORJAHRESMONAT_PROZENT',
       'ZWOELF_MONATE_MITTELWERT'],
      dtype='object')

In [4]:
# Renaming german into to english 
data = data.rename(columns = {
    'MONATSZAHL' : 'Category',
    'AUSPRAEGUNG': 'Accident_Type',
    'JAHR'       : 'Year',
    'MONAT'      : 'Month',
    'WERT'       : 'Value',
    'VORJAHRESWERT': 'Previous Year',
    'VERAEND_VORMONAT_PROZENT':'Event_Prior_Month_Percent',
    'VERAEND_VORJAHRESMONAT_PROZENT':'Event_Previous Month_Percentage',
    'ZWOELF_MONATE_MITTELWERT':'Tweleve_Months_Average'
})

data['Category'].replace(['Alkoholunfälle', 'Fluchtunfälle','Verkehrsunfälle'],['alcohol accidents','escape accidents','traffic accidents'], inplace=True)
data['Accident_Type'].replace(['insgesamt' ,'Verletzte und Getötete', 'mit Personenschäden'],['all in all','injured and killed','with personal injury'], inplace=True)


In [5]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value,Previous Year,Event_Prior_Month_Percent,Event_Previous Month_Percentage,Tweleve_Months_Average
0,alcohol accidents,all in all,2022,202201,,16.0,,,
1,alcohol accidents,all in all,2022,202202,,14.0,,,
2,alcohol accidents,all in all,2022,202203,,24.0,,,
3,alcohol accidents,all in all,2022,202204,,16.0,,,
4,alcohol accidents,all in all,2022,202205,,24.0,,,
...,...,...,...,...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,200008,647.0,,-13.04,,584.0
2082,traffic accidents,injured and killed,2000,200009,675.0,,4.33,,594.0
2083,traffic accidents,injured and killed,2000,200010,615.0,,-8.89,,596.0
2084,traffic accidents,injured and killed,2000,200011,578.0,,-6.02,,594.0


In [6]:
data.columns

Index(['Category', 'Accident_Type', 'Year', 'Month', 'Value', 'Previous Year',
       'Event_Prior_Month_Percent', 'Event_Previous Month_Percentage',
       'Tweleve_Months_Average'],
      dtype='object')

In [7]:
# Selecting columns 
imp = ['Category', 'Accident_Type', 'Year', 'Month', 'Value']

In [8]:
print(data['Category'].unique())

['alcohol accidents' 'escape accidents' 'traffic accidents']


In [9]:
print(data['Accident_Type'].unique())

['all in all' 'injured and killed' 'with personal injury']


In [10]:
print(data['Year'].unique())

[2022 2021 2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009
 2008 2007 2006 2005 2004 2003 2002 2001 2000]


In [11]:
print(data['Month'].unique())

['202201' '202202' '202203' '202204' '202205' '202206' '202207' '202208'
 '202209' '202210' '202211' '202212' 'Summe' '202101' '202102' '202103'
 '202104' '202105' '202106' '202107' '202108' '202109' '202110' '202111'
 '202112' '202001' '202002' '202003' '202004' '202005' '202006' '202007'
 '202008' '202009' '202010' '202011' '202012' '201901' '201902' '201903'
 '201904' '201905' '201906' '201907' '201908' '201909' '201910' '201911'
 '201912' '201801' '201802' '201803' '201804' '201805' '201806' '201807'
 '201808' '201809' '201810' '201811' '201812' '201701' '201702' '201703'
 '201704' '201705' '201706' '201707' '201708' '201709' '201710' '201711'
 '201712' '201601' '201602' '201603' '201604' '201605' '201606' '201607'
 '201608' '201609' '201610' '201611' '201612' '201501' '201502' '201503'
 '201504' '201505' '201506' '201507' '201508' '201509' '201510' '201511'
 '201512' '201401' '201402' '201403' '201404' '201405' '201406' '201407'
 '201408' '201409' '201410' '201411' '201412' '20130

In [12]:
# Remove the 'Summe' from Month column 
data = data[data['Month'] != 'Summe']

In [13]:
print(data['Month'].unique())

['202201' '202202' '202203' '202204' '202205' '202206' '202207' '202208'
 '202209' '202210' '202211' '202212' '202101' '202102' '202103' '202104'
 '202105' '202106' '202107' '202108' '202109' '202110' '202111' '202112'
 '202001' '202002' '202003' '202004' '202005' '202006' '202007' '202008'
 '202009' '202010' '202011' '202012' '201901' '201902' '201903' '201904'
 '201905' '201906' '201907' '201908' '201909' '201910' '201911' '201912'
 '201801' '201802' '201803' '201804' '201805' '201806' '201807' '201808'
 '201809' '201810' '201811' '201812' '201701' '201702' '201703' '201704'
 '201705' '201706' '201707' '201708' '201709' '201710' '201711' '201712'
 '201601' '201602' '201603' '201604' '201605' '201606' '201607' '201608'
 '201609' '201610' '201611' '201612' '201501' '201502' '201503' '201504'
 '201505' '201506' '201507' '201508' '201509' '201510' '201511' '201512'
 '201401' '201402' '201403' '201404' '201405' '201406' '201407' '201408'
 '201409' '201410' '201411' '201412' '201301' '2013

In [14]:
print(data['Value'].unique())

[      nan 1.600e+01 1.400e+01 2.400e+01 4.800e+01 4.400e+01 4.600e+01
 5.400e+01 4.300e+01 3.300e+01 2.100e+01 2.800e+01 4.000e+01 2.700e+01
 2.600e+01 4.900e+01 5.800e+01 3.400e+01 2.300e+01 1.300e+01 2.200e+01
 3.600e+01 3.900e+01 3.100e+01 4.700e+01 4.500e+01 2.900e+01 3.500e+01
 3.200e+01 2.500e+01 5.100e+01 1.900e+01 4.100e+01 5.200e+01 3.000e+01
 4.200e+01 1.000e+01 2.000e+01 5.000e+01 1.700e+01 3.700e+01 1.500e+01
 1.800e+01 5.600e+01 5.500e+01 5.700e+01 3.800e+01 6.300e+01 6.500e+01
 5.300e+01 6.400e+01 7.600e+01 6.700e+01 7.900e+01 6.800e+01 8.200e+01
 6.600e+01 7.400e+01 6.100e+01 8.400e+01 7.100e+01 1.070e+02 8.300e+01
 8.500e+01 6.000e+01 7.700e+01 7.800e+01 7.300e+01 9.600e+01 9.900e+01
 5.000e+00 9.000e+00 6.000e+00 7.000e+00 1.100e+01 1.200e+01 8.000e+00
 4.000e+00 3.000e+00 2.000e+00 0.000e+00 5.210e+02 6.020e+02 8.010e+02
 7.160e+02 8.650e+02 9.910e+02 1.027e+03 7.150e+02 9.510e+02 1.053e+03
 8.490e+02 7.880e+02 7.910e+02 8.700e+02 7.440e+02 6.070e+02 8.590e+02
 9.050

In [15]:
data = data[imp]

In [16]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value
0,alcohol accidents,all in all,2022,202201,
1,alcohol accidents,all in all,2022,202202,
2,alcohol accidents,all in all,2022,202203,
3,alcohol accidents,all in all,2022,202204,
4,alcohol accidents,all in all,2022,202205,
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,200008,647.0
2082,traffic accidents,injured and killed,2000,200009,675.0
2083,traffic accidents,injured and killed,2000,200010,615.0
2084,traffic accidents,injured and killed,2000,200011,578.0


In [17]:
# Remove null values
data = data.dropna()

In [18]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value
13,alcohol accidents,all in all,2021,202101,16.0
14,alcohol accidents,all in all,2021,202102,14.0
15,alcohol accidents,all in all,2021,202103,24.0
16,alcohol accidents,all in all,2021,202104,16.0
17,alcohol accidents,all in all,2021,202105,24.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,200008,647.0
2082,traffic accidents,injured and killed,2000,200009,675.0
2083,traffic accidents,injured and killed,2000,200010,615.0
2084,traffic accidents,injured and killed,2000,200011,578.0


In [19]:
# Get only month numbers in Month column
data['Month'] = data['Month'].str[-2:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Month'] = data['Month'].str[-2:]


In [20]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value
13,alcohol accidents,all in all,2021,01,16.0
14,alcohol accidents,all in all,2021,02,14.0
15,alcohol accidents,all in all,2021,03,24.0
16,alcohol accidents,all in all,2021,04,16.0
17,alcohol accidents,all in all,2021,05,24.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,08,647.0
2082,traffic accidents,injured and killed,2000,09,675.0
2083,traffic accidents,injured and killed,2000,10,615.0
2084,traffic accidents,injured and killed,2000,11,578.0


In [21]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1848 entries, 13 to 2085
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Category       1848 non-null   object 
 1   Accident_Type  1848 non-null   object 
 2   Year           1848 non-null   int64  
 3   Month          1848 non-null   object 
 4   Value          1848 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 86.6+ KB


In [22]:
data['Month'] = data['Month'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Month'] = data['Month'].astype(int)


In [23]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1848 entries, 13 to 2085
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Category       1848 non-null   object 
 1   Accident_Type  1848 non-null   object 
 2   Year           1848 non-null   int64  
 3   Month          1848 non-null   int64  
 4   Value          1848 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 86.6+ KB


In [24]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value
13,alcohol accidents,all in all,2021,1,16.0
14,alcohol accidents,all in all,2021,2,14.0
15,alcohol accidents,all in all,2021,3,24.0
16,alcohol accidents,all in all,2021,4,16.0
17,alcohol accidents,all in all,2021,5,24.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,8,647.0
2082,traffic accidents,injured and killed,2000,9,675.0
2083,traffic accidents,injured and killed,2000,10,615.0
2084,traffic accidents,injured and killed,2000,11,578.0


In [25]:
data = data[data['Year'] < 2021]

In [26]:
data

Unnamed: 0,Category,Accident_Type,Year,Month,Value
26,alcohol accidents,all in all,2020,1,28.0
27,alcohol accidents,all in all,2020,2,40.0
28,alcohol accidents,all in all,2020,3,27.0
29,alcohol accidents,all in all,2020,4,26.0
30,alcohol accidents,all in all,2020,5,40.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,8,647.0
2082,traffic accidents,injured and killed,2000,9,675.0
2083,traffic accidents,injured and killed,2000,10,615.0
2084,traffic accidents,injured and killed,2000,11,578.0


In [27]:
data.sort_values('Year')

Unnamed: 0,Category,Accident_Type,Year,Month,Value
2085,traffic accidents,injured and killed,2000,12,515.0
1778,traffic accidents,with personal injury,2000,3,414.0
1777,traffic accidents,with personal injury,2000,2,329.0
1776,traffic accidents,with personal injury,2000,1,321.0
1489,traffic accidents,all in all,2000,12,3789.0
...,...,...,...,...,...
1821,traffic accidents,injured and killed,2020,8,563.0
1822,traffic accidents,injured and killed,2020,9,634.0
1823,traffic accidents,injured and killed,2020,10,566.0
1228,traffic accidents,all in all,2020,11,3150.0


In [28]:
Final = data.copy()

In [29]:
Final

Unnamed: 0,Category,Accident_Type,Year,Month,Value
26,alcohol accidents,all in all,2020,1,28.0
27,alcohol accidents,all in all,2020,2,40.0
28,alcohol accidents,all in all,2020,3,27.0
29,alcohol accidents,all in all,2020,4,26.0
30,alcohol accidents,all in all,2020,5,40.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,8,647.0
2082,traffic accidents,injured and killed,2000,9,675.0
2083,traffic accidents,injured and killed,2000,10,615.0
2084,traffic accidents,injured and killed,2000,11,578.0


In [30]:
Final.to_csv("DATASET/after_pre_processing.csv")

In [31]:
a = Final.copy()

In [32]:
a

Unnamed: 0,Category,Accident_Type,Year,Month,Value
26,alcohol accidents,all in all,2020,1,28.0
27,alcohol accidents,all in all,2020,2,40.0
28,alcohol accidents,all in all,2020,3,27.0
29,alcohol accidents,all in all,2020,4,26.0
30,alcohol accidents,all in all,2020,5,40.0
...,...,...,...,...,...
2081,traffic accidents,injured and killed,2000,8,647.0
2082,traffic accidents,injured and killed,2000,9,675.0
2083,traffic accidents,injured and killed,2000,10,615.0
2084,traffic accidents,injured and killed,2000,11,578.0


In [33]:
a['Category'].replace(['alcohol accidents' ,'escape accidents' ,'traffic accidents'],[0,1,2], inplace=True)

In [34]:
print(a['Category'].unique())

[0 1 2]


In [35]:
a

Unnamed: 0,Category,Accident_Type,Year,Month,Value
26,0,all in all,2020,1,28.0
27,0,all in all,2020,2,40.0
28,0,all in all,2020,3,27.0
29,0,all in all,2020,4,26.0
30,0,all in all,2020,5,40.0
...,...,...,...,...,...
2081,2,injured and killed,2000,8,647.0
2082,2,injured and killed,2000,9,675.0
2083,2,injured and killed,2000,10,615.0
2084,2,injured and killed,2000,11,578.0


In [36]:
a['Accident_Type'].replace(['all in all', 'injured and killed' ,'with personal injury'], [0,1,2], inplace=True)

In [37]:
print(a['Accident_Type'].unique())

[0 1 2]


In [38]:
a

Unnamed: 0,Category,Accident_Type,Year,Month,Value
26,0,0,2020,1,28.0
27,0,0,2020,2,40.0
28,0,0,2020,3,27.0
29,0,0,2020,4,26.0
30,0,0,2020,5,40.0
...,...,...,...,...,...
2081,2,1,2000,8,647.0
2082,2,1,2000,9,675.0
2083,2,1,2000,10,615.0
2084,2,1,2000,11,578.0


In [39]:
a.to_csv("DATASET/after_categ.csv")

In [40]:
# actual code

In [41]:
# Import Data Frame 
df = pd.read_csv('DATASET/after_categ.csv')
#df = pd.read_csv('DATASET/after_pre_processing.csv')

In [42]:
df

Unnamed: 0.1,Unnamed: 0,Category,Accident_Type,Year,Month,Value
0,26,0,0,2020,1,28.0
1,27,0,0,2020,2,40.0
2,28,0,0,2020,3,27.0
3,29,0,0,2020,4,26.0
4,30,0,0,2020,5,40.0
...,...,...,...,...,...,...
1759,2081,2,1,2000,8,647.0
1760,2082,2,1,2000,9,675.0
1761,2083,2,1,2000,10,615.0
1762,2084,2,1,2000,11,578.0


In [43]:
df.columns

Index(['Unnamed: 0', 'Category', 'Accident_Type', 'Year', 'Month', 'Value'], dtype='object')

In [44]:
# Specify the features
features = ['Category', 'Accident_Type', 'Year', 'Month']
X = df[features]
X

Unnamed: 0,Category,Accident_Type,Year,Month
0,0,0,2020,1
1,0,0,2020,2
2,0,0,2020,3
3,0,0,2020,4
4,0,0,2020,5
...,...,...,...,...
1759,2,1,2000,8
1760,2,1,2000,9
1761,2,1,2000,10
1762,2,1,2000,11


In [45]:
# Specify the prediction value
Y = data.Value
Y

26       28.0
27       40.0
28       27.0
29       26.0
30       40.0
        ...  
2081    647.0
2082    675.0
2083    615.0
2084    578.0
2085    515.0
Name: Value, Length: 1764, dtype: float64

In [46]:
# Specif train and train data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=0)


In [47]:
X_train

Unnamed: 0,Category,Accident_Type,Year,Month
748,1,0,2000,5
1614,2,1,2012,7
743,1,0,2001,12
482,0,1,2001,3
133,0,0,2009,2
...,...,...,...,...
835,1,1,2014,8
1216,2,0,2003,5
1653,2,1,2009,10
559,1,0,2016,8


In [48]:
X_test

Unnamed: 0,Category,Accident_Type,Year,Month
1165,2,0,2007,2
668,1,0,2007,9
412,0,1,2007,5
558,1,0,2016,7
1234,2,0,2002,11
...,...,...,...,...
1044,2,0,2017,1
970,1,1,2003,11
539,1,0,2018,12
1597,2,1,2013,2


In [49]:
Y_train

886     1086.0
1924     730.0
880      941.0
573       36.0
170       32.0
         ...  
1005      43.0
1443    3255.0
1966     432.0
681      958.0
817      749.0
Name: Value, Length: 1411, dtype: float64

In [50]:
Y_test

1388    2643.0
799      797.0
497       39.0
680     1095.0
1462    3529.0
         ...  
1257    3320.0
1151      59.0
659      900.0
1906     326.0
1265    4017.0
Name: Value, Length: 353, dtype: float64

In [51]:
#various algorithm 

In [52]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
regressor1 = LinearRegression()
regressor1.fit(X_train,Y_train)
y_pred1 = regressor1.predict(X_test)
print("Linear Regression :")
print("MAE : {:.8f}".format(mean_absolute_error(y_pred1, Y_test)))
print("MSE : {:.8f}".format(mean_squared_error(y_pred1, Y_test)))
print("RMSE : {:.8f}".format(mean_squared_error(y_pred1, Y_test,squared=False)))
print("MedAE : {:.8f}".format(median_absolute_error(y_pred1, Y_test)))
print("R2 : {:.8f}".format(r2_score(y_pred1, Y_test)))


Linear Regression :
MAE : 568.21870890
MSE : 458204.52350712
RMSE : 676.90806134
MedAE : 478.48380158
R2 : 0.48289786


In [53]:
from sklearn.tree import DecisionTreeRegressor
regressor2 = DecisionTreeRegressor(random_state=0)
regressor2.fit(X_train,Y_train)
y_pred2 = regressor2.predict(X_test)
print("Decision Tree :")
print("MAE : {:.8f}".format(mean_absolute_error(y_pred2, Y_test)))
print("MSE : {:.8f}".format(mean_squared_error(y_pred2, Y_test)))
print("RMSE : {:.8f}".format(mean_squared_error(y_pred2, Y_test,squared=False)))
print("MedAE : {:.8f}".format(median_absolute_error(y_pred2, Y_test)))
print("R2 : {:.8f}".format(r2_score(y_pred2, Y_test)))


Decision Tree :
MAE : 65.22946176
MSE : 20021.14447592
RMSE : 141.49609350
MedAE : 18.00000000
R2 : 0.98653198


In [54]:
from sklearn.ensemble import RandomForestRegressor
regressor3 = RandomForestRegressor(n_estimators=53,random_state=0)
regressor3.fit(X_train,Y_train)
y_pred3 = regressor3.predict(X_test)
print("Random Forest :")
print("MAE : {:.8f}".format(mean_absolute_error(y_pred3, Y_test)))
print("MSE : {:.8f}".format(mean_squared_error(y_pred3, Y_test)))
print("RMSE : {:.8f}".format(mean_squared_error(y_pred3, Y_test,squared=False)))
print("MedAE : {:.8f}".format(median_absolute_error(y_pred3, Y_test)))
print("R2 : {:.8f}".format(r2_score(y_pred3, Y_test)))



Random Forest :
MAE : 53.26121118
MSE : 12055.78870426
RMSE : 109.79885566
MedAE : 14.71698113
R2 : 0.99161312


In [55]:
from sklearn.neighbors import KNeighborsRegressor
regressor4 = KNeighborsRegressor()
regressor4.fit(X_train,Y_train)
y_pred4 = regressor4.predict(X_test)
print("KN Neighbors :")
print("MAE : {:.8f}".format(mean_absolute_error(y_pred4, Y_test)))
print("MSE : {:.8f}".format(mean_squared_error(y_pred4, Y_test)))
print("RMSE : {:.8f}".format(mean_squared_error(y_pred4, Y_test,squared=False)))
print("MedAE : {:.8f}".format(median_absolute_error(y_pred4, Y_test)))
print("R2 : {:.8f}".format(r2_score(y_pred4, Y_test)))

KN Neighbors :
MAE : 336.59603399
MSE : 298354.71478754
RMSE : 546.21855954
MedAE : 180.20000000
R2 : 0.52581747


In [56]:
from sklearn.svm import SVR
regressor5 = SVR()
regressor5.fit(X_train,Y_train)
y_pred5 = regressor5.predict(X_test)
print("KN Neighbors :")
print("MAE : {:.8f}".format(mean_absolute_error(y_pred5, Y_test)))
print("MSE : {:.8f}".format(mean_squared_error(y_pred5, Y_test)))
print("RMSE : {:.8f}".format(mean_squared_error(y_pred5, Y_test,squared=False)))
print("MedAE : {:.8f}".format(median_absolute_error(y_pred5, Y_test)))
print("R2 : {:.8f}".format(r2_score(y_pred5, Y_test)))

KN Neighbors :
MAE : 730.30266953
MSE : 1647932.73226344
RMSE : 1283.71832279
MedAE : 317.00282462
R2 : -473377580495.95947266


In [57]:
# test the output by for jan 0,0,2021,1
y_pred1 = regressor1.predict([[0,0,2021,1]])
y_pred2 = regressor2.predict([[0,0,2021,1]])
y_pred3 = regressor3.predict([[0,0,2021,1]])
y_pred4 = regressor4.predict([[0,0,2021,1]])
y_pred5 = regressor5.predict([[0,0,2021,1]])


# print the predicted price
print("Jan linear: % d\n"% y_pred1)
# print the predicted price
print("Jan decision tree: % d\n"% y_pred2)
# print the predicted price
print("Jan ramdon forest: % d\n"% y_pred3)
# print the predicted price
print("Jan KNN: % d\n"% y_pred4)
# print the predicted price
print("Jan SVM: % d\n" % y_pred5)


print("new")

# test the output by for jan 0,1,2021,1
y_pred1 = regressor1.predict([[0,1,2021,1]])
y_pred2 = regressor2.predict([[0,1,2021,1]])
y_pred3 = regressor3.predict([[0,1,2021,1]])
y_pred4 = regressor4.predict([[0,1,2021,1]])
y_pred5 = regressor5.predict([[0,1,2021,1]])


# print the predicted price
print("Jan linear: % d\n"% y_pred1)
# print the predicted price
print("Jan decision tree: % d\n"% y_pred2)
# print the predicted price
print("Jan ramdon forest: % d\n"% y_pred3)
# print the predicted price
print("Jan KNN: % d\n"% y_pred4)
# print the predicted price
print("Jan SVM: % d\n" % y_pred5)


Jan linear:  376

Jan decision tree:  28

Jan ramdon forest:  28

Jan KNN:  348

Jan SVM:  350

new
Jan linear: -730

Jan decision tree:  11

Jan ramdon forest:  10

Jan KNN:  178

Jan SVM:  350





In [58]:
#random forest for all jan 2021

# test the output by for jan 0,0,2021,1
y_pred3_1 = regressor3.predict([[0,0,2021,1]])
y_pred3_2 = regressor3.predict([[0,1,2021,1]])
y_pred3_3 = regressor3.predict([[1,0,2021,1]])
y_pred3_4 = regressor3.predict([[1,1,2021,1]])
y_pred3_5 = regressor3.predict([[2,0,2021,1]])
y_pred3_6 = regressor3.predict([[2,1,2021,1]])
y_pred3_7 = regressor3.predict([[2,2,2021,1]])

y_pred3_8 = regressor3.predict([[0,2,2021,1]])
y_pred3_9 = regressor3.predict([[1,2,2021,1]])

y_pred_final = y_pred3_1+y_pred3_2+y_pred3_3+y_pred3_4+y_pred3_5+y_pred3_6+y_pred3_7



print("Jan ramdon forest: % d\n"% y_pred3_1)
print("Jan ramdon forest: % d\n"% y_pred3_2)
print("Jan ramdon forest: % d\n"% y_pred3_3)
print("Jan ramdon forest: % d\n"% y_pred3_4)
print("Jan ramdon forest: % d\n"% y_pred3_5)
print("Jan ramdon forest: % d\n"% y_pred3_6)
print("Jan ramdon forest: % d\n"% y_pred3_7)
print("Jan ramdon forest: % d\n"% y_pred3_8)
print("Jan ramdon forest: % d\n"% y_pred3_9)
print("Final for jan 2021 % d\n"% y_pred_final)

# print the predicted price





Jan ramdon forest:  28

Jan ramdon forest:  10

Jan ramdon forest:  823

Jan ramdon forest:  29

Jan ramdon forest:  3155

Jan ramdon forest:  354

Jan ramdon forest:  261

Jan ramdon forest:  10

Jan ramdon forest:  29

Final for jan 2021  4662





In [59]:
#decison tree for all jan 2021

# test the output by for jan 0,0,2021,1
y_pred2_1 = regressor2.predict([[0,0,2021,1]])
y_pred2_2 = regressor2.predict([[0,1,2021,1]])
y_pred2_3 = regressor2.predict([[1,0,2021,1]])
y_pred2_4 = regressor2.predict([[1,1,2021,1]])
y_pred2_5 = regressor2.predict([[2,0,2021,1]])
y_pred2_6 = regressor2.predict([[2,1,2021,1]])
y_pred2_7 = regressor2.predict([[2,2,2021,1]])

y_pred2_8 = regressor2.predict([[0,2,2021,1]])
y_pred2_9 = regressor2.predict([[1,2,2021,1]])

y_pred_final_dt = y_pred2_1+y_pred2_2+y_pred2_3+y_pred2_4+y_pred2_5+y_pred2_6+y_pred2_7



print("Jan ramdon forest: % d\n"% y_pred2_1)
print("Jan ramdon forest: % d\n"% y_pred2_2)
print("Jan ramdon forest: % d\n"% y_pred2_3)
print("Jan ramdon forest: % d\n"% y_pred2_4)
print("Jan ramdon forest: % d\n"% y_pred2_5)
print("Jan ramdon forest: % d\n"% y_pred2_6)
print("Jan ramdon forest: % d\n"% y_pred2_7)
print("Jan ramdon forest: % d\n"% y_pred2_8)
print("Jan ramdon forest: % d\n"% y_pred2_9)
print("Final for jan 2021 % d\n"% y_pred_final_dt)

# print the predicted price





Jan ramdon forest:  28

Jan ramdon forest:  11

Jan ramdon forest:  791

Jan ramdon forest:  28

Jan ramdon forest:  3139

Jan ramdon forest:  360

Jan ramdon forest:  243

Jan ramdon forest:  11

Jan ramdon forest:  28

Final for jan 2021  4600





In [60]:
#random forest for all oct 2021

# test the output by for jan 0,0,2021,10
y_pred3_1 = regressor3.predict([[0,0,2021,10]])
y_pred3_2 = regressor3.predict([[0,1,2021,10]])
y_pred3_3 = regressor3.predict([[1,0,2021,10]])
y_pred3_4 = regressor3.predict([[1,1,2021,10]])
y_pred3_5 = regressor3.predict([[2,0,2021,10]])
y_pred3_6 = regressor3.predict([[2,1,2021,10]])
y_pred3_7 = regressor3.predict([[2,2,2021,10]])

y_pred3_8 = regressor3.predict([[0,2,2021,10]])
y_pred3_9 = regressor3.predict([[1,2,2021,10]])

y_pred_final = y_pred3_1+y_pred3_2+y_pred3_3+y_pred3_4+y_pred3_5+y_pred3_6+y_pred3_7



print("Jan ramdon forest: % d\n"% y_pred3_1)
print("Jan ramdon forest: % d\n"% y_pred3_2)
print("Jan ramdon forest: % d\n"% y_pred3_3)
print("Jan ramdon forest: % d\n"% y_pred3_4)
print("Jan ramdon forest: % d\n"% y_pred3_5)
print("Jan ramdon forest: % d\n"% y_pred3_6)
print("Jan ramdon forest: % d\n"% y_pred3_7)
print("Jan ramdon forest: % d\n"% y_pred3_8)
print("Jan ramdon forest: % d\n"% y_pred3_9)
print("Final for jan 2021 % d\n"% y_pred_final)

# print the predicted price





Jan ramdon forest:  32

Jan ramdon forest:  20

Jan ramdon forest:  938

Jan ramdon forest:  50

Jan ramdon forest:  3406

Jan ramdon forest:  577

Jan ramdon forest:  507

Jan ramdon forest:  20

Jan ramdon forest:  50

Final for jan 2021  5531





In [61]:
#decison tree for all oct 2021

# test the output by for jan 0,0,2021,1
y_pred2_1 = regressor2.predict([[0,0,2021,10]])
y_pred2_2 = regressor2.predict([[0,1,2021,10]])
y_pred2_3 = regressor2.predict([[1,0,2021,10]])
y_pred2_4 = regressor2.predict([[1,1,2021,10]])
y_pred2_5 = regressor2.predict([[2,0,2021,10]])
y_pred2_6 = regressor2.predict([[2,1,2021,10]])
y_pred2_7 = regressor2.predict([[2,2,2021,10]])

y_pred2_8 = regressor2.predict([[0,2,2021,10]])
y_pred2_9 = regressor2.predict([[1,2,2021,10]])

y_pred_final_dt = y_pred2_1+y_pred2_2+y_pred2_3+y_pred2_4+y_pred2_5+y_pred2_6+y_pred2_7



print("Jan decison tree: % d\n"% y_pred2_1)
print("Jan decison tree: % d\n"% y_pred2_2)
print("Jan decison tree: % d\n"% y_pred2_3)
print("Jan decison tree: % d\n"% y_pred2_4)
print("Jan decison tree: % d\n"% y_pred2_5)
print("Jan decison tree: % d\n"% y_pred2_6)
print("Jan decison tree: % d\n"% y_pred2_7)
print("Jan decison tree: % d\n"% y_pred2_8)
print("Jan decison tree: % d\n"% y_pred2_9)
print("Final for jan 2021 % d\n"% y_pred_final_dt)

# print the predicted price





Jan decison tree:  23

Jan decison tree:  20

Jan decison tree:  864

Jan decison tree:  50

Jan decison tree:  3659

Jan decison tree:  566

Jan decison tree:  529

Jan decison tree:  20

Jan decison tree:  50

Final for jan 2021  5711





In [62]:
#random forest for all 2021

# test the output by for jan 0,0,2021,10
y_pred3_1 = regressor3.predict([[0,0,2021,1]])
y_pred3_2 = regressor3.predict([[0,0,2021,2]])
y_pred3_3 = regressor3.predict([[0,0,2021,3]])
y_pred3_4 = regressor3.predict([[0,0,2021,4]])
y_pred3_5 = regressor3.predict([[0,0,2021,5]])
y_pred3_6 = regressor3.predict([[0,0,2021,6]])
y_pred3_7 = regressor3.predict([[0,0,2021,7]])
y_pred3_8 = regressor3.predict([[0,0,2021,8]])
y_pred3_9 = regressor3.predict([[0,0,2021,9]])
y_pred3_10 = regressor3.predict([[0,0,2021,10]])
y_pred3_11 = regressor3.predict([[0,0,2021,11]])
y_pred3_12 = regressor3.predict([[0,0,2021,12]])
y_pred_final = y_pred3_1+y_pred3_2+y_pred3_3+y_pred3_4+y_pred3_5+y_pred3_6+y_pred3_7+y_pred3_8+y_pred3_9+y_pred3_10+y_pred3_11+y_pred3_12



print("Jan ramdon forest: % d\n"% y_pred3_1)
print("Jan ramdon forest: % d\n"% y_pred3_2)
print("Jan ramdon forest: % d\n"% y_pred3_3)
print("Jan ramdon forest: % d\n"% y_pred3_4)
print("Jan ramdon forest: % d\n"% y_pred3_5)
print("Jan ramdon forest: % d\n"% y_pred3_6)
print("Jan ramdon forest: % d\n"% y_pred3_7)
print("Jan ramdon forest: % d\n"% y_pred3_8)
print("Jan ramdon forest: % d\n"% y_pred3_9)
print("Jan ramdon forest: % d\n"% y_pred3_10)
print("Jan ramdon forest: % d\n"% y_pred3_11)
print("Jan ramdon forest: % d\n"% y_pred3_12)
print("Final for jan 2021 % d\n"% y_pred_final)

# print the predicted price







Jan ramdon forest:  28

Jan ramdon forest:  36

Jan ramdon forest:  31

Jan ramdon forest:  29

Jan ramdon forest:  37

Jan ramdon forest:  39

Jan ramdon forest:  48

Jan ramdon forest:  44

Jan ramdon forest:  45

Jan ramdon forest:  32

Jan ramdon forest:  22

Jan ramdon forest:  18

Final for jan 2021  413



In [63]:
#random forest for all 2021

# test the output by for jan 0,0,2021,10
y_pred3_1 = regressor3.predict([[0,0,2021,1]])
y_pred3_2 = regressor3.predict([[0,0,2021,2]])
y_pred3_3 = regressor3.predict([[0,0,2021,3]])
y_pred3_4 = regressor3.predict([[0,0,2021,4]])
y_pred3_5 = regressor3.predict([[0,0,2021,5]])
y_pred3_6 = regressor3.predict([[0,0,2021,6]])
y_pred3_7 = regressor3.predict([[0,0,2021,7]])
y_pred3_8 = regressor3.predict([[0,0,2021,8]])
y_pred3_9 = regressor3.predict([[0,0,2021,9]])
y_pred3_10 = regressor3.predict([[0,0,2021,10]])
y_pred3_11 = regressor3.predict([[0,0,2021,11]])
y_pred3_12 = regressor3.predict([[0,0,2021,12]])
y_pred_final = y_pred3_1+y_pred3_2+y_pred3_3+y_pred3_4+y_pred3_5+y_pred3_6+y_pred3_7+y_pred3_8+y_pred3_9+y_pred3_10+y_pred3_11+y_pred3_12



print("Jan decison tree: % d\n"% y_pred3_1)
print("Jan decison tree: % d\n"% y_pred3_2)
print("Jan decison tree: % d\n"% y_pred3_3)
print("Jan decison tree: % d\n"% y_pred3_4)
print("Jan decison tree: % d\n"% y_pred3_5)
print("Jan decison tree: % d\n"% y_pred3_6)
print("Jan decison tree: % d\n"% y_pred3_7)
print("Jan decison tree: % d\n"% y_pred3_8)
print("Jan decison tree: % d\n"% y_pred3_9)
print("Jan decison tree: % d\n"% y_pred3_10)
print("Jan decison tree: % d\n"% y_pred3_11)
print("Jan decison tree: % d\n"% y_pred3_12)
print("Final for jan 2021 % d\n"% y_pred_final)

# print the predicted price







Jan decison tree:  28

Jan decison tree:  36

Jan decison tree:  31

Jan decison tree:  29

Jan decison tree:  37

Jan decison tree:  39

Jan decison tree:  48

Jan decison tree:  44

Jan decison tree:  45

Jan decison tree:  32

Jan decison tree:  22

Jan decison tree:  18

Final for jan 2021  413



In [64]:
# pickling the model
import pickle
pickle_out = open("regressor3.pkl", "wb")
pickle.dump(regressor3, pickle_out)
pickle_out.close()
