In [1]:
# Libraries
import os
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
import platform
import socket
from platform import python_version
from datetime import datetime

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Show current working directory
print(os.getcwd())

c:\Users\sandr\Documents\01 Data\01 ZHAW\06 Semester\04 KI Anwendung\00 Project\app


In [2]:
df = pd.read_csv('merged_data.csv')
df.head()

Unnamed: 0,Richtung,AnzFahrzeuge,Luftfeuchtigkeit (%Hr),Luftdruck (hPa),Niederschlag,Temperatur (°C),Time,Date,holiday
0,0,4043.0,92.51,970.12,1,7.58,00:00:00,2012-01-01,1
1,1,2931.0,92.51,970.12,1,7.58,00:00:00,2012-01-01,1
2,0,11021.0,91.0,970.09,1,7.98,01:00:00,2012-01-01,1
3,1,6660.0,91.0,970.09,1,7.98,01:00:00,2012-01-01,1
4,0,7892.0,90.38,970.46,0,7.62,02:00:00,2012-01-01,1


In [4]:
# Create train and test samples
X_train, X_test, y_train, y_test = train_test_split(df[['Temperatur (°C)',
                                                        'Niederschlag',
                                                        'Richtung',
                                                        'Luftfeuchtigkeit (%Hr)',
                                                        'Luftdruck (hPa)',
                                                        'holiday']], 
                                                    df['AnzFahrzeuge'], 
                                                    test_size=0.20, 
                                                    random_state=42)
# Show X_train
print('X_train:')
print(X_train.head(), '\n')

# Show y_train
print('y_train:')
print(y_train.head())

X_train:
        Temperatur (°C)  Niederschlag  Richtung  Luftfeuchtigkeit (%Hr)  \
75941              9.91             1         1                   80.85   
38564             12.80             0         0                   49.99   
71511             11.93             0         1                   78.84   
17263              9.18             0         1                   58.24   
143786            10.79             0         0                   80.75   

        Luftdruck (hPa)  holiday  
75941            973.09        0  
38564            971.85        0  
71511            976.02        0  
17263            969.26        0  
143786           967.29        0   

y_train:
75941     18903.0
38564     16568.0
71511     25750.0
17263     12616.0
143786     7525.0
Name: AnzFahrzeuge, dtype: float64


In [5]:
randomforest_model = RandomForestRegressor(random_state=42)

scores = cross_val_score(randomforest_model, X_train, y_train, scoring="neg_root_mean_squared_error", cv=5)
print('--{}--'.format(randomforest_model))
print(scores)
print(np.mean(scores)) 

--RandomForestRegressor(random_state=42)--
[-6342.72669512 -6350.20473663 -6316.41014572 -6303.92367298
 -6297.78723622]
-6322.210497333469


In [8]:
from sklearn.model_selection import GridSearchCV


param_grid = {'max_depth': [25, 30, 35],
'max_features': [7, 9]
}

grid_search_rf = GridSearchCV(randomforest_model, param_grid, cv=5, scoring='neg_root_mean_squared_error', verbose=2) # add param verbose = 2 to see the state
grid_search_rf.fit(X_train, y_train)
print(grid_search_rf.best_estimator_)
print(grid_search_rf.best_params_)
print(grid_search_rf.best_score_)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .......................max_depth=25, max_features=7; total time=  37.2s
[CV] END .......................max_depth=25, max_features=7; total time=  37.1s
[CV] END .......................max_depth=25, max_features=7; total time=  36.3s
[CV] END .......................max_depth=25, max_features=7; total time=  35.7s
[CV] END .......................max_depth=25, max_features=7; total time=  35.7s
[CV] END .......................max_depth=25, max_features=9; total time=  35.8s
[CV] END .......................max_depth=25, max_features=9; total time=  35.6s
[CV] END .......................max_depth=25, max_features=9; total time=  35.7s
[CV] END .......................max_depth=25, max_features=9; total time=  35.7s
[CV] END .......................max_depth=25, max_features=9; total time=  35.8s
[CV] END .......................max_depth=30, max_features=7; total time=  38.3s
[CV] END .......................max_depth=30, max