-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
86 lines (60 loc) · 2.34 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 17 20:26:02 2020
@author: khwaj
"""
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
data = pd.read_csv('Real_Combine.csv')
# Reading first 5 rows
data.head()
data.isnull().sum()
# if we have minimum null values we can drop null values
data = data.dropna()
# We don`t have null values
X = data.drop(['PM 2.5'], axis=1) # Droping the Target Variable i.e. PM 2.5
Y = pd.DataFrame(data['PM 2.5']) # Assigning the Target Vatiavle to Y
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train,Y_test = train_test_split(X, Y, test_size=0.3, train_size=0.7, random_state=0)
from sklearn.ensemble import RandomForestRegressor
# For selecting the best Parameters
from sklearn.model_selection import RandomizedSearchCV
# Randomized Search CV
# No.of Decision-Trees in Random Forest
n_est = [int(x) for x in np.linspace(start=100,stop=1200,num=12)]
# No,of Features to consider at every split
max_feat = ['auto','sqrt']
# Maximum no of levels in trees
maximum_depth = [int(x) for x in np.linspace(start=5,stop=30,num=6)]
# Minimum no.of samples required to sperate a node
minimum_sample_split = [2,5,10,15,100]
# Minimum no.of samples required at each leaf node
minimum_sample_leaf = [1,2,5,10]
RandomForestRegressor()
# All these parameters are the Parameters of RandomForestRegressor()
random_grid = {
'n_estimators' : n_est,
'max_depth' : maximum_depth,
'min_samples_split' : minimum_sample_split,
'min_samples_leaf' : minimum_sample_leaf,
'max_features' : max_feat
}
print(random_grid)
# Using random_grid to search for best parameter
# Creating the best Parameter to tune
rand_forrest = RandomForestRegressor()
rf_random = RandomizedSearchCV(estimator= rand_forrest ,
param_distributions= random_grid,
scoring='neg_mean_squared_error',
n_iter=100, cv=5, verbose=2 ,random_state= 42)
rf_random.fit(X_train,Y_train)
import pickle
# Open a file
file = open('model.pkl', 'wb')
# dump information to that file
pickle.dump(rf_random,file)
# Loading model to compare the results
model = pickle.load(open('model.pkl','rb'))
print(model.predict([[1,1,1,1,1,1,1,1]]))