**SETTING UP ENVIRONMENT**

In [22]:
# Importing libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import math
import matplotlib.pyplot as plt
import pandas as pd

**ML MODELING FOR TEMPORAL DATA**

In [23]:
# Load the temporal dataset from the CSV file that sits next to this notebook.
# The bare name on the last line makes the notebook render the loaded table.
temporal_data = pd.read_csv(
    'temporal data.csv',
)
temporal_data

Unnamed: 0,dtime,doy,dow,natholiday,observer_intensity,ave_precip,ave_temper
0,20170101,1.0,0.0,True,1246,2.991323,3.117668
1,20170102,2.0,1.0,False,1440,1.211957,5.923946
2,20170103,3.0,2.0,False,1194,1.973178,6.739538
3,20170104,4.0,3.0,False,1011,2.087190,7.395226
4,20170105,5.0,4.0,False,1526,0.006800,3.614235
...,...,...,...,...,...,...,...
176,20170626,177.0,1.0,False,1040,0.000829,20.361995
177,20170627,178.0,2.0,False,890,4.530172,21.403248
178,20170628,179.0,3.0,False,702,13.369516,21.741147
179,20170629,180.0,4.0,False,882,2.782316,20.857240


In [24]:
# Independent variables (model inputs): the names of the columns used as
# predictors, then the matching slice of the dataset (all rows kept).
columns_temp = ['ave_temper', 'ave_precip', 'doy', 'dow', 'natholiday']
x = temporal_data.loc[:, columns_temp]
x

Unnamed: 0,ave_temper,ave_precip,doy,dow,natholiday
0,3.117668,2.991323,1.0,0.0,True
1,5.923946,1.211957,2.0,1.0,False
2,6.739538,1.973178,3.0,2.0,False
3,7.395226,2.087190,4.0,3.0,False
4,3.614235,0.006800,5.0,4.0,False
...,...,...,...,...,...
176,20.361995,0.000829,177.0,1.0,False
177,21.403248,4.530172,178.0,2.0,False
178,21.741147,13.369516,179.0,3.0,False
179,20.857240,2.782316,180.0,4.0,False


In [25]:
# Dependent variable (model output): the single column the regressors
# are trained to predict.
y = temporal_data.loc[:, 'observer_intensity']
y

0      1246
1      1440
2      1194
3      1011
4      1526
       ... 
176    1040
177     890
178     702
179     882
180     890
Name: observer_intensity, Length: 181, dtype: int64

**TRAIN AND TEST SPLIT**

In [26]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffled split identical on every run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

**ML ACCURACY TEST**

In [27]:
def evaluate_regressor(model, xtrain, ytrain, xtest, ytest):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    xtrain, ytrain : training features and targets.
    xtest, ytest : hold-out features and targets.

    Returns
    -------
    tuple
        ``(ypred, r2, mse, rmse, nrmse)`` where ``ypred`` is the prediction
        for ``xtest``, ``r2`` is the R^2 score rounded to 2 decimals, ``mse``
        is the unrounded mean squared error, ``rmse`` is its square root
        rounded to 2 decimals, and ``nrmse`` is the RMSE divided by the
        range (max - min) of ``ytest``, rounded to 2 decimals.
    """
    model.fit(xtrain, ytrain)
    ypred = model.predict(xtest)
    r2 = round(r2_score(ytest, ypred), 2)
    mse = mean_squared_error(ytest, ypred)
    rmse_raw = math.sqrt(mse)
    # Normalise the unrounded RMSE so the value is rounded only once.
    nrmse = round(rmse_raw / (max(ytest) - min(ytest)), 2)
    return ypred, r2, mse, round(rmse_raw, 2), nrmse


# Linear regression (setting)
# `normalize` is no longer passed: it was removed from LinearRegression in
# scikit-learn 1.2, and False was its default, so the model is unchanged.
# The `linear_regression` alias is kept in case other cells refer to it.
lr = linear_regression = LinearRegression(fit_intercept=True,
                                          copy_X=True,
                                          n_jobs=1)

# Decision tree (setting)
# Seeded (same value as the train/test split) so the score is reproducible.
dt = DecisionTreeRegressor(random_state=42)

# Random forest (setting)
# `criterion="mse"` and `max_features="auto"` are no longer passed: both
# spellings were removed from scikit-learn (1.2 and 1.3). They were the
# defaults for a regressor, so leaving them out keeps the same model
# (squared-error splits, all features considered) on old and new versions.
rf = RandomForestRegressor(n_estimators=100,
                           max_depth=None,
                           bootstrap=True,
                           min_samples_split=2,
                           n_jobs=1,
                           random_state=42)

# Accuracy test for every model, in the original LR -> DT -> RF order.
# Note: the value printed as "accuracy" is the R^2 score.
# After the loop, ypred / r2 / mse / rmse / nrmse hold the random forest's
# results, exactly as they did when the three evaluations were written out.
for label, model in (("LR", lr), ("DT", dt), ("RF", rf)):
    ypred, r2, mse, rmse, nrmse = evaluate_regressor(model, xtrain, ytrain, xtest, ytest)
    print(label + " accuracy: ", r2)
    print(label + " NRMSE: ", nrmse)

  warn(


LR accuracy:  0.02
LR NRMSE:  0.21
DT accuracy:  0.45
DT NRMSE:  0.15
RF accuracy:  0.68
RF NRMSE:  0.12


**FEATURE IMPORTANCE**

In [28]:
# Rank the random forest's features from most to least important by pairing
# each importance score with its column name and sorting in descending order.
feature_importance = sorted(
    zip(rf.feature_importances_, columns_temp),
    reverse=True,
)
feature_importance

[(0.3277324887738567, 'doy'),
 (0.2509895018746259, 'dow'),
 (0.228070459510389, 'ave_precip'),
 (0.17464660726177125, 'ave_temper'),
 (0.018560942579357226, 'natholiday')]