# Predicting Forest Fires area using Neural Network

In [None]:
#pip install modin[all]


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import  keras
from kerastuner.tuners import RandomSearch
from keras import layers
from keras.layers import Dense
from keras import Sequential
#import modin.pandas as pd #using modin module to 70x pd computational speed
#import ray
#ray.init(runtime_env={'env_vars': {'__MODIN_AUTOIMPORT_PANDAS__': '1'}})

In [None]:
# Load the forest-fires dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run on another machine without editing it.
fire=pd.read_csv("C:\\Users\\Hi\\Desktop\\Python Datasets\\forestfires.csv")

In [None]:
# Display the raw frame.
fire

In [None]:
# Summary statistics of the columns.
fire.describe()

In [None]:
# Column dtypes and non-null counts.
fire.info()

In [None]:
# Remove the 'month' and 'day' columns from the frame.
# NOTE(review): presumably these are dropped because the information is
# already encoded in other columns of the CSV - confirm against the data.
fire=fire.drop(['month','day'], axis=1)

In [None]:
# Display the frame after the drop.
fire

In [None]:
# One box plot per column (subplots laid out on a 10x5 grid) to eyeball outliers.
fire.plot(kind='box',subplots=True, layout=(10,5),figsize=(20,20))
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [None]:
# Replace the 'size_category' labels with integer codes so the column is numeric.
le=LabelEncoder()
fire['size_category']=le.fit_transform(fire['size_category'])
# Display the frame with the encoded column.
fire

In [None]:
# Standardise the eight listed feature columns in place; every other column
# of `fire` is left untouched.
scaler = StandardScaler()
scaled_columns = ['FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain']
fire[scaled_columns] = scaler.fit_transform(fire[scaled_columns])
fire

In [None]:
from scipy import stats

# Outlier removal based on z-scores: a row is kept only if every one of its
# columns has an absolute z-score below 3.
zsc = stats.zscore(fire)
# Take absolute values so that large negative and large positive deviations
# are handled by the same `< 3` test.
zscores = np.abs(zsc)
filter_zscores = (zscores < 3).all(axis=1)
# .copy() makes `filtered` an independent frame. Later cells add an 'anomaly'
# column to it and drop rows in place; doing that on a bare slice of `fire`
# triggers pandas' SettingWithCopyWarning.
# (The original run reported 122 rows removed as outliers by this filter.)
filtered = fire[filter_zscores].copy()

In [None]:
# Row/column count after the z-score filter ...
filtered.shape

In [None]:
# ... compared with the row/column count of the unfiltered frame.
fire.shape

In [None]:
# Display the filtered frame.
filtered

In [None]:
# Box plots of the filtered frame, same layout as the earlier plot of `fire`,
# to check visually how many extreme points remain.
filtered.plot(kind='box',subplots=True, layout=(10,5),figsize=(20,20))
plt.show()

In [None]:
from sklearn.ensemble import IsolationForest
# Second outlier pass: fit an Isolation Forest on the z-score-filtered frame.
# contamination=0.05 is the assumed fraction of outliers; random_state=0
# makes the fit reproducible.
clf=IsolationForest(contamination=0.05,random_state=0)
clf.fit(filtered)


In [None]:
filtered['anomaly']=clf.predict(filtered)
filtered

In [None]:
filtered.shape

In [None]:
# through Isolation forest we have dropped 20 records at assumption of 5% contamination
filtered.drop(filtered[filtered['anomaly']==-1].index,inplace=True)
filtered.shape

In [None]:
filtered=filtered.drop(['anomaly'],axis=1)
filtered

In [None]:
# Automatic EDA using Sweetviz: analyse the cleaned frame and write the
# report to 'EDA_of_FireForests.html'.
import sweetviz as sv
sweet_report=sv.analyze(filtered)
sweet_report.show_html('EDA_of_FireForests.html')

In [None]:
import seaborn as sns
# Annotated heatmap of the pairwise column correlations of the cleaned frame.
plt.figure(figsize=(10,10))
sns.heatmap(filtered.corr(),annot=True)
plt.show()

In [None]:
# Positional split: the first 28 columns become the features X, everything
# from column index 28 onward becomes the target Y.
# NOTE(review): the index 28 is hard-coded; presumably the frame has 29
# columns with the target last - confirm. Also note that create_model() is
# built with input_dim=8, which does not match a 28-column X.
X=filtered.iloc[:,:28]
Y=filtered.iloc[:,28:]
# Display the target frame.
Y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing (fixed seed for reproducibility) and
# convert each of the four resulting splits to a NumPy array in one step.
x_train, x_test, y_train, y_test = (
    np.asarray(part)
    for part in train_test_split(X, Y, test_size=0.33, random_state=0)
)
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV


def create_model(learning_rate,dropout_rate,activation_function,init,neuron1,neuron2,input_dim=8):
    """Build and compile a two-hidden-layer regression network.

    Intended as the ``build_fn`` of a ``KerasRegressor`` so that the
    arguments can be tuned by a scikit-learn grid search.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Dropout fraction applied after each hidden layer.
        activation_function: Activation used by both hidden layers.
        init: Kernel initializer used by both hidden layers.
        neuron1: Number of units in the first hidden layer.
        neuron2: Number of units in the second hidden layer.
        input_dim: Number of input features. Defaults to 8, the value that
            was previously hard-coded; pass the real column count of the
            feature matrix if it differs.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(neuron1, input_dim=input_dim, kernel_initializer=init,
                    activation=activation_function))
    model.add(Dropout(dropout_rate))
    # Only the first layer needs the input size; later layers infer it.
    model.add(Dense(neuron2, kernel_initializer=init,
                    activation=activation_function))
    model.add(Dropout(dropout_rate))
    # Single linear output unit for regression.
    model.add(Dense(1))

    adam = Adam(learning_rate=learning_rate)
    # 'rmsprop' is the name of an optimizer, not a loss, so compile() rejected
    # it. Mean squared error is the regression loss that matches the 'mse'
    # metric already being tracked.
    model.compile(loss='mean_squared_error', optimizer=adam, metrics=['accuracy', 'mse'])
    return model

# Wrap the Keras builder so scikit-learn can treat it as an estimator
model = KerasRegressor(build_fn=create_model, verbose=0)

# Candidate values for every tunable hyperparameter
batch_size = [10, 20, 40]
epochs = [10, 50, 100]
learning_rate = [0.1, 0.01, 0.001]
dropout_rate = [0.0, 0.1, 0.2]
activation_function = ['softmax', 'relu', 'tanh', 'linear']
init = ['uniform', 'normal', 'zero']
neuron1 = [4, 8, 16]
neuron2 = [2, 4, 8]

# Search space: every combination of the lists above is evaluated
params_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'learning_rate': learning_rate,
    'dropout_rate': dropout_rate,
    'activation_function': activation_function,
    'init': init,
    'neuron1': neuron1,
    'neuron2': neuron2,
}

# Run the exhaustive cross-validated search on the full feature/target frames
grid = GridSearchCV(estimator=model, param_grid=params_grid, verbose=11)
grid_result = grid.fit(X, Y)

# Report the winning configuration, then the score of every configuration
print('Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))


In [None]:

def build_model(hp):
    """Build a tunable regression network for KerasTuner.

    The number of hidden layers, and each hidden layer's width, activation
    and dropout rate, are sampled from ``hp``; so is the Adam learning rate.

    Args:
        hp: The KerasTuner hyperparameter container for the current trial.

    Returns:
        A compiled Keras model ending in a single linear output unit,
        trained on mean absolute error.
    """
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(
            units=hp.Int('units_' + str(i), min_value=32, max_value=152, step=32),
            activation=hp.Choice('activation' + str(i), values=['relu', 'tanh', 'sigmoid']),
        ))
        model.add(layers.Dropout(
            hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
        ))
    # The output layer and compile step belong after the loop. Inside it, a
    # 1-unit layer was inserted after every hidden layer (squeezing the signal
    # to a single value each time) and the model was recompiled per iteration.
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'],
    )
    return model

In [None]:
# Random search over build_model's hyperparameters: 5 sampled configurations,
# each trained 3 times, ranked by validation mean absolute error. Results go
# under project/Forest Fires and any earlier results there are overwritten.
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=5,
    executions_per_trial=3,
    directory='project',
    project_name='Forest Fires',
    overwrite=True,
)

In [None]:
# Print the hyperparameters the tuner will sample from.
tuner.search_space_summary()

In [None]:
# Run the search: every trial trains for up to 100 epochs.
# NOTE(review): the held-out test split is used as validation data here, so
# it influences hyperparameter selection - confirm this is intended.
tuner.search(x_train,y_train,epochs=100,validation_data=(x_test,y_test))

In [None]:
# Hyperparameter values of the best trial.
tuner.get_best_hyperparameters()[0].values

In [None]:
# Retrieve the best model found by the tuner. This rebinds the name `model`,
# which earlier in the file referred to the KerasRegressor wrapper.
model=tuner.get_best_models(num_models=1)[0]

In [None]:
# Continue training the best model.
# NOTE(review): initial_epoch=6 makes Keras start counting at epoch 6, so this
# runs epochs 6..100 rather than a full 100; the reason for 6 is not evident
# from this file - confirm.
model.fit(x_train,y_train,epochs=100,initial_epoch=6,validation_data=(x_test,y_test))

In [None]:
# Layer-by-layer overview of the final architecture.
model.summary()