In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, KFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression
from imblearn.over_sampling import SMOTE

In [27]:
# Load the Algerian forest fires dataset.
# The path is a raw string so the Windows backslashes are taken literally instead of
# being parsed as (invalid) escape sequences such as "\C" or "\P".
# TODO: replace this machine-specific absolute path with a configurable data directory.
df = pd.read_csv(r'E:\CCBST\Projects\AlgerianFireForest\Dataset\Algerian_forest_fires.csv')

In [28]:
# Trim leading/trailing whitespace from the Classes labels.
# The vectorized .str accessor is used so missing values pass through as NaN
# instead of raising AttributeError the way a per-element x.strip() call would.

df['Classes'] = df['Classes'].str.strip()

In [29]:
# Preview the month column rendered as strings. The result is only displayed, not
# assigned back, so df['month'] keeps its original dtype; the replacement in the
# next cell depends on that because its mapping is keyed by the integers 6-9.

df['month'].astype(str)

0      6
1      6
2      6
3      6
4      6
      ..
238    9
239    9
240    9
241    9
242    9
Name: month, Length: 243, dtype: object

In [30]:
# Swap the numeric month codes (6-9) for the corresponding month names

month_names = {6: 'June', 7: 'July', 8: 'August', 9: 'September'}
df['month'] = df['month'].replace(month_names)

In [31]:
# Inspect the column labels before any columns are dropped or renamed
df.columns

Index(['day', 'month', 'year', 'Temperature', 'RH', 'Ws', 'Rain', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes', 'Region'],
      dtype='object')

In [32]:
# Drop the day, year, DC and BUI columns.
# The labels are passed directly via `columns=` rather than as a sub-DataFrame.

df.drop(columns=['day', 'year', 'DC', 'BUI'], inplace=True)

In [33]:
# Give the abbreviated columns self-explanatory names

readable_names = {
    'month': 'Months',
    'RH': 'Relative_Humidity',
    'Ws': 'Wind_speed',
    'FFMC': 'Fine_Fuel_Moisture_Code',
    'DMC': 'Duff_Moisture_Code',
    'ISI': 'Initial_Spread_IDX',
    'FWI': 'Fire_Weather_IDX',
    # extend this mapping if further columns need renaming
}
df.rename(columns=readable_names, inplace=True)

In [34]:
# Confirm the column labels after the drop and rename steps

df.columns

Index(['Months', 'Temperature', 'Relative_Humidity', 'Wind_speed', 'Rain',
       'Fine_Fuel_Moisture_Code', 'Duff_Moisture_Code', 'Initial_Spread_IDX',
       'Fire_Weather_IDX', 'Classes', 'Region'],
      dtype='object')

**Label Encoding**

In [35]:
# Turn the string class labels into integer codes: learn the label set first,
# then overwrite the column with the encoded values.
encoder = LabelEncoder()
encoder.fit(df['Classes'])
df['Classes'] = encoder.transform(df['Classes'])

In [36]:
# Separate the regression target (Fire_Weather_IDX) from the predictor columns

target_name = 'Fire_Weather_IDX'
y = df[target_name]
X = df.drop(columns=target_name)

In [37]:
# Partition the predictors: Classes and Region are kept as categorical inputs,
# Months is left out of both groups, and everything else is treated as numeric.

categorical_names = ['Classes', 'Region']
cat_col = X[categorical_names]
num_col = X.drop(columns=categorical_names + ['Months'])

In [38]:
# Standardize the numeric features with StandardScaler.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed later, so the held-out rows contribute to the scaling statistics.

sc = StandardScaler()
df_sc = sc.fit_transform(num_col)
# Carry num_col's index over to the scaled frame so the later column-wise concat
# with cat_col lines rows up correctly even when the index is not a default 0..n-1 range.
df_sc_1 = pd.DataFrame(df_sc, columns=num_col.columns, index=num_col.index)

In [39]:
# Put the standardized numeric columns and the categorical columns side by side

final_df = pd.concat(objs=[df_sc_1, cat_col], axis='columns')
final_df

Unnamed: 0,Temperature,Relative_Humidity,Wind_speed,Rain,Fine_Fuel_Moisture_Code,Duff_Moisture_Code,Initial_Spread_IDX,Classes,Region
0,-0.870655,-0.340673,0.893277,-0.381657,-0.847927,-0.912120,-0.830356,1,0
1,-0.870655,-0.070359,-0.888877,0.268642,-0.938709,-0.855520,-0.902720,1,0
2,-1.699254,1.348788,2.319000,6.171355,-2.146802,-0.984891,-1.071571,1,0
3,-1.975454,1.821837,-0.888877,0.868918,-3.438694,-1.081920,-1.143935,1,0
4,-1.423054,1.010896,0.180416,-0.381657,-0.910776,-0.944463,-0.854477,1,0
...,...,...,...,...,...,...,...,...,...
238,-0.594455,0.199954,-0.532446,-0.381657,0.527763,0.106678,-0.058467,0,1
239,-1.146855,1.686680,-0.176015,1.819355,-2.565794,-0.661463,-1.119814,1,1
240,-1.423054,1.686680,4.814014,-0.131542,-2.230600,-0.904034,-1.047449,1,1
241,-2.251654,-0.543409,0.893277,-0.331634,0.129721,-0.839349,-0.733870,1,1


In [40]:
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    final_df,
    y,
    test_size=0.25,
    random_state=42,
)

## **1. Linear Regression**

In [41]:
# Ordinary least-squares model trained on the training split
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [42]:
# Linear-model predictions for the training rows and for the held-out rows
y_pred_train = lr.predict(X_train)
y_pred_test = lr.predict(X_test)

In [43]:
# Display the fitted linear-regression coefficients together with the intercept
lr.coef_, lr.intercept_

(array([-0.03560044, -0.17236446,  0.0074911 , -0.03842353, -0.82331222,
         3.62549623,  4.64564592, -0.81242559, -0.78524598]),
 7.798603449809412)

## ***Ridge Model***

In [None]:
%%time
rd = Ridge()
rd.fit(X_train, y_train)
y_pred_test_rd = rd.predict(X_test)
y_pred_train_rd = rd.predict(X_train)

print("Ridge Metrics on Test Data:\n")
print('R2 Score: ', r2_score(y_test, y_pred_test_rd))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred_test_rd))
print('Root Mean Squared Error: ', np.sqrt(mean_squared_error(y_test, y_pred_test_rd)))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred_test_rd))

# evaluate the models using metrics on train data
print("\nRidge Metrics on Train Data:\n")
print('R2 Score_train: ', r2_score(y_train, y_pred_train_rd))
print('Mean Squared Error_train: ', mean_squared_error(y_train, y_pred_train_rd))
print('Root Mean Squared Error_train: ', np.sqrt(mean_squared_error(y_train, y_pred_train_rd)))
print('Mean Absolute Error_train: ', mean_absolute_error(y_train, y_pred_train_rd))
print("\n Coefficients are as:\n")

for i, col in enumerate(X_train.columns):
    print('The coefficient for {} is {}'.format(col, rd.coef_[i]))

## ***Lasso Model***

In [None]:
%%time
ls=Lasso(alpha=1)
ls.fit(X_train, y_train)
y_pred_test_ls = ls.predict(X_test)
y_pred_train_ls = ls.predict(X_train)

# evaluate the models using metrics on test data
print("Lasso Metrics on Test Data:\n")
print('R2 Score: ', r2_score(y_test, y_pred_test_ls))
print('Mean Squared Error: ', mean_squared_error(y_test, y_pred_test_ls))
print('Root Mean Squared Error: ', np.sqrt(mean_squared_error(y_test, y_pred_test_ls)))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred_test_ls))

# evaluate the models using metrics on train data
print("\nLasso Metrics on Train Data:\n")
print('R2 Score_train: ', r2_score(y_train, y_pred_train_ls))
print('Mean Squared Error_train: ', mean_squared_error(y_train, y_pred_train_ls))
print('Root Mean Squared Error_train: ', np.sqrt(mean_squared_error(y_train, y_pred_train_ls)))
print('Mean Absolute Error_train: ', mean_absolute_error(y_train, y_pred_train_ls))
print("\n Coefficients are as:\n")

for i, col in enumerate(X_train.columns):
    print('The coefficient for {} is {}'.format(col, ls.coef_[i]))

## ***ElasticNet Model***

In [None]:
# ElasticNet regression with alpha=1: fit on the training split, then predict for both splits.
en = ElasticNet(alpha=1)
en.fit(X_train, y_train)
y_pred_test_en = en.predict(X_test)
y_pred_train_en = en.predict(X_train)

# Metrics on the held-out test rows (MSE is computed once and reused for the RMSE).
# The report headers previously misspelled the model name as "ElassticNet".
mse_test_en = mean_squared_error(y_test, y_pred_test_en)
print("ElasticNet Metrics on Test Data:\n")
print('R2 Score: ', r2_score(y_test, y_pred_test_en))
print('Mean Squared Error: ', mse_test_en)
print('Root Mean Squared Error: ', np.sqrt(mse_test_en))
print('Mean Absolute Error: ', mean_absolute_error(y_test, y_pred_test_en))

# Same metrics on the training rows, for comparison against the test scores
mse_train_en = mean_squared_error(y_train, y_pred_train_en)
print("\nElasticNet Metrics on Train Data:\n")
print('R2 Score_train: ', r2_score(y_train, y_pred_train_en))
print('Mean Squared Error_train: ', mse_train_en)
print('Root Mean Squared Error_train: ', np.sqrt(mse_train_en))
print('Mean Absolute Error_train: ', mean_absolute_error(y_train, y_pred_train_en))
print("\n Coefficients are as:\n")

# One fitted coefficient per feature column, listed in column order
for col, weight in zip(X_train.columns, en.coef_):
    print('The coefficient for {} is {}'.format(col, weight))

In [47]:
import pickle

In [50]:
# Persist the fitted Ridge model; the context manager closes the file handle once written.
# NOTE(review): rd was trained on standardized features, so the fitted scaler `sc` is
# needed to prepare inputs at prediction time — it is not saved here.
with open('ridge_model.pkl', 'wb') as model_file:
    pickle.dump(rd, model_file)