## Algerian_Model_Training

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the cleaned dataset CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="Algerian_Forest_firesdataset_Cleaned.csv")

In [None]:
# Preview the first rows to confirm the file loaded as expected.
df.head()

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Remove the calendar columns (day, month, year) from df in place,
# then preview the remaining columns.
df.drop(columns=['day', 'month', 'year'], inplace=True)
df.head()

In [None]:
# Frequency of each raw label in the Classes column, before encoding.
df["Classes"].value_counts()

In [None]:
# Encode the label as binary: any value containing 'not fire' becomes 0,
# every other value becomes 1.
is_not_fire = df['Classes'].str.contains('not fire')
df['Classes'] = np.where(is_not_fire, 0, 1)
df.head()

In [None]:
# Frequency of the encoded 0/1 labels.
df['Classes'].value_counts()

In [None]:
# FWI is the regression target; every remaining column is a predictor.
y = df['FWI']
X = df.drop(columns='FWI')

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# (rows, columns) of the training and test feature frames.
X_train.shape,X_test.shape

In [None]:
# Pairwise correlation matrix of the training features only.
X_train.corr()

## 1. Feature selection based on Correlation

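Features that are highly correlated with one another carry redundant information, so one feature from each such pair is removed. The check below uses the absolute value of the correlation coefficient, so strongly negative correlations are treated the same way as strongly positive ones.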

#### Check for multicollinearity

In [None]:
# Annotated heatmap of the training-feature correlation matrix, used to
# spot strongly correlated pairs by eye.
corr = X_train.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(data=corr, annot=True)

In [None]:
# Report the shape of every split in one pass.
for label, part in (
    ("X_train shape: ", X_train),
    ("X_test shape: ", X_test),
    ("y_train shape: ", y_train),
    ("y_test shape: ", y_test),
):
    print(label, part.shape)

In [None]:
# Identify features that are strongly correlated with an earlier column.
def correlation(dataset, threshold):
    """Return the names of columns to drop because of high correlation.

    A column is flagged when the absolute value of its correlation with
    any column that precedes it in ``dataset`` is strictly greater than
    ``threshold``. Strong negative correlations therefore count as well as
    strong positive ones. The first column of each correlated pair is kept.

    Parameters
    ----------
    dataset : object exposing ``.corr()`` (a pandas DataFrame here)
        The features to analyse.
    threshold : float
        Cut-off applied to the absolute correlation coefficient.

    Returns
    -------
    set
        Names of the flagged columns.
    """
    corr_matrix = dataset.corr()
    flagged = set()
    for position, name in enumerate(corr_matrix.columns):
        # Only the columns to the left of the diagonal are inspected, so
        # each pair is examined once and the diagonal itself is skipped.
        earlier = corr_matrix.iloc[position, :position].abs()
        if (earlier > threshold).any():
            flagged.add(name)
    return flagged

In [None]:
# The cut-off is a judgement call (domain expertise); here a feature is
# flagged when its absolute correlation with an earlier column exceeds 0.85.
corr_featuers = correlation(dataset=X_train, threshold=0.85)

corr_featuers

In [None]:
## Drop the highly correlated features (absolute correlation above 85%).
# Reassign instead of dropping in place: X_train / X_test are derived from X,
# and in-place mutation of such frames can raise SettingWithCopyWarning
# depending on the pandas version. The set is converted to a list so that
# drop() receives an ordinary list-like of labels.
cols_to_drop = list(corr_featuers)
X_train = X_train.drop(columns=cols_to_drop)
X_test = X_test.drop(columns=cols_to_drop)
X_train.shape,X_test.shape

#### Feature Scaling or Standardisation

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so that no test-set information
# leaks into the scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Display the scaled training features as returned by the scaler.
X_train_scaled

#### Box plot to understand effect of standard scaler

In [None]:
# Side-by-side box plots of the training features before and after scaling.
plt.figure(figsize=(15,5))
# Left panel: features on their original scales.
plt.subplot(1,2,1)
sns.boxplot(data=X_train)
plt.xticks(rotation = 90)
plt.title ('Before scaling')

# Right panel: the same features after StandardScaler.
plt.subplot(1,2,2)
sns.boxplot(data=X_train_scaled)
plt.xticks(rotation = 90)
plt.title ('After scaling')

## ML Models

### 1. Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Ordinary least squares fitted on the scaled training features.
linreg = LinearRegression().fit(X_train_scaled, y_train)
y_pred = linreg.predict(X_test_scaled)

# Evaluate on the held-out test split.
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute error", mae)
print("R2 score", score)

# Predicted vs. actual FWI.
plt.scatter(y_test, y_pred)

### 2. Lasso Regression Model

In [None]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, r2_score

# L1-regularised linear model with scikit-learn's default hyperparameters.
lasso = Lasso().fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)

# Evaluate on the held-out test split.
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute error", mae)
print("R2 score", score)

# Predicted vs. actual FWI.
plt.scatter(y_test, y_pred)

### 3. Cross Validation Lasso

In [None]:
from sklearn.linear_model import LassoCV

# Lasso with the regularisation strength chosen by 5-fold cross-validation
# on the training split.
lassocv = LassoCV(cv=5)
lassocv.fit(X=X_train_scaled, y=y_train)

In [None]:
# Regularisation strength selected by cross-validation.
lassocv.alpha_

In [None]:
# The grid of alpha values that LassoCV evaluated.
lassocv.alphas_

In [None]:
# Cross-validation error recorded for the alphas tried
# (see the scikit-learn LassoCV docs for the exact array layout).
lassocv.mse_path_

In [None]:
# Evaluate the cross-validated Lasso on the held-out test split.
y_pred = lassocv.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Predicted vs. actual FWI.
plt.scatter(y_test, y_pred)
print("Mean absolute eror", mae)
print("R2 Score", score)

### 4. Ridge Regression

In [None]:
from sklearn.linear_model import Ridge

# L2-regularised linear model with scikit-learn's default hyperparameters.
ridge = Ridge().fit(X_train_scaled, y_train)
y_pred = ridge.predict(X_test_scaled)

# Evaluate on the held-out test split.
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute eror", mae)
print("R2 Score", score)

# Predicted vs. actual FWI.
plt.scatter(y_test, y_pred)

### 5. Ridge Regression CV

In [None]:
from sklearn.linear_model import RidgeCV

# Ridge with the regularisation strength chosen by 5-fold cross-validation
# over RidgeCV's default alpha candidates.
ridgecv = RidgeCV(cv=5).fit(X_train_scaled, y_train)
y_pred = ridgecv.predict(X_test_scaled)

# Evaluate on the held-out test split.
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute eror", mae)
print("R2 Score", score)

In [None]:
# Show the constructor parameters RidgeCV was configured with.
ridgecv.get_params()

### 6. ElasticNet

In [None]:
from sklearn.linear_model import ElasticNet

# Combined L1 + L2 regularisation with scikit-learn's default hyperparameters.
elasticNet = ElasticNet().fit(X_train_scaled, y_train)
y_pred = elasticNet.predict(X_test_scaled)

# Evaluate on the held-out test split.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error: ", mae)
print("R2 Score: ", r2)

# Predicted vs. actual FWI.
plt.scatter(y_test, y_pred)

### 7. ElasticNet CV

In [None]:
from sklearn.linear_model import ElasticNetCV

# ElasticNet with the regularisation strength chosen by 5-fold
# cross-validation on the training split.
elasticNetCV = ElasticNetCV(cv=5)
elasticNetCV.fit(X_train_scaled, y_train)

# Predict with the tuned model before scoring. Without this step the plot and
# metrics below would silently reuse the y_pred left behind by a previously
# executed cell and report that model's numbers instead.
y_pred = elasticNetCV.predict(X_test_scaled)

# Predicted vs. actual FWI, followed by the test-set metrics.
plt.scatter(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error: ", mae)
print("R2 Score: ", r2)

#### Model Pickling

In [None]:
import pickle

# Persist the fitted scaler and the Ridge model so that inference code can
# apply the same preprocessing and model. The context managers make sure
# each file is flushed and closed even if pickling raises.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
with open('ridge.pkl', 'wb') as model_file:
    pickle.dump(ridge, model_file)

## Summary of Linear Regression Model Selection

Based on the evaluation of various linear regression models, **Ridge Regression emerges as a strong and likely the right choice** for this prediction task.

**Performance Comparison:**

| Model             | Mean Absolute Error (MAE) | R2 Score | Regularization | Feature Selection |
|-------------------|---------------------------|----------|----------------|-------------------|
| Linear Regression | 0.5648                    | 0.9847   | None           | No                |
| Lasso             | 1.1203                    | 0.9511   | L1             | Yes (potential)   |
| LassoCV           | 0.6356                    | 0.9822   | L1 (CV Tuned)  | Yes (potential)   |
| **Ridge** | **0.5845** | **0.9842** | **L2** | **No** |
| RidgeCV           | 0.7943                    | 0.9762   | L2 (CV Tuned)  | No                |
| Elastic Net       | 1.8555                    | 0.8804   | L1 & L2        | Yes (potential)   |
| ElasticNetCV      | 1.8555 (Incorrect Eval)   | 0.8804 (Incorrect Eval) | L1 & L2 (CV Tuned)| Yes (potential)   |

**Key Observations:**

* **Ridge Regression** achieved an R2 score (0.9842) and MAE (0.5845) very close to the best-performing Linear Regression (R2: 0.9847, MAE: 0.5648).
* **Regularization Benefit:** Ridge's L2 regularization offers potential benefits in handling multicollinearity and preventing overfitting without significantly sacrificing predictive performance compared to simple Linear Regression.
* **Lasso and Elastic Net:** While these models offer feature selection capabilities, their performance with default parameters was generally lower. LassoCV showed improvement but didn't surpass Ridge. The ElasticNetCV evaluation was flawed and requires correction.
* **RidgeCV:** Cross-validation for Ridge with default alpha values did not improve upon the performance of the simple Ridge model.

**Conclusion:**

Given the high R2 score and low MAE, coupled with the benefits of L2 regularization for robustness, **Ridge Regression appears to be the most suitable choice among the tested models for this prediction task.** While simple Linear Regression performed slightly better on the test set, Ridge offers a good balance of predictive accuracy and potential for better generalization due to regularization. Further hyperparameter tuning of Ridge could potentially yield even better results. The primary goal of the modeling and the importance of coefficient interpretability should also be considered in the final decision.