In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

In [None]:
# Load the pre-cleaned dataset; the path is relative, so it resolves
# against the kernel's current working directory.
df= pd.read_csv('Algerian_cleaned.csv')

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Inspect the available column names.
df.columns

In [5]:
# Drop the 'day', 'month' and 'year' columns so they are not carried into
# the feature matrix built later.
# Reassign rather than mutate with inplace=True: the step then reads as an
# explicit "df -> df without date columns" transformation.
df = df.drop(columns=['day', 'month', 'year'])

In [None]:
# Preview the frame again to confirm 'day', 'month' and 'year' are gone.
df.head()

In [None]:
# Count rows per raw 'Classes' label (before the labels are encoded).
df['Classes'].value_counts()

In [8]:
# Encoding: map the textual 'Classes' label to a binary integer.
#   label contains "not fire" -> 0
#   anything else             -> 1
# NOTE: run once per fresh load. After this cell the column holds integers,
# so the `.str` accessor no longer applies to it.
df['Classes'] = np.where(
    df['Classes'].str.contains("not fire"),
    0,
    1,
)

In [None]:
# Preview the frame with the encoded 'Classes' column.
df.head()

In [None]:
# Re-check the class balance after encoding
# (0 = label contained "not fire", 1 = everything else).
df['Classes'].value_counts()

In [11]:
## Independent and dependent features
# Target: the 'FWI' column. Predictors: every remaining column of df.
y = df["FWI"]
X = df.drop(columns="FWI")

In [None]:
# Display the predictor frame (all columns of df except 'FWI').
X

In [None]:
# Display the target series (the 'FWI' column).
y

In [14]:
## Train Test Split 
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# (rows, columns) of the train and test predictor frames.
X_train.shape ,X_test.shape

In [None]:
## Feature selection based on correlation
# Pairwise correlation matrix computed on the training predictors only,
# so the test split does not influence which features are kept.
X_train.corr()

In [None]:
## Check for multicollinearity
# Annotated heatmap of the training-feature correlation matrix.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(X_train.corr(), annot=True, ax=ax)

In [19]:
def corelation(dataset,threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The correlation matrix of ``dataset`` is computed with ``dataset.corr()``.
    Only the strictly-lower triangle is inspected, so each pair of columns is
    looked at once and, for a pair above the threshold, the *later* column
    (by position) is the one reported.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose columns are compared pairwise.
    threshold : float
        A column is flagged when the absolute correlation with any
        preceding column is strictly greater than this value.

    Returns
    -------
    set
        Column names flagged as redundant.
    """
    abs_corr = dataset.corr().abs()
    names = abs_corr.columns
    flagged = set()
    # Row 0 has no preceding columns, so start from row 1.
    for row in range(1, len(names)):
        # Entries left of the diagonal: correlations with earlier columns.
        if (abs_corr.iloc[row, :row] > threshold).any():
            flagged.add(names[row])
    return flagged

In [20]:
# Flag training features whose absolute correlation with an earlier column
# exceeds 0.85; these are treated as redundant.
corr_features = corelation(X_train,0.85)

In [21]:
# Remove the redundant (highly correlated) features from both splits.
# The column set was derived from the training data only and is applied
# unchanged to the test data so both frames keep identical columns.
# Reassign instead of inplace=True (the frames come out of train_test_split),
# and pass a list rather than the raw set as the labels argument.
cols_to_drop = list(corr_features)
X_train = X_train.drop(columns=cols_to_drop)
X_test = X_test.drop(columns=cols_to_drop)

In [None]:
# Confirm both splits have the same reduced number of columns.
X_train.shape , X_test.shape

## Feature Scaling or Standardization

In [24]:
from sklearn.preprocessing import StandardScaler

In [25]:
# Learn the scaling statistics from the training split only, then apply
# the same fitted transformation to both splits (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Display the standardized training features produced by the scaler.
X_train_scaled

## Box plots to understand the effect of outliers

In [None]:
# Side-by-side box plots of the training features before and after scaling.
fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(15, 5))
sns.boxplot(data=X_train, ax=ax_before)
ax_before.set_title("Before Scaling")
sns.boxplot(data=X_train_scaled, ax=ax_after)
ax_after.set_title("After Scaling")

# Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Fit ordinary least squares on the standardized training features.
linreg = LinearRegression().fit(X_train_scaled, y_train)

# Score on the held-out split.
y_pred = linreg.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("Mean Absolute Error", mae)
print("R2 Score", score)

# Actual (x) vs. predicted (y) target values.
plt.scatter(y_test, y_pred)

## Lasso Regression

In [None]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, r2_score

# Fit Lasso with its default hyperparameters on the standardized training features.
lasso = Lasso().fit(X_train_scaled, y_train)

# Score on the held-out split.
y_pred = lasso.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("Mean Absolute Error", mae)
print("R2 Score", score)

# Actual (x) vs. predicted (y) target values.
plt.scatter(y_test, y_pred)

## Ridge Regression Model

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, r2_score

# Fit Ridge with its default hyperparameters on the standardized training features.
# `rig` is the model that gets pickled at the end of the notebook.
rig = Ridge().fit(X_train_scaled, y_train)

# Score on the held-out split.
y_pred = rig.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("Mean Absolute Error", mae)
print("R2 Score", score)

# Actual (x) vs. predicted (y) target values.
plt.scatter(y_test, y_pred)

In [None]:
## ElasticNet Regression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Fit ElasticNet (default hyperparameters) on the standardized training
# features. The estimator must be ElasticNet itself: instantiating
# LinearRegression here would only reproduce the plain OLS results.
EL = ElasticNet()
EL.fit(X_train_scaled,y_train)

# Score on the held-out split.
y_pred =EL.predict(X_test_scaled)
mae = mean_absolute_error(y_test,y_pred)
score = r2_score(y_test,y_pred)
print("Mean Absolute Error",mae)
print("R2 Score",score)

# Actual (x) vs. predicted (y) target values.
plt.scatter(y_test,y_pred)

In [90]:
import pickle

# Persist the fitted scaler and the Ridge model so they can be reloaded
# together later. Context managers guarantee each file is flushed and
# closed even if pickling raises.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)
with open("rig.pkl", "wb") as model_file:
    pickle.dump(rig, model_file)