# In this notebook these points will be covered:

#### - Quick Visualization
#### - Robust Regression
#### - Ridge Regression
#### - Lasso Regression
#### - ElasticNet regression

## - Importing Libraries and Data:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Read the fire archive CSV from the Kaggle input directory and preview it.
fire_csv_path = '../input/fires-from-space-australia-and-new-zeland/fire_archive_M6_96619.csv'
df_fire = pd.read_csv(fire_csv_path)
df_fire

In [None]:
# Summarise the loaded frame: column names, dtypes and non-null counts.
df_fire.info()

## - Data preparation:

In [None]:
# Discard the columns that are not used as model inputs.
unused_columns = ['acq_date', 'acq_time', 'satellite', 'instrument', 'version', 'type']
df_fire = df_fire.drop(columns=unused_columns)

In [None]:
# Dummy-encode the `daynight` column (drop_first removes the first category
# level) and swap the encoded column(s) in for the original one.
daynight = pd.get_dummies(df_fire['daynight'], drop_first=True)
df_fire = pd.concat([df_fire.drop(columns=['daynight']), daynight], axis=1)


#### View null values in the heatmap below:

In [None]:
# Heatmap of the boolean null mask: a highlighted cell marks a missing value.
null_mask = df_fire.isnull()
plt.figure(figsize=(20, 10))
sns.heatmap(null_mask, cbar=False, yticklabels=False)

#### To show the correlation between variables

In [None]:
# Annotated heatmap of the pairwise correlation matrix of all columns.
figure = plt.figure(figsize=(10, 10))
correlations = df_fire.corr()
sns.heatmap(correlations, annot=True)


#### Quick visualization of the whole dataset

In [None]:
# Grid of pairwise relationship plots across the columns of the prepared frame.
sns.pairplot(df_fire)

## - Train Test Split

In [None]:
# `frp` is the regression target; every remaining column is a feature.
y = df_fire['frp']
X = df_fire.drop(columns='frp')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=108,
)

## - Create a DataFrame to contain the regression evaluation:

In [None]:
from sklearn import metrics
from sklearn.model_selection import cross_val_score

# Header layout of the evaluation table. `results_df` starts out empty and
# receives one row per evaluated model from `append_results`.
columns = ["Model", "Cross Val Score", "MAE", "MSE", "RMSE", "R2"]
results_df = pd.DataFrame()

def evaluate(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Predicted target values, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2_square)`` in that order.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    # RMSE is the square root of the MSE computed above; no need to score twice.
    rmse = np.sqrt(mse)
    r2_square = metrics.r2_score(true, predicted)
    return mae, mse, rmse, r2_square

def append_results(model_name, model, results_df, y_test, pred):
    """Return ``results_df`` extended with one row of scores for a model.

    Parameters
    ----------
    model_name
        Label stored in the "Model" column.
    model
        Estimator handed to ``cross_val_score``, which scores it with 10-fold
        cross-validation on the notebook-level ``X`` and ``y``.
    results_df : pandas.DataFrame
        Table accumulated so far; it is not modified in place.
    y_test, pred
        Held-out targets and the matching predictions, passed to ``evaluate``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the previous rows plus the new one, re-indexed
        from zero.
    """
    cv_score = cross_val_score(model, X, y, cv=10).mean()
    mae, mse, rmse, r2_square = evaluate(y_test, pred)

    # The row has to follow the order of the notebook-level `columns` list
    # (Model, Cross Val Score, MAE, MSE, RMSE, R2); otherwise every score
    # ends up under the wrong header.
    row = [[model_name, cv_score, mae, mse, rmse, r2_square]]
    results_append_df = pd.DataFrame(data=row, columns=columns)

    if results_df.empty:
        # Nothing to merge with yet; the new row is the whole table.
        return results_append_df

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([results_df, results_append_df], ignore_index=True)

# - Regression model Techniques:

## 1- Robust Regression :
#### It's an iterative method for observed data that contains outliers (_outliers influence the values of the estimates_)

In [None]:
from sklearn.linear_model import RANSACRegressor

# RANSAC fits on randomly drawn subsets of the training data, so the seed is
# pinned to get the same fitted model (and metrics) on every Restart & Run All.
ransacReg = RANSACRegressor(random_state=108)
ransacReg.fit(X_train, y_train)
pred = ransacReg.predict(X_test)


In [None]:
# The estimator passed here is only used for cross-validation; it is seeded so
# that the reported cross-validation score is reproducible between runs.
results_df = append_results(
    "Robust Regression",
    RANSACRegressor(random_state=108),
    results_df,
    y_test,
    pred,
)
results_df

In [None]:
# Distribution of the residuals (actual - predicted): if it is roughly
# bell-shaped and centred on zero, the model has no strong systematic bias.
# Plot the difference, not the tuple (y_test, pred), which would pool both
# arrays into a single histogram.
# NOTE: sns.distplot is deprecated since seaborn 0.11; sns.histplot(..., kde=True)
# is the replacement once the environment allows it.
figure = plt.figure(figsize=(10, 10))
sns.distplot(y_test - pred)

## 2- Ridge Regression:
#### The ridge coefficients minimize a penalized residual sum of squares (for data with multiple, possibly correlated, features)

In [None]:
from sklearn.linear_model import Ridge

# Fit a Ridge model with default settings, then predict the held-out rows.
RidgeReg = Ridge().fit(X_train, y_train)
pred = RidgeReg.predict(X_test)


In [None]:
# Record the Ridge scores; a fresh, unfitted estimator is passed for cross-validation.
results_df = append_results(
    model_name="Ridge Regression",
    model=Ridge(),
    results_df=results_df,
    y_test=y_test,
    pred=pred,
)
results_df

In [None]:
# Distribution of the Ridge residuals (actual - predicted). Plot the
# difference, not the tuple (y_test, pred), which would pool both arrays into
# a single histogram.
# NOTE: sns.distplot is deprecated since seaborn 0.11; sns.histplot(..., kde=True)
# is the replacement once the environment allows it.
figure = plt.figure(figsize=(10, 10))
sns.distplot(y_test - pred)

## 3- Lasso Regression:
#### It uses shrinkage (values are shrunk towards a central point, like the mean); it is well suited to models that have a high level of multicollinearity

In [None]:
from sklearn.linear_model import Lasso

# Fit a Lasso model with default settings, then predict the held-out rows.
LassoReg = Lasso().fit(X_train, y_train)
pred = LassoReg.predict(X_test)


In [None]:
# Record the Lasso scores; a fresh, unfitted estimator is passed for cross-validation.
results_df = append_results(
    model_name="Lasso Regression",
    model=Lasso(),
    results_df=results_df,
    y_test=y_test,
    pred=pred,
)
results_df

In [None]:
# Distribution of the Lasso residuals (actual - predicted). Plot the
# difference, not the tuple (y_test, pred), which would pool both arrays into
# a single histogram.
# NOTE: sns.distplot is deprecated since seaborn 0.11; sns.histplot(..., kde=True)
# is the replacement once the environment allows it.
figure = plt.figure(figsize=(10, 10))
sns.distplot(y_test - pred)

## 4- ElasticNet regression
#### It simply combines Lasso and Ridge regression

In [None]:
from sklearn.linear_model import ElasticNet

# Fit an ElasticNet model with default settings, then predict the held-out rows.
ElasticNetReg = ElasticNet().fit(X_train, y_train)
pred = ElasticNetReg.predict(X_test)


In [None]:
# Record the ElasticNet scores; a fresh, unfitted estimator is passed for cross-validation.
results_df = append_results(
    model_name="ElasticNet Regression",
    model=ElasticNet(),
    results_df=results_df,
    y_test=y_test,
    pred=pred,
)
results_df

In [None]:
# Distribution of the ElasticNet residuals (actual - predicted). Plot the
# difference, not the tuple (y_test, pred), which would pool both arrays into
# a single histogram.
# NOTE: sns.distplot is deprecated since seaborn 0.11; sns.histplot(..., kde=True)
# is the replacement once the environment allows it.
figure = plt.figure(figsize=(10, 10))
sns.distplot(y_test - pred)

In [None]:
# Persist the model comparison table to the working directory as CSV.
results_df.to_csv('resultsEval.csv')

# We see that Robust Regression has the best evaluation because it deals with the outliers in the dataset.

### Source : https://www.kaggle.com/aadhavvignesh/regression-with-scikit-learn-practical-ml-1
