<a href="https://colab.research.google.com/github/mmayeedsa/RegressionModels/blob/main/regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression Model Comparison: Random Forest, Decision Tree, Linear and Lasso

## Importing the libraries

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


## Importing the dataset

In [11]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')

# Load the dataset from the CSV file on the mounted Drive into a DataFrame.
file_path = '/content/drive/My Drive/MLAZ/Regression/Data.csv'
dataset = pd.read_csv(file_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# Every column except the last is a feature; the last column is the target.
feature_columns = dataset.iloc[:, :-1]
target_column = dataset.iloc[:, -1]
X = feature_columns.values
y = target_column.values

## Splitting the dataset into the Training set and Test set

In [20]:
# Sanity check: dimensions of the feature matrix as (rows, columns).
X.shape

(9568, 4)

In [21]:
# Sanity check: the target vector should have one entry per row of X.
y.shape

(9568,)

In [13]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible across kernel restarts. `train_test_split` is imported in the
# notebook's top imports cell together with the other scikit-learn names.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Training the Random Forest Regression model (and the comparison models) on the Training set

In [14]:
# Build a seeded 10-tree random forest and fit it on the training split;
# `fit` returns the estimator itself, so construction and fitting can be chained.
rfregressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
rfregressor  # keep the fitted estimator as the cell's displayed value

In [15]:
# Build a seeded decision tree and fit it on the training split;
# `fit` returns the estimator itself, so construction and fitting can be chained.
dtregressor = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
dtregressor  # keep the fitted estimator as the cell's displayed value

In [16]:
# Ordinary least-squares baseline, fitted on the training split;
# `fit` returns the estimator itself, so construction and fitting can be chained.
lregressor = LinearRegression().fit(X_train, y_train)
lregressor  # keep the fitted estimator as the cell's displayed value

In [25]:
# Lasso with its default settings, fitted on the training split;
# `fit` returns the estimator itself, so construction and fitting can be chained.
lasregressor = Lasso().fit(X_train, y_train)
lasregressor  # keep the fitted estimator as the cell's displayed value

In [26]:
# The fitted models to compare on the held-out test split.
listregressors = [rfregressor, dtregressor, lregressor, lasregressor]

# Both lookups are keyed by the fitted estimator object itself.
diry_pred, dirr2_score = {}, {}

for regressor in listregressors:
    # Predict once per model, then reuse the predictions for storage and scoring.
    y_pred = regressor.predict(X_test)
    diry_pred[regressor], dirr2_score[regressor] = y_pred, r2_score(y_test, y_pred)

In [27]:
# Show the test-set R^2 score of each model; the dict keys are the estimator
# objects, so each entry is printed using the estimator's repr.
print(dirr2_score)

{RandomForestRegressor(n_estimators=10, random_state=0): 0.9615908334363876, DecisionTreeRegressor(random_state=0): 0.922905874177941, LinearRegression(): 0.9325315554761303, Lasso(): 0.9322987861931235}


In [28]:
def find_best_model_using_gridsearchcv(X, y, random_state=0):
    """Grid-search several regressors and report the best configuration of each.

    Four candidates are tuned with ``GridSearchCV`` over a small, fixed
    parameter grid: linear regression (no tunable parameters), lasso,
    a decision tree and a random forest. Every candidate is evaluated with
    the same ``ShuffleSplit`` scheme (5 splits, 20% held out per split).

    Parameters
    ----------
    X : array-like
        Feature matrix, passed unchanged to ``GridSearchCV.fit``.
    y : array-like
        Target values, passed unchanged to ``GridSearchCV.fit``.
    random_state : int or None, default=0
        Seed given to the shuffle-split and to every estimator that uses
        randomness (lasso, decision tree, random forest), so repeated calls
        return the same table. Passing ``None`` leaves them unseeded and the
        results may then vary from run to run.

    Returns
    -------
    pandas.DataFrame
        One row per algorithm with the columns ``model`` (the algorithm's
        name), ``best_score`` (``GridSearchCV.best_score_``; no ``scoring`` is
        supplied, so the estimator's default scorer is used) and
        ``best_params`` (``GridSearchCV.best_params_``).
    """
    algos = {
        'linear_regression': {
            'model': LinearRegression(),
            # Empty grid: the search evaluates the default estimator only.
            'params': {},
        },
        'lasso': {
            # The seed matters for the 'random' coordinate-selection strategy.
            'model': Lasso(random_state=random_state),
            'params': {
                'alpha': [1, 2],
                'selection': ['random', 'cyclic'],
            },
        },
        'decision_tree': {
            'model': DecisionTreeRegressor(random_state=random_state),
            'params': {
                'splitter': ['best', 'random'],
            },
        },
        'random_forest': {
            'model': RandomForestRegressor(random_state=random_state),
            'params': {
                'n_estimators': [1, 5, 10],
            },
        },
    }

    # One seeded splitter shared by all searches, so each model is configured
    # with the same cross-validation scheme.
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=random_state)

    scores = []
    for algo_name, config in algos.items():
        gs = GridSearchCV(config['model'], config['params'], cv=cv, return_train_score=False)
        gs.fit(X, y)
        scores.append({
            'model': algo_name,
            'best_score': gs.best_score_,
            'best_params': gs.best_params_,
        })

    return pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

# Run the comparison on the full feature matrix and target; the function
# builds its own shuffle-split cross-validation folds internally.
find_best_model_using_gridsearchcv(X,y)

Unnamed: 0,model,best_score,best_params
0,linear_regression,0.929125,{}
1,lasso,0.928977,"{'alpha': 1, 'selection': 'cyclic'}"
2,decision_tree,0.926142,{'splitter': 'best'}
3,random_forest,0.957924,{'n_estimators': 10}





Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

