# Random Forest Regression

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Read the raw table, then separate it positionally:
# every column except the last is a feature, the last column is the target.
dataset = pd.read_csv('Data.csv')
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
X, y = feature_frame.values, target_series.values

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_arrays = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_arrays

## Training the Random Forest Regression model on the Training set

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Ten-tree forest with a fixed seed, fitted on the training split only.
forest_settings = {'n_estimators': 10, 'random_state': 0}
regressor = RandomForestRegressor(**forest_settings)
# Kept as a bare final expression so the notebook still displays the fitted estimator.
regressor.fit(X_train, y_train)

## Predicting the Test set results

In [None]:
# Predict the held-out rows and print them next to the true values
# (left column: prediction, right column: actual), rounded to two decimals.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
predicted_column = y_pred.reshape((y_pred.shape[0], 1))
actual_column = y_test.reshape((y_test.shape[0], 1))
print(np.concatenate((predicted_column, actual_column), axis=1))

## Evaluating the Model Performance

In [None]:
from sklearn.metrics import r2_score

# R squared of the test-set predictions, reported as a percentage.
# The '.2f' spec fixes two decimal places; a bare '2f' would only set a
# minimum field width of 2 and fall back to the default six decimals.
print('R squared score : {:.2f}'.format(r2_score(y_test, y_pred)*100))

## Applying K-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation on the training split.
# No `scoring` argument is given, so cross_val_score uses the estimator's own
# `score` method; for a regressor that is R squared, not classification accuracy.
# The variable keeps its original name `accuracy`, but it holds per-fold R squared values.
accuracy = cross_val_score(estimator=regressor, X=X_train, y=y_train, cv=10)
# '.2f' = two decimal places (a bare '2f' is a field width, not a precision).
print('Mean R squared : {:.2f}'.format(accuracy.mean()*100))
print('Standard Deviation : {:.2f}'.format(accuracy.std()*100))

In [None]:
# 10-fold cross-validation run on the test split.
# NOTE(review): cross_val_score clones and refits the estimator on each fold, so
# this trains fresh models on slices of the (smaller) test split rather than
# evaluating the forest fitted on the training split. Treat the numbers as a
# separate experiment, not as a test-set score for that model.
# With no `scoring` argument the per-fold score is the regressor's R squared;
# `accuracy` is kept as the variable name only for continuity.
accuracy = cross_val_score(estimator=regressor, X=X_test, y=y_test, cv=10)
# '.2f' = two decimal places (a bare '2f' is a field width, not a precision).
print('Mean R squared : {:.2f}'.format(accuracy.mean()*100))
print('Standard Deviation : {:.2f}'.format(accuracy.std()*100))

## Grid Search

In [9]:
# NOTE(review): `coroutine` is not used anywhere in this cell; it looks like an
# accidental editor auto-import. Remove it once confirmed unused elsewhere.
from types import coroutine
from sklearn.model_selection import GridSearchCV

# Exhaustive hyper-parameter search for the random forest, scored by R squared.
#
# Cost warning: the grid holds 31 * 10 * 5 * n_features * 9 combinations and each
# one is fitted 10 times (cv=10). Expect a very long run; thin the value lists
# (or switch to a randomized search) if the cell has to finish in reasonable time.
n_features = X_train.shape[1]
parameters = [{
    # Depth must be at least 1, so the range starts at 1 (0 is not a valid depth).
    'max_depth': list(range(1, 32)),
    # Fractions of the training samples: 0.1, 0.2, ..., 1.0.
    'min_samples_split': np.linspace(0.1, 1.0, 10, endpoint=True),
    # Fractions of the training samples: 0.1, 0.2, ..., 0.5.
    'min_samples_leaf': np.linspace(0.1, 0.5, 5, endpoint=True),
    # Inclusive upper bound so "use every feature" is part of the search and the
    # list is never empty when the data has a single feature.
    'max_features': list(range(1, n_features + 1)),
    'n_estimators': [1, 2, 4, 8, 16, 32, 64, 100, 200],
}]
grid_search = GridSearchCV(estimator=regressor,
                           param_grid=parameters,
                           # 'accuracy' is a classification metric and is not
                           # defined for continuous targets; use R squared.
                           scoring='r2',
                           cv=10,
                           n_jobs=-1)
grid_search.fit(X_train, y_train.ravel())
# Names kept for continuity; `best_accuracy` holds the best mean cross-validated R squared.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print('Best R squared : ', best_accuracy*100)
print('Parameters : ', best_parameters)

KeyboardInterrupt: 