# Random Forest Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('500_Person_Gender_Height_Weight_Index.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

### Encoding the Independent Variable

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder="passthrough")
X_train = np.array(ct.fit_transform(X_train))

In [5]:
X_train = X_train[:,1:]

In [6]:
X_train

array([[0.0, 174, 65],
       [1.0, 188, 65],
       [1.0, 183, 131],
       ...,
       [1.0, 187, 96],
       [1.0, 170, 81],
       [1.0, 144, 80]], dtype=object)

## Training the Random Forest Regression model on the whole dataset

In [7]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 100)
regressor.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

# Predicting the x_test

In [8]:
y_test_pred = regressor.predict(ct.fit_transform(X_test)[:,1:])

# Accurecy

In [9]:
import sklearn.metrics as sm
print("Mean absolute error =", round(sm.mean_absolute_error(y_test, y_test_pred), 2)) 
print("Mean squared error =", round(sm.mean_squared_error(y_test, y_test_pred), 2)) 
print("Median absolute error =", round(sm.median_absolute_error(y_test, y_test_pred), 2)) 
print("Explain variance score =", round(sm.explained_variance_score(y_test, y_test_pred), 2)) 
print("R2 score =", round(sm.r2_score(y_test, y_test_pred), 2))

Mean absolute error = 0.18
Mean squared error = 0.09
Median absolute error = 0.03
Explain variance score = 0.95
R2 score = 0.95


## Predicting a new result

In [10]:
regressor.predict(ct.transform([["Male",163,89]])[:,1:])

array([3.97])

# Conclusion


> In this model I have used data of 500 person Gender, Height, Weight and Index

> data terms:

1. Gender : Male / Female
2. Height: Number (cm)
3. Weight: Number (Kg)
4. Index :
  * 0 - Extremely Weak
  * 1 - Weak
  * 2 - Normal
  * 3 - Overweight
  * 4 - Obesity
  * 5 - Extreme Obesity

> Conclusion:
Random Forest Regression model is one of the perfect model to predict the index based on the gender's height and weight
<br>
As model gives the 95% accuracy
