In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score, classification_report

%matplotlib inline

In [None]:
df = pd.read_csv("bodyfat.csv")

In [4]:
X = df[["Density", "Weight", "Chest", "Abdomen", "Hip"]]
y = df["BodyFat"]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4242)

# Decision Tree

In [10]:
dtr = DecisionTreeRegressor()
dtr.fit(X_train, y_train)
y_pred = dtr.predict(X_test)
print("Decision Tree: ", r2_score(y_test, y_pred))

Decision Tree:  0.9813308342739095


In [20]:
dtr_params = {
    "splitter": ["best", "random"],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [2, 5, 10],
    "max_features": ["sqrt"],
    "ccp_alpha": [1, 2, 2.5, 5]
}

In [21]:
dtr = DecisionTreeRegressor()
dtr_cv_model = GridSearchCV(dtr, dtr_params, cv=10, n_jobs=-1).fit(X_train, y_train)
dtr_cv_model.best_params_

{'ccp_alpha': 1,
 'max_features': 'sqrt',
 'min_samples_leaf': 10,
 'min_samples_split': 10,
 'splitter': 'best'}

In [23]:
dtr_tuned = DecisionTreeRegressor(ccp_alpha=1, max_features="sqrt", min_samples_leaf=10, min_samples_split=10, splitter="best").fit(X_train, y_train)
y_pred = dtr_tuned.predict(X_test)
print("Decision Tree: ", r2_score(y_test, y_pred))

Decision Tree:  0.859586077770098


# Random Forest

In [13]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Random Forest: ", r2_score(y_test, y_pred))

Random Forest:  0.9762476270256676


In [37]:
rf_params = {
    "n_estimators": [100, 250, 500,],
    "max_depth": [1, 15, 30],
    "min_samples_split": [2, 5],
    "min_samples_leaf": [2, 5],
    "max_features": ["sqrt"],
    "ccp_alpha": [1, 2, 2.5]
}

In [38]:
rf = RandomForestRegressor()
rf_cv_model = GridSearchCV(rf, rf_params, cv=10, n_jobs=-1).fit(X_train, y_train)
rf_cv_model.best_params_

{'ccp_alpha': 1,
 'max_depth': 15,
 'max_features': 'sqrt',
 'min_samples_leaf': 2,
 'min_samples_split': 5,
 'n_estimators': 500}

In [39]:
rf_tuned = RandomForestRegressor(ccp_alpha=1, max_depth=15, max_features="sqrt", min_samples_leaf=2, min_samples_split=5, n_estimators=500).fit(X_train, y_train)
y_pred = rf_tuned.predict(X_test)
print("Random Forest: ", r2_score(y_test, y_pred))

Random Forest:  0.8816160163239681


# SVR

In [14]:
svr = SVR()
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)
print("SVR: ", r2_score(y_test, y_pred))

SVR:  0.353818358958947


In [41]:
svr_params = {
    "C": [0.25, 0.50, 0.75, 0.1],
    "tol": [1e-10, 1e-5],
    "kernel": ["linear", "sigmoid"],
    "max_iter": [1, 100, 250]
}

In [43]:
svr = SVR()
svr_cr_model = GridSearchCV(svr, svr_params, cv=10, n_jobs=-1).fit(X_train, y_train)
svr_cr_model.best_params_



{'C': 0.1, 'kernel': 'linear', 'max_iter': 250, 'tol': 1e-10}

In [45]:
svr_tuned = SVR(C=0.1, kernel="linear", max_iter=250, tol=1e-10).fit(X_train, y_train)
y_pred = svr_tuned.predict(X_test)
print("SVR: ", r2_score(y_test, y_pred))

SVR:  0.7063420527711204




In [49]:
pickle.dump(rf_tuned, open("RandomForest.pkl", "wb"))