# Introduction

1. Linear Regression
2. Multiple Regression
3. Polynomial Regression
4. Decision Tree
5. Random Forest

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import operator

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Dataset

## Columns

Each patient is represented in the data set by six biomechanical attributes derived from the shape and orientation of the pelvis and lumbar spine (each one is a column):

* pelvic incidence
* pelvic tilt
* lumbar lordosis angle
* sacral slope
* pelvic radius
* grade of spondylolisthesis


In [None]:
# Load both label variants of the dataset and stack them row-wise.
data_2c = pd.read_csv("/kaggle/input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv")
data_3c = pd.read_csv("/kaggle/input/biomechanical-features-of-orthopedic-patients/column_3C_weka.csv")

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# every row label appears twice (once per file), which breaks label-aligned
# operations such as reindexing.
# NOTE(review): the two files appear to describe the same patients with
# different class labels - if so, stacking them duplicates every sample and
# leaks rows between train and test splits. Confirm before trusting scores.
data = pd.concat([data_2c, data_3c], axis=0, ignore_index=True)
data.head()

## Describe

In [None]:
# Print column names, dtypes and non-null counts of the combined frame.
data.info()

In [None]:
# Summary statistics, transposed so each feature is shown on its own row.
data.describe().transpose()

In [None]:
# Distinct labels present in the "class" column.
pd.unique(data["class"])

## Visualize

In [None]:
# Pairwise scatter/distribution grid of the features, coloured by class label.
# The trailing semicolon suppresses the notebook's repr of the returned grid.
sns.pairplot(data=data, hue="class");

In [None]:
# Correlation heatmap of the numeric features.
# The frame still contains the non-numeric "class" label column at this point;
# pandas >= 2.0 raises on non-numeric columns in DataFrame.corr() instead of
# silently skipping them, so restrict the frame to numeric columns first.
numeric_corr = data.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, cmap="coolwarm");

## Preprocessing

In [None]:
# Remove the label column and both spellings of the pelvic tilt column
# (in place), keeping only the features used by the regressions below.
unused_columns = ["class", "pelvic_tilt", "pelvic_tilt numeric"]
data.drop(columns=unused_columns, inplace=True)
data.head()

# Linear Regression
## Data Processing

In [None]:
# Single-feature setup: predict pelvic_incidence from sacral_slope.
# Double-bracket selection keeps each column two-dimensional: (n_samples, 1).
X = data[["sacral_slope"]].values
y = data[["pelvic_incidence"]].values

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Create Model & Fit

In [None]:
# Ordinary least-squares fit on the training split.
model_lr = LinearRegression()
model_lr.fit(X=X_train, y=y_train)

## Get Results

In [None]:
# Report the fitted intercept and coefficient(s) of the simple linear model.
for label, value in (("intercept : ", model_lr.intercept_), ("slope : ", model_lr.coef_)):
    print(label, value)

## Predict

In [None]:
# Predict on the held-out rows and show predictions next to the true values.
y_pred = model_lr.predict(X_test)
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
y_pred_df = pd.DataFrame(data=y_pred, columns=["Predicted Response"])
pd.concat((y_test_df, y_pred_df), axis="columns")

## Metrics

In [None]:
# Evaluate the simple linear model on the test split.
score = r2_score(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)

# One print call with a newline separator produces the same two output lines.
print("R2 Score : {}".format(score), "MSE : {}".format(MSE), sep="\n")

## Visualize

In [None]:
# Test points (navy) against the fitted regression line (orange).
plt.scatter(X_test, y_test, color="navy")
plt.plot(X_test, y_pred, color="orange")
# X holds sacral_slope and y holds pelvic_incidence (see the data-processing
# cell above); the axis labels were previously swapped.
plt.xlabel("sacral_slope")
plt.ylabel("pelvic_incidence")
plt.show()

# Multiple Linear Regression
## Preprocessing

In [None]:
# Multi-feature setup: every remaining column predicts pelvic_incidence.
X = data.drop(columns=["pelvic_incidence"])
# Double-bracket selection keeps the target two-dimensional: (n_samples, 1).
y = data[["pelvic_incidence"]].values

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=0,
)

## Create Model & Fit

In [None]:
# Least-squares fit using all predictor columns of the training split.
model_mlr = LinearRegression()
model_mlr.fit(X=X_train, y=y_train)

## Get Results

In [None]:
# Report the fitted intercept and per-feature coefficients.
for label, value in (("intercept : ", model_mlr.intercept_), ("slope : ", model_mlr.coef_)):
    print(label, value)

## Predict

In [None]:
# Predict on the held-out rows and show predictions next to the true values.
y_pred_mlr = model_mlr.predict(X_test)
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
y_pred_df = pd.DataFrame(data=y_pred_mlr, columns=["Predicted Response"])
pd.concat((y_test_df, y_pred_df), axis="columns")

## Metrics

In [None]:
# Evaluate the multiple-regression model on the test split.
score = r2_score(y_test, y_pred_mlr)
MSE = mean_squared_error(y_test, y_pred_mlr)

# One print call with a newline separator produces the same two output lines.
print("R2 Score : {}".format(score), "MSE : {}".format(MSE), sep="\n")

# Polynomial Regression
## Data Preprocessing

In [None]:
# Reload the raw files so this section starts from a clean frame.
data_2c = pd.read_csv("/kaggle/input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv")
data_3c = pd.read_csv("/kaggle/input/biomechanical-features-of-orthopedic-patients/column_3C_weka.csv")

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# every row label appears twice (once per file).
# NOTE(review): the two files appear to describe the same patients - if so,
# stacking them duplicates every sample. Confirm before trusting scores.
data = pd.concat([data_2c, data_3c], axis=0, ignore_index=True)

# Remove the label column and both spellings of the pelvic tilt column.
data.drop(["class", "pelvic_tilt", "pelvic_tilt numeric"], axis=1, inplace=True)
data.head()

In [None]:
# Order rows by the predictor (ascending) so the fitted curve can later be
# drawn left to right.
data = data.sort_values("degree_spondylolisthesis")

In [None]:
# Single-feature setup: predict pelvic_incidence from degree_spondylolisthesis.
# Double-bracket selection keeps each column two-dimensional: (n_samples, 1).
X = data[["degree_spondylolisthesis"]].values
y = data[["pelvic_incidence"]].values

In [None]:
# Expand the single predictor into polynomial terms up to degree 3
# (bias column, x, x^2, x^3).
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit(X).transform(X)

In [None]:
# Hold out 20% of the expanded rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_poly,
    y,
    test_size=0.20,
    random_state=0,
)

## Create Model & Fit

In [None]:
# Linear regression on the polynomial feature expansion.
model_poly = LinearRegression()
model_poly.fit(X=X_train, y=y_train)

## Get Results

In [None]:
# Report the fitted intercept and the coefficient of each polynomial term.
for label, value in (("intercept : ", model_poly.intercept_), ("slope : ", model_poly.coef_)):
    print(label, value)

In [None]:
# Predict on the held-out rows and show predictions next to the true values.
y_pred_poly = model_poly.predict(X_test)
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
y_pred_df = pd.DataFrame(data=y_pred_poly, columns=["Predicted Response"])
pd.concat((y_test_df, y_pred_df), axis="columns")

## Metrics

In [None]:
# Evaluate the polynomial model on the test split.
score = r2_score(y_test, y_pred_poly)
MSE = mean_squared_error(y_test, y_pred_poly)

# One print call with a newline separator produces the same two output lines.
print("R2 Score : {}".format(score), "MSE : {}".format(MSE), sep="\n")

## Visualize

In [None]:
# Predict over the full expanded feature matrix so the curve spans every sample.
y_pred = model_poly.predict(X_poly)

# Sort X, y and the predictions with one shared permutation so each prediction
# stays paired with its own sample. Sorting only (X, y) leaves y_pred in the
# original row order, which is correct only if the data was already sorted.
x_flat = np.asarray(X).ravel()
order = np.argsort(x_flat, kind="stable")
x_sorted = x_flat[order]
y_sorted = np.asarray(y).ravel()[order]
y_pred_sorted = np.asarray(y_pred).ravel()[order]

# Observations (navy) against the fitted cubic curve (orange).
plt.scatter(x_sorted, y_sorted, color="navy")
plt.plot(x_sorted, y_pred_sorted, color="orange")
# X holds degree_spondylolisthesis and y holds pelvic_incidence; the previous
# labels named columns that are not plotted here.
plt.xlabel("degree_spondylolisthesis")
plt.ylabel("pelvic_incidence")
plt.show()

# Decision Tree
## Preprocessing

In [None]:
# Multi-feature setup: every remaining column predicts pelvic_incidence.
X = data.drop(columns=["pelvic_incidence"])
# Double-bracket selection keeps the target two-dimensional: (n_samples, 1).
y = data[["pelvic_incidence"]].values

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Create Model & Fit

In [None]:
# Seed the regressor so repeated runs build the same tree: scikit-learn permutes
# features at every split, so equally good splits can otherwise be chosen
# differently from run to run. The seed matches the train/test split.
tree = DecisionTreeRegressor(random_state=0)
model_dtree = tree.fit(X_train, y_train)

## Predict

In [None]:
# Predict on the held-out rows and show predictions next to the true values.
y_pred = model_dtree.predict(X_test)
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
y_pred_df = pd.DataFrame(data=y_pred, columns=["Predicted Response"])
pd.concat((y_test_df, y_pred_df), axis="columns")

## Visualize

In [None]:
# Render the fitted tree on an explicitly created, large high-resolution canvas.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(17, 10), dpi=500)
# Draw on the axes created above rather than relying on the implicit current
# axes, and label split nodes with the training column names instead of the
# positional x[i] placeholders.
plot_tree(model_dtree, ax=axes, feature_names=list(X_train.columns));

## Metrics

In [None]:
# Evaluate the decision tree on the test split.
score = r2_score(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)

# One print call with a newline separator produces the same two output lines.
print("R2 Score : {}".format(score), "MSE : {}".format(MSE), sep="\n")

# Random Forest
## Preprocessing

In [None]:
# Multi-feature setup: every remaining column predicts pelvic_incidence.
X = data.drop(columns=["pelvic_incidence"])
# Double-bracket selection keeps the target two-dimensional: (n_samples, 1).
y = data[["pelvic_incidence"]].values

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Create Model & Fit

In [None]:
# Forest of 100 trees. Seeded so bootstrap sampling and split selection are
# reproducible across runs, matching the seed of the train/test split.
model_rf = RandomForestRegressor(n_estimators=100, random_state=0)
# y_train is a column vector of shape (n_samples, 1). The forest expects a 1-D
# target for single-output regression and emits a DataConversionWarning
# otherwise, so flatten it before fitting.
model_rf.fit(X_train, np.ravel(y_train))

## Predict

In [None]:
# Predict on the held-out rows and show predictions next to the true values.
y_pred = model_rf.predict(X_test)
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
y_pred_df = pd.DataFrame(data=y_pred, columns=["Predicted Response"])
pd.concat((y_test_df, y_pred_df), axis="columns")

## Metrics

In [None]:
# Evaluate the random forest on the test split.
score = r2_score(y_test, y_pred)
MSE = mean_squared_error(y_test, y_pred)

# One print call with a newline separator produces the same two output lines.
print("R2 Score : {}".format(score), "MSE : {}".format(MSE), sep="\n")