# Introduction

1. [Load and Check Data](#1)
1. [Linear Regression](#2)
1. [Multiple Linear Regression](#3)
1. [Polynomial Regression](#4)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<a id = '1'></a><br>
## Load and Check Data

In [None]:
# Load the dataset CSV from the Kaggle input directory into a DataFrame.
DATA_PATH = "/kaggle/input/biomechanical-features-of-orthopedic-patients/column_2C_weka.csv"
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview only the first rows; displaying the whole frame adds noise without extra insight.
data.head()

In [None]:
# Summarize the frame: column names, non-null counts and dtypes.
data.info()

pd.plotting.scatter_matrix:
* red: normal and blue: abnormal
* c: color
* figsize: figure size
* diagonal: histogram of each feature
* alpha: opacity
* s: size of marker
* marker: marker type

In [None]:
# Map each sample's class label to a marker color (Abnormal -> blue, anything else -> red).
# Named `colors` rather than `list` so the built-in `list` type is not shadowed
# for every cell that runs afterwards.
colors = ['blue' if label == 'Abnormal' else 'red' for label in data['class']]

# Pairwise scatter plots of all feature columns (the label column is excluded),
# with a histogram of each feature on the diagonal.
pd.plotting.scatter_matrix(data.loc[:, data.columns != 'class'],
                           c=colors,
                           figsize=[15, 15],
                           diagonal='hist',
                           alpha=0.5,
                           s=200,
                           marker='*',
                           edgecolor='black')
plt.show()

In [None]:
# Correlate the numeric columns only. The string-valued `class` column makes
# DataFrame.corr() raise on pandas >= 2.0, where non-numeric columns are no longer
# dropped silently; select_dtypes works the same way on older and newer pandas.
data.select_dtypes(include="number").corr()

<a id = '2'></a><br>
## Linear Regression

In [None]:
# Simple linear regression: predict sacral_slope from lumbar_lordosis_angle.
# LinearRegression is already imported in the notebook's import cell, so it is not
# re-imported here.
linear_regression = LinearRegression()

# scikit-learn expects 2-D arrays, hence the reshape of each column to shape (n_samples, 1).
x = data["lumbar_lordosis_angle"].values.reshape(-1, 1)
y = data["sacral_slope"].values.reshape(-1, 1)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=40,
)

## Fit

In [None]:
# Fit the simple linear model on the training split.
linear_regression.fit(x_train,y_train)

In [None]:
# b0 is the fitted intercept, b1 the fitted slope coefficient(s).
b0, b1 = linear_regression.intercept_, linear_regression.coef_
print("intercept: ", b0)
print("coef: ", b1)

## Predict

In [None]:
# Predict on the held-out split and show predictions next to the true targets.
y_head = linear_regression.predict(x_test)
y_head_df = pd.DataFrame(y_head, columns=["Predicted Response"])
# Column label fixed from "Reals Values" to match the other sections of the notebook.
y_test_df = pd.DataFrame(y_test, columns=["Real Values"])

pd.concat([y_head_df, y_test_df], axis=1)

## Visualize

In [None]:
# Test samples (red points) together with the fitted regression line (orange).
fig, ax = plt.subplots()
ax.scatter(x_test, y_test, color="red")
ax.plot(x_test, y_head, color="orange")
ax.set_xlabel("lumbar_lordosis_angle")
ax.set_ylabel("sacral_slope")
plt.show()

<a id = '3'></a><br>
# Multiple Linear Regression

In [None]:

# Multiple linear regression: predict lumbar_lordosis_angle from the remaining features.
# Columns are selected by name rather than by position so the feature set cannot
# silently change (or include the target) if the CSV column order differs.
# NOTE(review): assumes the former positional selection [0, 1, 3, 4, 5] meant
# "every column except the target and the class label" — confirm.
x = data.drop(columns=["lumbar_lordosis_angle", "class"]).values
y = data["lumbar_lordosis_angle"].values.reshape(-1, 1)

In [None]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=0,
)

## Fit

In [None]:
# Create a fresh linear model and fit it on the multi-feature training split.
multiple_lr = LinearRegression()
multiple_lr.fit(x_train, y_train)

In [None]:
# Report the fitted intercept and the per-feature coefficients.
for label, value in (("intercept: ", multiple_lr.intercept_),
                     ("coef: ", multiple_lr.coef_)):
    print(label, value)

## Predict

In [None]:
# Predict on the held-out split and show predictions next to the true targets.
y_head = multiple_lr.predict(x_test)
y_head_df = pd.DataFrame(data=y_head, columns=["Predicted Response"])
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
side_by_side = [y_head_df, y_test_df]
pd.concat(side_by_side, axis="columns")

In [None]:
# Evaluate the multiple linear regression on the test split.
score, MSE = r2_score(y_test, y_head), mean_squared_error(y_test, y_head)

print("R^2 Score: ", score)
print("MSE: ", MSE)

<a id = '4'></a><br>
# Polynomial Regression

In [None]:
# Single feature / single target, each reshaped to a column of shape (n_samples, 1).
feature_col, target_col = "lumbar_lordosis_angle", "sacral_slope"
x = data[feature_col].values.reshape(-1, 1)
y = data[target_col].values.reshape(-1, 1)

In [None]:
# Expand the single feature into its polynomial terms up to degree 6.
poly = PolynomialFeatures(degree=6)
x_poly = poly.fit(x).transform(x)

In [None]:
# Split the polynomial features 80/20; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x_poly,
    y,
    test_size=0.2,
    random_state=10,
)

## Fit

In [None]:
# Polynomial regression is an ordinary linear model fitted on the expanded polynomial features.
poly_reg = LinearRegression()
poly_reg.fit(x_train,y_train)

## Get Results

In [None]:
# Report the fitted intercept and one coefficient per polynomial term.
for label, value in (("intercept: ", poly_reg.intercept_),
                     ("coef: ", poly_reg.coef_)):
    print(label, value)

In [None]:
# Predict on the held-out split and show predictions next to the true targets.
y_head = poly_reg.predict(x_test)
y_head_df = pd.DataFrame(data=y_head, columns=["Predicted Response"])
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
side_by_side = [y_head_df, y_test_df]
pd.concat(side_by_side, axis="columns")

In [None]:
# Evaluate the polynomial regression on the test split.
score, MSE = r2_score(y_test, y_head), mean_squared_error(y_test, y_head)

print("R2 Score : {}".format(score))
print("MSE : {}".format(MSE))

## Visualize

In [None]:
# Predictions for every sample, in the original row order.
y_head = poly_reg.predict(x_poly)

# Sort the samples by feature value so the fitted curve is drawn left to right.
# The predictions are reordered with the same permutation as x: plotting sorted x
# against predictions in the original row order draws a scrambled zig-zag instead
# of the curve. x and y themselves are left untouched so the cell can be re-run.
order = np.argsort(np.ravel(x))
x_sorted = np.ravel(x)[order]
y_sorted = np.ravel(y)[order]
y_head_sorted = np.ravel(y_head)[order]

plt.scatter(x_sorted, y_sorted, color="blue")
plt.plot(x_sorted, y_head_sorted, color="red")
plt.xlabel("lumbar_lordosis_angle")
plt.ylabel("sacral_slope")
plt.show()

# Decision Tree

In [None]:
# Quick reminder of the available columns; only the first rows are shown to keep the output short.
data.head()

In [None]:
# Features: every column except the regression target and the class label.
x = data.drop(columns=["lumbar_lordosis_angle", "class"])

# Target: lumbar_lordosis_angle as a column of shape (n_samples, 1).
y = data["lumbar_lordosis_angle"].values.reshape(-1, 1)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
# The fourth result must be bound to `y_test` (it was misspelled `t_test`); otherwise
# the evaluation cells below keep using the stale `y_test` from the previous section.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=20)

## Fit

In [None]:
# Fix the seed so the fitted tree (and therefore the reported metrics) is the same on
# every run; without it, tie-breaking between equally good splits is random.
tree = DecisionTreeRegressor(random_state=20)
dtree = tree.fit(x_train, y_train)

In [None]:
# Predict on the held-out split and show predictions next to the true targets.
y_head = dtree.predict(x_test)
y_head_df = pd.DataFrame(data=y_head, columns=["Predicted Response"])
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
side_by_side = [y_head_df, y_test_df]
pd.concat(side_by_side, axis="columns")

## Visualize

In [None]:
# Draw the fitted tree on the axes created here (previously `fig`/`axes` were created
# but never used) and label split nodes with the training columns' names instead of
# anonymous feature indices. x_train is a DataFrame, so its columns carry the names.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(17, 10), dpi=500)
plot_tree(dtree, feature_names=x_train.columns.tolist(), ax=axes);


In [None]:
# Evaluate the decision tree on the test split.
score, MSE = r2_score(y_test, y_head), mean_squared_error(y_test, y_head)

print("R2 Score : {}".format(score))
print("MSE : {}".format(MSE))

# Random Forest

In [None]:
# Features: every column except the regression target and the class label.
x = data.drop(columns=["pelvic_incidence", "class"])
# Target: pelvic_incidence as a column of shape (n_samples, 1).
y = data["pelvic_incidence"].values.reshape(-1, 1)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
# The fourth result must be bound to `y_test` (it was misspelled `t_test`); otherwise
# the evaluation cells below compare predictions against a stale `y_test` that
# belongs to a different target and a different split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=20)

## Fit

In [None]:
# Seed the forest so bootstrap sampling and feature selection, and therefore the
# reported metrics, are reproducible across runs.
model_rf = RandomForestRegressor(n_estimators=100, random_state=20)
# The forest expects a 1-D target; passing the (n_samples, 1) column vector triggers
# a DataConversionWarning, so flatten it first.
model_rf.fit(x_train, np.ravel(y_train))

In [None]:
# Predict on the held-out split and show the true targets next to the predictions.
y_pred = model_rf.predict(x_test)
y_pred_df = pd.DataFrame(data=y_pred, columns=["Predicted Response"])
y_test_df = pd.DataFrame(data=y_test, columns=["Real Values"])
side_by_side = [y_test_df, y_pred_df]
pd.concat(side_by_side, axis="columns")

In [None]:
# Evaluate the random forest on the test split.
score, MSE = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

print("R2 Score : {}".format(score))
print("MSE : {}".format(MSE))