In [None]:
!pip install lazypredict

In [4]:
# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

%matplotlib inline


In [5]:
# load the kyphosis data from the working directory and preview the first rows
df = pd.read_csv(filepath_or_buffer='kyphosis.csv')
df.head(n=5)

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [6]:
# replace the target variable with 0 and 1 ('absent' -> 0, 'present' -> 1)
# Values that have no entry in the mapping (for instance labels already encoded
# by an earlier run of this cell) are passed through unchanged, so executing the
# cell a second time no longer turns the whole column into NaN.
target_codes = {'absent': 0, 'present': 1}
df['Kyphosis'] = df['Kyphosis'].map(lambda label: target_codes.get(label, label))
df.head()


Unnamed: 0,Kyphosis,Age,Number,Start
0,0,71,3,5
1,0,158,3,14
2,1,128,4,5
3,0,2,5,1
4,0,1,4,15


In [5]:
# count the missing entries in every column
df.isna().sum()

Kyphosis    0
Age         0
Number      0
Start       0
dtype: int64

In [7]:
# reorder the columns so that the target (Kyphosis) comes last
df = df.loc[:, ['Age', 'Number', 'Start', 'Kyphosis']]
df.head()



Unnamed: 0,Age,Number,Start,Kyphosis
0,71,3,5,0
1,158,3,14,0
2,128,4,5,1
3,2,5,1,0
4,1,4,15,0


In [None]:
# plotly scatter matrix of the three feature columns, coloured by the target
import plotly.express as px
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['Age', 'Number', 'Start'],
    color='Kyphosis',
)
fig.show()


In [None]:
# pairwise correlation of all columns, drawn as an annotated plotly heatmap
correlation = df.corr()
fig = px.imshow(img=correlation, text_auto=True)
fig.show()


In [None]:
# 3-D scatter of the three features; the Kyphosis column selects the colour
fig = px.scatter_3d(
    data_frame=df,
    x='Age',
    y='Number',
    z='Start',
    color='Kyphosis',
    color_continuous_scale='Viridis',
)
fig.show()


In [10]:
# use Smote to balance the dataset
from imblearn.over_sampling import SMOTE

def balance_dataset(X, y, random_state=42):
    """Resample ``X`` and ``y`` with SMOTE and return the resampled pair.

    Parameters
    ----------
    X : features, passed unchanged to ``SMOTE.fit_resample``.
    y : target labels, passed unchanged to ``SMOTE.fit_resample``.
    random_state : seed handed to the ``SMOTE`` constructor. The default of 42
        is the value that used to be hard-coded, so existing calls behave as
        before.

    Returns
    -------
    tuple
        ``(X_res, y_res)`` exactly as produced by ``fit_resample``.
    """
    sampler = SMOTE(random_state=random_state)
    X_res, y_res = sampler.fit_resample(X, y)
    return X_res, y_res

# separate the feature columns from the target column
X = df.drop(columns='Kyphosis')
y = df['Kyphosis']

# oversample, then count how many rows each class has afterwards
X_res, y_res = balance_dataset(X, y)
y_res.value_counts()



0    64
1    64
Name: Kyphosis, dtype: int64

In [None]:
# Only the last expression of a cell is rendered automatically; tables produced
# earlier in the cell must be passed to display() or they are silently dropped.
from IPython.display import display

# check the info of the data (info() prints its report itself)
df.info()

# check the description of the data
display(df.describe())

# check the correlation of the data
corr_table = df.corr()
display(corr_table)

# visualize the correlation of the data
sns.heatmap(corr_table, annot=True)

# visualize the data
sns.pairplot(df, hue='Kyphosis')

# split the data into train and test
# NOTE(review): the split is taken from df, not from the SMOTE output
# (X_res / y_res) computed in the earlier cell - confirm this is intended.
from sklearn.model_selection import train_test_split
X = df.drop('Kyphosis', axis=1)
y = df['Kyphosis']
# random_state pins the shuffle so the metrics below are reproducible on re-run;
# stratify=y keeps the class proportions of the target equal in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# import the decision tree classifier
from sklearn.tree import DecisionTreeClassifier

# create an instance of the decision tree classifier
# (random_state is fixed so the fitted tree is the same on every run)
dtree = DecisionTreeClassifier(random_state=42)

# fit the model on the training partition
dtree.fit(X_train, y_train)

# predict the labels of the held-out test partition
predictions = dtree.predict(X_test)

# import the classification report and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix

# print the classification report and confusion matrix
print(classification_report(y_test, predictions))

print(confusion_matrix(y_test, predictions))

# visualize the decision tree
# StringIO comes from the standard library: sklearn.externals.six is not
# available in current scikit-learn releases.
from io import StringIO
from IPython.display import Image, display
from sklearn.tree import export_graphviz
import pydot

# Label the nodes with the columns the tree was actually fitted on. Slicing
# df.columns[1:] would skip 'Age' and include the target, because 'Kyphosis'
# was moved to the last column earlier in the notebook.
features = list(X_train.columns)

# write the DOT description of the fitted tree into an in-memory buffer
dot_data = StringIO()
export_graphviz(dtree, out_file=dot_data, feature_names=features, filled=True, rounded=True)
graph = pydot.graph_from_dot_data(dot_data.getvalue())

# display() is required because this is not the last expression of the cell
display(Image(graph[0].create_png()))

# import the random forest classifier
from sklearn.ensemble import RandomForestClassifier

# create an instance of the random forest classifier with 200 trees
# (random_state is fixed so the forest is the same on every run)
rfc = RandomForestClassifier(n_estimators=200, random_state=42)

# fit the model on the training partition
rfc.fit(X_train, y_train)

# predict the labels of the held-out test partition
rfc_pred = rfc.predict(X_test)

# print the classification report and confusion matrix
print(classification_report(y_test, rfc_pred))

print(confusion_matrix(y_test, rfc_pred))

# import the gradient boosting classifier
from sklearn.ensemble import GradientBoostingClassifier

# create an instance of the gradient boosting classifier
# (random_state is fixed so the fitted ensemble is the same on every run)
gbc = GradientBoostingClassifier(random_state=42)

# fit the model on the training partition
gbc.fit(X_train, y_train)

# predict the labels of the held-out test partition
gbc_pred = gbc.predict(X_test)

# print the classification report and confusion matrix
print(classification_report(y_test, gbc_pred))

print(confusion_matrix(y_test, gbc_pred))
