# Task for Today  

***

## Back Problem Prediction  

Given *data about back pain*, let's try to predict whether a given subject has **abnormal** conditions or not.  
  
We will try many models and pick the best one to make our predictions.

# Getting Started

In [None]:
!pip install pycaret

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pycaret.classification

from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Load the spine dataset CSV (path is relative to the notebook's working directory).
data = pd.read_csv('../input/lower-back-pain-symptoms-dataset/Dataset_spine.csv')

In [None]:
# Preview the first 10 rows of the raw frame.
data.head(10)

In [None]:
# Summarise columns, dtypes and non-null counts before cleaning.
data.info()

# Cleaning

In [None]:
# Remove the 'Unnamed: 13' column (presumably not a usable feature -- confirm
# against the data.info() output). Re-running this cell raises KeyError because
# the column is already gone.
data = data.drop(columns=['Unnamed: 13'])

In [None]:
# List the distinct labels in the target column before encoding them.
data['Class_att'].unique()

In [None]:
# Encode the target as integers: 1 where the label equals 'Abnormal', 0 for any
# other value. Gotcha: running this cell a second time compares the already
# encoded integers with 'Abnormal' and therefore turns every label into 0.
data['Class_att'] = data['Class_att'].eq('Abnormal').astype('int64')

In [None]:
# Display the frame to check the result of the cleaning steps.
data

# Visualizing Correlations

In [None]:
# Pairwise correlation matrix over the columns of `data`.
corr = data.corr()

# Draw the matrix as an annotated heatmap; the colour scale is anchored at -1.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=True, vmin=-1.0, ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

# Splitting/Scaling

In [None]:
# Standardise the feature columns Col1..Col12 without leaking test-set statistics.
#
# The scaler is fitted on the training rows only. train_test_split chooses rows
# from the number of samples and the random_state alone, so calling it on row
# positions with the same train_size / random_state as the split cell further
# down selects exactly the rows that end up in `data_train`. Keep the two calls
# in sync if either parameter changes.
train_rows = train_test_split(
    np.arange(len(data)), train_size=0.7, random_state=200
)[0]

scaler = StandardScaler()
scaler.fit(data.iloc[train_rows].loc[:, 'Col1':'Col12'])

# Apply the training-set mean/std to every row (train and test alike).
data.loc[:, 'Col1':'Col12'] = scaler.transform(data.loc[:, 'Col1':'Col12'])

In [None]:
# Display the frame to inspect the scaled feature columns.
data

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
data_train, data_test = train_test_split(
    data,
    random_state=200,
    train_size=0.7,
)

# Model Selection

In [None]:
# Initialise the PyCaret classification experiment on the training split.
# `session_id` seeds PyCaret's internal randomness so that the model comparison
# below gives the same ranking on every Restart & Run All.
pycaret.classification.setup(data_train, target='Class_att', session_id=200)

In [None]:
# Train and compare PyCaret's candidate classifiers; keep whatever
# compare_models() returns as the best one.
best_model = pycaret.classification.compare_models()

In [None]:
# Separate the feature matrix from the 'Class_att' target for both partitions.
X_train = data_train.drop(columns='Class_att').copy()
y_train = data_train['Class_att'].copy()

X_test = data_test.drop(columns='Class_att').copy()
y_test = data_test['Class_att'].copy()

In [None]:
# Score the PyCaret-selected model on the held-out test split.
# NOTE(review): this scores the returned estimator directly on X_test, which
# assumes PyCaret's own preprocessing left the feature columns unchanged --
# confirm, or evaluate through PyCaret's predict_model instead.
best_model.score(X_test, y_test)

# Custom Model

In [None]:
from sklearn.linear_model import LogisticRegressionCV

In [None]:
# Fit a cross-validated logistic regression with default settings on the
# training split; fit() returns the estimator itself, so it can be bound directly.
model = LogisticRegressionCV().fit(X_train, y_train)

# Show the fitted estimator as the cell output.
model

In [None]:
# Score the logistic regression model on the held-out test split.
model.score(X_test, y_test)

# Confusion Matrix

In [None]:
# Predicted labels from the logistic regression model and the matching ground
# truth, both as NumPy arrays for the metric functions below.
y_pred = model.predict(X_test)
y_true = y_test.to_numpy(copy=True)

In [None]:
# Confusion matrix of actual labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true, y_pred)

# Render the counts as an annotated heatmap without a colour bar.
fig, ax = plt.subplots(figsize=(4, 4))
sns.heatmap(cm, annot=True, fmt='g', vmin=0, cbar=False, ax=ax)

ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")

plt.show()

In [None]:
# Print the per-class text report; print() is needed because
# classification_report returns a multi-line string.
print(classification_report(y_true, y_pred))

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/SmN-LA5b0rM