### Logistic Regression Classification - Visual QST & Responsiveness to Treatment Across 5 Chronic Pain Cohorts

In [None]:
## For data handling
import pandas as pd
import numpy as np

## For plotting
import matplotlib.pyplot as plt
import seaborn as sns

## Plot style: seaborn's "whitegrid" theme,
## i.e. grid lines drawn on a white background
sns.set_style(style="whitegrid")

In [None]:
## Load in data
## The CSV location can be overridden through the VISQST_CSV environment variable so
## the notebook can run on another machine; the original local path stays the default.
import os

DEFAULT_CSV = "/Users/noahwaller/Documents/3cohort-GIMME PAPER/csv_for-code/7cohort_visQST_allmetrics_outrem.csv"
data = pd.read_csv(os.environ.get("VISQST_CSV", DEFAULT_CSV), delimiter = ",")
data.head()

In [None]:
## Requires no missing values
## Keep only complete cases: a row is dropped if ANY of the columns below is NaN.
## The list covers the target, the baseline measures, and the two visual QST
## features used by the model (feature_cols), so the fit never receives NaN inputs.
required_cols = [
    'responder_bin',   # target
    'fm_score_bsl',    # model feature
    'sss_bsl',
    'wpi_bsl',
    'pd02_bsl',        # model feature
    'vis_unpl_avg',    # model feature
    'vis_bright_avg',  # model feature
]
## A single non-inplace call is equivalent to dropping column by column
## and keeps the cell safe to re-run.
data = data.dropna(subset=required_cols)
data


In [None]:
## Import
from sklearn.model_selection import train_test_split

In [None]:
# Separate the predictor columns (X) from the outcome column (y).
# Alternative feature set, kept for reference:
#feature_cols = ['vis01_unpl_avg', 'vis02_unpl_avg', 'vis03_unpl_avg', 'vis04_unpl_avg','vis05_unpl_avg','vis06_unpl_avg', 
                #'fm_score_bsl', 'wpi_bsl', 'sss_bsl']
feature_cols = ['vis_unpl_avg', 'vis_bright_avg', 'pd02_bsl', 'fm_score_bsl']
y = data['responder_bin']      # Target variable
X = data.loc[:, feature_cols]  # Features

In [None]:
# Hold out 25% of the rows as the test set. The split is shuffled, stratified on
# the target, and seeded so that it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    stratify=y,
    random_state=10,
)

In [None]:
## import LogisticRegression
from sklearn.linear_model import LogisticRegression

In [None]:
# Create the model with default parameters (seeded) and fit it on the training
# split in one expression; fit() returns the fitted estimator itself.
logreg = LogisticRegression(random_state=10).fit(X_train, y_train)

# Predicted class labels for the held-out test split
y_pred = logreg.predict(X_test)

In [None]:
# import the metrics class
from sklearn import metrics

# Confusion matrix: rows are the actual classes, columns the predicted classes.
# With labels 0/1, top left is True Negative and bottom right is True Positive
# (top right is False Positive, bottom left is False Negative).
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cnf_matrix

In [None]:
# visualize confusion matrix as an annotated heatmap
class_names = [0, 1]  # name of classes, shown on both axes
fig, ax = plt.subplots()
# Tick labels are handed to heatmap() directly: seaborn sets the ticks itself,
# so ticks configured through pyplot beforehand would simply be overwritten.
sns.heatmap(
    pd.DataFrame(cnf_matrix),
    annot=True,
    annot_kws={'fontsize': 12, 'color':'b', 'alpha': 0.6,
               'verticalalignment': 'center', 'backgroundcolor': 'w'},
    cmap="YlGnBu",
    fmt='g',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
# Put the x-axis label above the matrix
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Lay out last so the title and axis labels are taken into account
fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Text summary of accuracy, precision, and recall that is easier to read
# than the raw confusion matrix; names are given in label order (0, then 1).
target_names = ['Non-responder', 'responder']
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

In [None]:
# create AUC graph for model performance visualization - .5 is a worthless model and 1 is perfect
# Column 1 of predict_proba is the probability of logreg.classes_[1]
y_pred_proba = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="data 1, auc="+str(auc))
# Diagonal reference line: a chance-level classifier (auc = .5)
ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="chance")
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.set_title("ROC curve")
ax.legend(loc=4)
plt.show()