In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
from sklearn import datasets
from sklearn import preprocessing
from utils import md

# Notebook-wide matplotlib defaults: figure size plus label/tick/legend font sizes.
params = {
    'figure.figsize': (15.0, 5.0),
    'axes.labelsize': 20,
    # Both tick axes share one font size.
    **{f'{axis}tick.labelsize': 16 for axis in ('x', 'y')},
    'legend.fontsize': 14,
    # 'font.sans-serif': 'Arial'  (font override intentionally left disabled)
}
plt.rcParams.update(params)

# Three hex colours kept at module level for use by plotting cells.
colors = [
    "#377eb8",
    "#ff7f00",
    "#4daf4a",
]

In [None]:
md('# Inferred parameters provided in `model_parameters.csv`')
# The first CSV column is used as the row index.
data = pd.read_csv("model_parameters.csv", index_col=0)

# `label` is the classification target; every other column is a feature.
y = data['label']
X = data.drop(columns=['label'])

# Trailing expression: render the full table as the cell output.
data

In [None]:
md('## Showing examples of random splitting of the dataset into training and test subsets')
k = 5  # an example with 5 folds

# Seed the shuffling so the printed folds are identical after Restart & Run All.
# A RandomState *instance* is passed on purpose: its state advances with every
# call to kf.split(), so each of the 20 repetitions gets a different partition.
# A plain integer seed would instead reproduce the very same folds on every call.
kf = KFold(n_splits=k, shuffle=True, random_state=np.random.RandomState(0))

# 20 repetitions x k folds: print the row positions of each train/test subset.
for i in range(20):
    for train_fold, test_fold in kf.split(X):
        print(train_fold, test_fold)

In [None]:
# Repeated k-fold cross-validation of an unpenalised logistic regression.
k = 5           # folds per repetition
n_repeats = 20  # number of independent reshuffles of the dataset

# Seed the shuffling so the accuracy statistics are reproducible on a fresh kernel.
# A RandomState *instance* is passed on purpose: its state advances with every
# call to kf.split(), so each repetition uses a different partition. A plain
# integer seed would make all repetitions evaluate the very same folds.
kf = KFold(n_splits=k, shuffle=True, random_state=np.random.RandomState(0))
model = LogisticRegression(penalty=None, max_iter=1000)

# One accuracy value per (repetition, fold) pair, i.e. k * n_repeats entries.
acc_score_ = []

for _repeat in range(n_repeats):
    for train_fold, test_fold in kf.split(X):
        # The folds are positional indices, hence .iloc.
        X_train, X_test = X.iloc[train_fold, :], X.iloc[test_fold, :]
        y_train, y_test = y.iloc[train_fold], y.iloc[test_fold]

        # fit() re-estimates the model from scratch on each training fold.
        model.fit(X_train, y_train)
        pred_values = model.predict(X_test)

        # scikit-learn's signature is accuracy_score(y_true, y_pred).
        acc_score_.append(accuracy_score(y_test, pred_values))

In [None]:
# Summarise the per-fold accuracies collected in `acc_score_`.
acc_mean = np.mean(acc_score_)
acc_std = np.std(acc_score_)
print("Accuracy (mean and standard deviation over splitting)", acc_mean, acc_std)

In [None]:
md("## Training logistic regression")
# Fit the unpenalised classifier on the complete dataset (no held-out subset).
# fit() returns the estimator itself, so the assignment can be chained.
logreg = LogisticRegression(max_iter=1000, penalty=None).fit(X, y)

# Trailing expression: show the fitted estimator as the cell output.
logreg

In [None]:
md('## Projections of the decision boundaries')
import warnings
# The classifier is fitted on a DataFrame but receives plain NumPy arrays below,
# which triggers scikit-learn's feature-name warning; silence just that message.
warnings.filterwarnings("ignore", message="X does not have valid feature names, but LogisticRegression was fitted with feature names")

scatter_kwargs = {'s': 100,'alpha': 0.9}
contourf_kwargs = {'alpha': 0.7}

logreg = LogisticRegression(penalty=None, max_iter=1000)
logreg.fit(X, y)

# Convert once instead of rebuilding the arrays for every panel.
X_arr = np.array(X)
y_arr = np.array(y)


def plot_projection(ax, features, targets, clf, shown, fixed, fixed_range,
                    xlabel, ylabel, scatter_kwargs=None, contourf_kwargs=None):
    """Draw the decision regions of `clf` in the plane of two features.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    features, targets : numpy arrays
        Feature matrix and class labels passed to `plot_decision_regions`.
    clf : fitted classifier
        Model whose decision regions are drawn.
    shown : list of two int
        Column indices of the features placed on the x and y axes.
    fixed : int
        Column index of the remaining feature; it is pinned to its mean value.
    fixed_range : float
        Value passed as `filler_feature_ranges` for the pinned feature.
    xlabel, ylabel : str
        Axis labels.
    scatter_kwargs, contourf_kwargs : dict, optional
        Styling forwarded unchanged to `plot_decision_regions`.
    """
    plot_decision_regions(features, targets, clf=clf, legend=0, ax=ax,
                          feature_index=shown,
                          filler_feature_values={fixed: np.mean(features[:, fixed])},
                          filler_feature_ranges={fixed: fixed_range},
                          scatter_kwargs=scatter_kwargs,
                          contourf_kwargs=contourf_kwargs)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)


# One row per panel:
# (features on the axes, pinned feature, range for the pinned feature, x label, y label)
# The axis labels imply the columns are ordered (b, a, epsilon) -- TODO confirm
# against the column order of model_parameters.csv.
# NOTE(review): the first panel derives its range from the *signed* mean (x20),
# the other two from the mean absolute value (x100). A negative mean in column 2
# would yield a negative range -- confirm the asymmetry is intentional.
panels = [
    ([0, 1], 2, np.mean(X_arr[:, 2]) * 20, r"$b$", r"$a$"),
    ([1, 2], 0, np.mean(np.abs(X_arr[:, 0])) * 100, r"$a$", r"$\epsilon$"),
    ([0, 2], 1, np.mean(np.abs(X_arr[:, 1])) * 100, r"$b$", r"$\epsilon$"),
]

fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,4))

for ax, (shown, fixed, fixed_range, xlabel, ylabel) in zip(axs, panels):
    plot_projection(ax, X_arr, y_arr, logreg, shown, fixed, fixed_range,
                    xlabel, ylabel,
                    scatter_kwargs=scatter_kwargs,
                    contourf_kwargs=contourf_kwargs)

# Per-axes legends are disabled (legend=0); build one shared figure legend from
# the first panel's handles. The display names are hard-coded and presumably
# follow the order of the class labels -- verify against the `label` column.
handles, _labels = axs[0].get_legend_handles_labels()
fig.legend(handles, ["AA patients", "AVM patients", "Treated vessels"], loc='upper right', bbox_to_anchor=(1,1.1), ncol=3,
           frameon=False, fontsize=15)

fig.tight_layout()

# plt.savefig('../figures/classification_2d.pdf', format='pdf', bbox_inches='tight', transparent=True)