In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
import pymc3 as pm
import math
# Notebook-wide plotting defaults: a 16x8 default figure size and the "ggplot" style sheet.
# The style sheet is applied after the figure size is set; keep this order.
plt.rcParams['figure.figsize'] = [16, 8]
plt.style.use("ggplot")

In [35]:
# Load the cryotherapy dataset from a tab-separated file (path is relative to the
# working directory).
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which looks
# like a row index that was saved into the file - consider index_col=0 if it is unused.
df = pd.read_csv(filepath_or_buffer="Cryotherapy.CSV", sep="\t")

In [36]:
# Preview the first 20 records to sanity-check the parsed columns.
df.head(n=20)

Unnamed: 0,sex,age,Time,Number_of_Warts,Type,Area,Result_of_Treatment
0,1,35,12.0,5,1,100,0
1,1,29,7.0,5,1,96,1
2,1,50,8.0,1,3,132,0
3,1,32,11.75,7,3,750,0
4,1,67,9.25,1,1,42,0
5,1,41,8.0,2,2,20,1
6,1,36,11.0,2,1,8,0
7,1,59,3.5,3,3,20,0
8,1,20,4.5,12,1,6,1
9,2,34,11.25,3,3,150,0


In [37]:
# Training set: the first 60 rows (positions 0-59). The slice starts at 0 so that the
# first record is not left out of both the training and the test set.
# NOTE(review): together with the 60:90 test slice this is a 60/30 split
# (~67%/33% if the file has 90 rows), not 75%/25% - confirm the intended ratio.
df_train = df.iloc[:60]

In [38]:
# Hold-out set: positional rows 60-89 (at most 30 rows).
# NOTE(review): the original label said 25%, but a 60/30 split is ~33% test
# (assuming the file has 90 rows) - confirm the intended ratio.
df_test = df.iloc[60:90] #test set

In [None]:
# Bayesian logistic regression of the treatment outcome on all six predictors,
# fitted on the training rows only. The Binomial family makes this GLM a logistic model.
with pm.Model() as logistic_model:
    pm.glm.GLM.from_formula(
        'Result_of_Treatment ~ sex + age + Time + Number_of_Warts + Area + Type',
        df_train,
        family=pm.glm.families.Binomial(),
    )
    # 10000 draws per chain after 1000 tuning steps, one chain per core.
    # A fixed random_seed makes the posterior draws (and every number derived
    # from them further down) reproducible across kernel restarts.
    trace = pm.sample(10000, tune=1000, cores=6, init='adapt_diag', random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (6 chains in 6 jobs)
NUTS: [Type, Area, Number_of_Warts, Time, age, sex, Intercept]
Sampling 6 chains:  95%|█████████▌| 62789/66000 [02:57<00:09, 346.08draws/s]

In [None]:
# Trace plots for the six slope coefficients (the intercept is not plotted).
# The trailing semicolon suppresses the textual repr of the returned axes.
pm.traceplot(
    trace,
    varnames=['sex', 'age', 'Time', 'Number_of_Warts', 'Area', 'Type'],
);

In [None]:
# Slice the trace from draw 5000 onward (second half of the 10000 requested draws)
# and flatten the remaining samples into a DataFrame, one column per parameter.
df_trace = pm.trace_to_dataframe(trace[10000 // 2:])

# Posterior mean of each coefficient, used below as point estimates.
# The column selection order must match the unpacking order on the left.
(sex, age, Time, Number_of_Warts, Area, Type, intercept) = (
    df_trace.loc[:, ['sex', 'age', 'Time', 'Number_of_Warts', 'Area', 'Type', 'Intercept']]
    .mean(axis=0)
)

In [None]:
# Display the posterior-mean coefficients for sex, age and Time.
sex, age, Time

In [None]:
def predict(s, a, T, N, A, t, intercept=intercept, sex=sex, age=age, Time=Time,
            Number_of_Warts=Number_of_Warts, Area=Area, Type=Type):
    """Return the logistic-model probability of a positive treatment result.

    Parameters
    ----------
    s, a, T, N, A, t : scalar or array-like (e.g. pandas Series)
        Feature values for sex, age, Time, Number_of_Warts, Area and Type,
        in that order.
    intercept, sex, age, Time, Number_of_Warts, Area, Type : float
        Model coefficients. The defaults are bound, at definition time, to the
        notebook-level posterior means of the same names.

    Returns
    -------
    Same shape/type as the inputs: sigmoid of the linear predictor, in [0, 1].
    """
    # Linear predictor (log-odds).
    v = (intercept + sex * s + age * a + Time * T
         + Number_of_Warts * N + Area * A + Type * t)
    # sigmoid(v) = exp(-log(1 + exp(-v))). logaddexp(0, -v) evaluates
    # log(1 + exp(-v)) without overflowing, so very large |v| yields 0.0 or 1.0
    # instead of the inf/inf = NaN that exp(v) / (1 + exp(v)) produces.
    return np.exp(-np.logaddexp(0, -v))

In [None]:
# Predicted success probability for every test row, using the default
# (posterior-mean) coefficients of predict().
df_predict = predict(
    s=df_test['sex'],
    a=df_test['age'],
    T=df_test['Time'],
    N=df_test['Number_of_Warts'],
    A=df_test['Area'],
    t=df_test['Type'],
)

In [None]:
# Turn probabilities into hard labels by rounding to the nearest integer
# (values stay floats: 0.0 or 1.0).
df_predict = df_predict.round()

In [None]:
# Display the rounded predictions for the test rows.
df_predict

In [None]:
# Confusion matrix of true test outcomes versus predicted labels.
confusion_matrix(
    y_true=df_test['Result_of_Treatment'],
    y_pred=df_predict,
)

In [None]:
# Fraction of test rows whose predicted label matches the true outcome.
accuracy_score(
    y_true=df_test['Result_of_Treatment'],
    y_pred=df_predict,
)

In [None]:
# F1 score on the test set (scikit-learn defaults).
f1_score(
    y_true=df_test['Result_of_Treatment'],
    y_pred=df_predict,
)