In [199]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import scipy.stats as st
import pingouin as pg
%matplotlib inline 

In [200]:
# Read the survey data from a CSV file in the working directory.
DATA_PATH = 'condom.csv'
data = pd.read_csv(DATA_PATH)

In [201]:
# Peek at the first five rows of the freshly loaded frame.
data.head(n=5)

Unnamed: 0,particip,safety,use,gender,sexexp,previous,selfcon,perceive
0,5,3,Unprotected,Female,5,No,5,4
1,6,1,Unprotected,Male,3,No,2,2
2,9,0,Unprotected,Female,2,No,3,0
3,13,3,Unprotected,Male,3,No,4,4
4,14,2,Unprotected,Female,3,No,6,3


In [202]:
# List the distinct labels of `previous`, in order of first appearance.
previous_col = data['previous']
previous_col.unique()

array(['No', 'Condom', 'First'], dtype=object)

In [203]:
# Encode the categorical columns as integer codes.
# Series.map is used instead of Series.replace: replacing strings with ints
# on an object column goes through pandas' deprecated silent-downcasting
# path, whereas map builds a new numeric column directly.
USE_CODES = {'Unprotected': 0, 'CondomUsed': 1}
GENDER_CODES = {'Male': 0, 'Female': 1}
PREVIOUS_CODES = {'No': 0, 'Condom': 1, 'First': 2}

data['usex'] = data['use'].map(USE_CODES)
data['genderx'] = data['gender'].map(GENDER_CODES)
data['previousx'] = data['previous'].map(PREVIOUS_CODES)

# map() yields NaN for any label missing from its dictionary; fail loudly
# here rather than letting an unexpected category reach the model.
assert data[['usex', 'genderx', 'previousx']].notna().all().all(), \
    'unmapped category label in use/gender/previous'

In [204]:
# One-hot encode `previous`, keeping 'Condom' and 'First' so that 'No' is
# the reference level.
# dtype=int keeps the indicators as 0/1 integers: recent pandas versions
# return booleans by default, which would turn a mixed design matrix into
# object dtype when it is handed to statsmodels.
dummies = pd.get_dummies(data['previous'], dtype=int)
# Select by name rather than by position so the result does not depend on
# the alphabetical ordering of the category labels.
dummy = dummies[['Condom', 'First']]
dummy

Unnamed: 0,Condom,First
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
95,1,0
96,1,0
97,1,0
98,1,0


In [205]:
# Attach the indicator columns to the main frame by row index.
# Any dummy columns left over from a previous run of this cell are dropped
# first; otherwise a re-run would produce suffixed duplicates
# (Condom_x / Condom_y) and break later column lookups.
data = (
    data
    .drop(columns=dummy.columns, errors='ignore')
    .join(dummy, how='inner')
)

In [206]:
# Check that the encoded and indicator columns now appear in the frame.
data.head(n=5)

Unnamed: 0,particip,safety,use,gender,sexexp,previous,selfcon,perceive,usex,genderx,previousx,Condom,First
0,5,3,Unprotected,Female,5,No,5,4,0,1,0,0,0
1,6,1,Unprotected,Male,3,No,2,2,0,0,0,0,0
2,9,0,Unprotected,Female,2,No,3,0,0,1,0,0,0
3,13,3,Unprotected,Male,3,No,4,4,0,0,0,0,0
4,14,2,Unprotected,Female,3,No,6,3,0,1,0,0,0


In [207]:
from statsmodels.tools.tools import add_constant

# Add the intercept column ('const') that the GLM design matrices select below.
# prepend=True and has_constant='skip' are the library defaults, spelled out.
data = add_constant(data, prepend=True, has_constant='skip')

In [208]:
from statsmodels.genmod import families

In [209]:
# Model 1: binomial GLM of condom use on perceived safety, perceived risk
# and gender (plus intercept).
m01_columns = ['const', 'safety', 'perceive', 'genderx']
m01_spec = sm.GLM(
    data['usex'],
    data[m01_columns],
    family=families.Binomial(),
)
m01 = m01_spec.fit(attach_wls=True, atol=1e-10)
print(m01.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   usex   No. Observations:                  100
Model:                            GLM   Df Residuals:                       96
Model Family:                Binomial   Df Model:                            3
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -52.885
Date:                Sat, 04 Jul 2020   Deviance:                       105.77
Time:                        13:50:51   Pearson chi2:                     101.
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.4761      0.752     -3.294      0.0

In [210]:
# Model 2: Model 1 plus the previous-use indicators, self-control and
# sexual experience.
m02_columns = [
    'const', 'safety', 'perceive', 'genderx',
    'Condom', 'First', 'selfcon', 'sexexp',
]
m02_spec = sm.GLM(
    data['usex'],
    data[m02_columns],
    family=families.Binomial(),
)
m02 = m02_spec.fit(attach_wls=True, atol=1e-10)
print(m02.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   usex   No. Observations:                  100
Model:                            GLM   Df Residuals:                       92
Model Family:                Binomial   Df Model:                            7
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -43.986
Date:                Sat, 04 Jul 2020   Deviance:                       87.971
Time:                        13:50:51   Pearson chi2:                     87.8
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -4.9597      1.147     -4.326      0.0

In [211]:
# Store Model 2's fitted values alongside the observations.
data = data.assign(predict=m02.fittedvalues)

In [212]:
# Show three rows (selected by position) together with their fitted values.
rows_to_inspect = [11, 52, 74]
data.iloc[rows_to_inspect]

Unnamed: 0,const,particip,safety,use,gender,sexexp,previous,selfcon,perceive,usex,genderx,previousx,Condom,First,predict
11,1.0,43,3,Unprotected,Female,1,No,1,6,0,1,0,0,0,0.454654
52,1.0,83,5,Unprotected,Male,3,Condom,7,6,0,0,1,1,0,0.915625
74,1.0,56,2,CondomUsed,Female,2,Condom,4,3,1,1,1,1,0,0.441225


### A female who used a condom in her previous encounter with her new partner scores 2 on all variables except perceived risk (for which she scores 6). Use the model to estimate the probability that she will use a condom in her next encounter. 

In [213]:
# Display the fitted Model 2 coefficients, indexed by predictor name.
m02.params

const      -4.959739
safety     -0.482460
perceive    0.949088
genderx     0.002656
Condom      1.087196
First      -0.016615
selfcon     0.347626
sexexp      0.180423
dtype: float64

In [214]:
# Linear predictor for the profile in the question: female (genderx = 1),
# used a condom previously (Condom = 1, so First = 0 and drops out),
# perceived risk 6, and 2 on every other predictor.
coef = m02.params
Z = (
    coef['const']
    + 2 * coef['safety']
    + 6 * coef['perceive']
    + 1 * coef['genderx']
    + coef['Condom']
    + 2 * coef['selfcon']
    + 2 * coef['sexexp']
)

In [215]:
# Display the linear predictor computed in the previous cell.
Z

1.9158214940637004

In [216]:
# Inverse-logit (logistic) transform: turn the linear predictor Z into a
# probability.
odds_against = np.exp(-Z)
probablity = 1 / (1 + odds_against)
probablity

0.87167175100175

### Therefore, there is an 87% chance that she will use a condom in her next encounter.