In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

In [2]:
# Load the admissions dataset. The path is relative, so 'binary_log.csv' must sit
# in the notebook's working directory.
data = pd.read_csv('binary_log.csv')

In [3]:
# Show the raw frame to inspect the column names and label spellings before encoding.
data

Unnamed: 0,SAT,Admitted,Gender
0,1363,No,Male
1,1792,Yes,Female
2,1954,Yes,Female
3,1653,No,Male
4,1593,No,Male
...,...,...,...
163,1722,Yes,Female
164,1750,Yes,Male
165,1555,No,Male
166,1524,No,Male


In [4]:
# Encode the two categorical columns as 0/1 integers so they can be fed to the model.
#
# Series.map replaces every value that is missing from the dict with NaN. Because
# this cell overwrites `data` in place, running it a second time would otherwise
# turn both columns into all-NaN. The identity entries (1 -> 1, 0 -> 0) make the
# cell idempotent: already-encoded values pass through unchanged.
admitted_codes = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
gender_codes = {'Female': 1, 'Male': 0, 1: 1, 0: 0}

admitted_encoded = data['Admitted'].map(admitted_codes)
gender_encoded = data['Gender'].map(gender_codes)

# Fail loudly on unexpected labels (different spelling/case, missing values)
# instead of letting NaN flow silently into the train/test split and the model.
assert admitted_encoded.notna().all(), "unexpected value in 'Admitted' column"
assert gender_encoded.notna().all(), "unexpected value in 'Gender' column"

# Pin the dtype so the result is int64 on the first run and on every re-run.
data['Admitted'] = admitted_encoded.astype('int64')
data['Gender'] = gender_encoded.astype('int64')

In [5]:
# Re-display the frame to confirm that 'Admitted' and 'Gender' now hold 0/1 codes.
data

Unnamed: 0,SAT,Admitted,Gender
0,1363,0,0
1,1792,1,1
2,1954,1,1
3,1653,0,0
4,1593,0,0
...,...,...,...
163,1722,1,1
164,1750,1,0
165,1555,0,0
166,1524,0,0


In [6]:
# Dependent variable (target): the 0/1-encoded admission outcome.
y = data.loc[:, 'Admitted']

In [7]:
# Inspect the target series (values, length and dtype).
y

0      0
1      1
2      1
3      0
4      0
      ..
163    1
164    1
165    0
166    0
167    0
Name: Admitted, Length: 168, dtype: int64

In [8]:
# Independent variables (features): SAT score and the 0/1-encoded gender flag.
x = data.loc[:, ['SAT', 'Gender']]

In [9]:
# Inspect the feature frame; it should contain only the 'SAT' and 'Gender' columns.
x

Unnamed: 0,SAT,Gender
0,1363,0
1,1792,1
2,1954,1
3,1653,0
4,1593,0
...,...,...
163,1722,1
164,1750,0
165,1555,0
166,1524,0


In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the rows for evaluation. random_state is fixed so the same rows
# land in each partition on every run.
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=0)

In [12]:
# (rows, columns) of the training partition.
X_train.shape

(134, 2)

In [13]:
# (rows, columns) of the held-out test partition.
X_test.shape

(34, 2)

In [14]:
from sklearn.linear_model import LogisticRegression

In [15]:
# Logistic-regression classifier; no arguments are passed, so every hyperparameter
# is left at the library default.
log_reg = LogisticRegression()

In [16]:
# Fit on the training partition only; the test rows are not used until evaluation.
log_reg.fit(X_train,y_train)

LogisticRegression()

In [17]:
# Hard class predictions (0 or 1) for the held-out rows.
pred = log_reg.predict(X_test)

In [18]:
# Predicted labels, in the same row order as X_test / y_test.
pred

array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1], dtype=int64)

In [19]:
# True labels for the same rows, for a side-by-side comparison with `pred`.
y_test

151    1
56     0
123    0
60     0
100    1
51     1
7      1
5      1
86     0
135    0
54     1
94     0
157    1
101    0
73     1
45     0
74     1
97     0
139    0
96     1
107    1
106    1
150    1
166    0
55     0
152    1
26     1
143    1
155    0
33     1
8      1
108    0
122    0
37     1
Name: Admitted, dtype: int64

In [20]:
# Per-row class probabilities, rounded to two decimals for display. Columns follow
# log_reg.classes_, i.e. column 0 is P(class 0) and column 1 is P(class 1).
log_reg.predict_proba(X_test).round(2)

array([[0.  , 1.  ],
       [0.55, 0.45],
       [1.  , 0.  ],
       [0.99, 0.01],
       [0.01, 0.99],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.01, 0.99],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.01, 0.99],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.99, 0.01],
       [0.99, 0.01],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.01, 0.99],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.68, 0.32],
       [0.01, 0.99],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ]])

In [21]:
from sklearn.metrics import confusion_matrix

In [22]:
# Confusion matrix on the test partition: rows are the true classes and columns are
# the predicted classes, both in label order (0, 1).
print(confusion_matrix(y_test,pred))

[[15  0]
 [ 1 18]]


In [23]:
from sklearn.metrics import classification_report

In [24]:
# Per-class precision, recall and F1, plus overall accuracy, on the test partition.
print(classification_report(y_test,pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        15
           1       1.00      0.95      0.97        19

    accuracy                           0.97        34
   macro avg       0.97      0.97      0.97        34
weighted avg       0.97      0.97      0.97        34

