# Basics of logistic regression

## Import the relevant libraries

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
#sns.set()

# Fix for the statsmodels library: expose `chisqprob` on scipy.stats,
# implemented through the chi-square survival function.
from scipy import stats


def _chisqprob(chisq, df):
    """Return the chi-square survival function at `chisq` for `df` degrees of freedom."""
    return stats.chi2.sf(chisq, df)


stats.chisqprob = _chisqprob

## Load the data

In [3]:
# Read the admissions file, then derive a separate frame in which the
# 'Admitted' labels are recoded as integers ('Yes' -> 1, 'No' -> 0).
# `raw_data` itself is left unmodified.
raw_data = pd.read_csv('Admittance.csv')
data = raw_data.assign(
    Admitted=raw_data['Admitted'].map({'Yes': 1, 'No': 0})
)
data

Unnamed: 0,SAT,Admitted
0,1363,0
1,1792,1
2,1954,1
3,1653,0
4,1593,0
...,...,...
163,1722,1
164,1750,1
165,1555,0
166,1524,0


## Declare the dependent and the independent variables

In [4]:
# Dependent variable: the recoded 'Admitted' column; independent variable: 'SAT'.
y, x1 = data['Admitted'], data['SAT']

## Regression

In [5]:
# Add a constant column to the SAT predictor, then build and fit the
# logit model of `y` on that design matrix.
x = sm.add_constant(x1)
reg_log = sm.Logit(endog=y, exog=x)
results_log = reg_log.fit()

Optimization terminated successfully.
         Current function value: 0.137766
         Iterations 10


## Summary

In [None]:
# Get the regression summary of the fitted SAT model (`results_log`).
# As the last expression of the cell, it is rendered as the cell's output.
results_log.summary()

## Looking into LL-null

In [None]:
# Create a variable only of 1s, one entry per observation in `y`.
# The length is taken from the data instead of being hard-coded, so the
# intercept-only model below keeps working if the number of rows changes.
const = np.ones(len(y))
const

In [None]:
# Fit a logit model of `y` on the column of ones alone (no SAT predictor)
# and display its summary.
reg_null = sm.Logit(endog=y, exog=const)
results_null = reg_null.fit()
results_null.summary()

## Plot a logistic regression curve

In [None]:
# Build the logit model of `y` on `x` (same specification as in the
# Regression section) and fit it; the fitted parameters drive the curve below.
reg_log = sm.Logit(endog=y, exog=x)
results_log = reg_log.fit()

# Creating a logit function, depending on the input and coefficients
def f(x,b0,b1):
    """Evaluate the logistic curve exp(b0 + b1*x) / (1 + exp(b0 + b1*x)).

    Parameters
    ----------
    x : scalar, numpy array or pandas Series
        Value(s) of the predictor.
    b0 : float
        Intercept.
    b1 : float
        Coefficient multiplying `x`.

    Returns
    -------
    numpy.ndarray
        The curve evaluated at each element of `x` (0-d array for a scalar).
    """
    z = b0 + x * b1
    # exp(z) / (1 + exp(z)) == exp(-log(1 + exp(-z))). np.logaddexp(0, -z)
    # evaluates log(1 + exp(-z)) without overflowing, so very large |z| gives
    # 1.0 or 0.0 where the naive ratio would end up as inf / inf = nan.
    return np.array(np.exp(-np.logaddexp(0, -z)))

# Create the figure first so the requested size applies to the plot itself;
# calling plt.figure after plotting opens a second, empty figure instead.
plt.figure(figsize=(20,20))
# Intercept and slope of the fitted model, taken by position from the parameters
# (going through an array avoids positional [] indexing on a labelled Series)
b0, b1 = np.asarray(results_log.params)
# Sort the SAT values and evaluate the curve on them, so every plotted point
# is an (x, f(x)) pair whatever the sign of the slope
x_sorted = np.sort(np.array(x1))
f_sorted = f(x_sorted,b0,b1)
# Observed data
ax = plt.scatter(x1,y,color='C0')
plt.xlabel('SAT', fontsize = 20)
plt.ylabel('Admitted', fontsize = 20)
# Plotting the curve
ax2 = plt.plot(x_sorted,f_sorted,color='red')
plt.show()

In [None]:
# Exponentiate 4.20.
# NOTE(review): 4.20 is presumably a log-odds coefficient being converted to an
# odds ratio; its source is not visible here, so confirm which model it comes from.
np.exp(4.20)