# Logit Model Estimation

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, log_loss
import pickle
from tkinter import Tk, StringVar, OptionMenu, mainloop

In [8]:
# Build the estimation sample in one chained pipeline:
#   1. read the Stata file,
#   2. keep only the outcome and the modelling variables,
#   3. discard every row that has a missing value,
#   4. expand year and school into dummy columns (two-way fixed effects),
#   5. recode the string labels 'False'/'True' as 0/1.
df = (
    pd.read_stata(
        '/Users/nbs/Documents/Georgetown/Semester 5/1 Courses/OPIM 244/freestyle_project/primary_sample.dta'
    )[['year', 'school', 'admit', 'gpa', 'lsat', 'urm', 'fee_waived', 'non_trad', 'intl']]
    .dropna(axis='index')
    # drop_first leaves out one dummy per factor to prevent collinearity
    .pipe(pd.get_dummies, columns=['year', 'school'], drop_first=True)
    .replace(['False', 'True'], [0, 1])
)

# Model: Logit

In [9]:
# Define features and outcome.
# y: the 'admit' column as a 1-D NumPy array. Series.to_numpy() is used instead
#    of Series.ravel(), which is deprecated in recent pandas releases.
# X: every remaining column of df (the regressors plus the year/school dummies).
y = df['admit'].to_numpy()
X = df.drop(['admit'], axis=1)

In [None]:
# Define and fit the logit model.
# The 'sag' solver shuffles the data while optimising, so the seed is pinned to
# make the fitted coefficients reproducible from run to run.
model = LogisticRegression(n_jobs=-1, max_iter=10000, solver='sag', random_state=0)
model.fit(X, y)

# In-sample predictions:
#   y_hat  - hard class labels (used for the MSE line below)
#   y_prob - class probabilities (what log loss is defined on)
y_hat = model.predict(X)
y_prob = model.predict_proba(X)

# Print outputs
print('Coefficients')
# Only the first six features are listed; the columns after them are the
# year/school dummies created by get_dummies.
for name, coef in zip(model.feature_names_in_[0:6], model.coef_.flatten()[0:6]):
    print(name, ':', round(coef, 3))
print('')

print('Intercept:', round(model.intercept_.item(), 3), '\n')

print('Goodness of Fit')
# Cross entropy must be computed from predicted probabilities; feeding it hard
# 0/1 labels only measures a clipped penalty on the misclassified rows.
print('Cross Entropy (Log Loss):', round(log_loss(y, y_prob), 3))
print('Accuracy:', round(model.score(X, y), 3))
# MSE of the hard class labels against the observed outcome.
print('MSE:', round(mean_squared_error(y, y_hat), 3))

In [None]:
# Persist the fitted model and the training column layout so the prediction
# cells can run without re-estimating. Context managers guarantee that each
# file is flushed and closed even if pickling fails part-way.

# Save model
with open('logit.sav', 'wb') as model_file:
    pickle.dump(model, model_file)

# Save columns (includes the outcome column 'admit' alongside the features)
with open('columns.pkl', 'wb') as columns_file:
    pickle.dump(list(df.columns), columns_file)

# Web App

In [2]:
# Call pickle files.
# NOTE: pickle.load executes arbitrary code from the file; only load files that
# this notebook wrote itself.
with open('columns.pkl', 'rb') as columns_file:
    columns = pickle.load(columns_file)
with open('logit.sav', 'rb') as model_file:
    model = pickle.load(model_file)

# Define relevant variables.
# schools: school names with the 'school_' dummy prefix removed. The prefix is
#          sliced off rather than passed to str.strip, because str.strip removes
#          any of the characters s/c/h/o/l/_ from BOTH ends of the name and
#          would mangle names that start or end with one of those letters.
# years:   the year dummy column names, prefix kept (e.g. 'year_2023').
SCHOOL_PREFIX = 'school_'
schools = [name[len(SCHOOL_PREFIX):] for name in columns if name.startswith(SCHOOL_PREFIX)]
years = [name for name in columns if name.startswith('year_')]

### User Inputs

In [33]:
# School (note: I consulted https://www.geeksforgeeks.org/tkinter-optionmenu-widget/ for this question)
# A small Tk window with a drop-down of the available schools is shown;
# mainloop() blocks until that window is closed, after which the selected value
# is read back from the StringVar.
root = Tk()
school_var = StringVar(root)
question_menu = OptionMenu(root, school_var, *schools)
question_menu.pack()
mainloop()
school = school_var.get()

# The StringVar starts out empty, so closing the window without picking an
# entry would otherwise feed a blank school into the prediction unnoticed.
if not school:
    raise ValueError('No school was selected. Please re-run this cell and choose a school.')

def _ask_number(prompt, cast, low, high, error_message):
    """Prompt until the user enters a number within [low, high].

    Parameters
    ----------
    prompt : str
        Text shown to the user by input().
    cast : callable
        Converter applied to the raw text (e.g. float or int); it is expected
        to raise ValueError for text it cannot parse.
    low, high : number
        Inclusive bounds for an acceptable answer.
    error_message : str
        Printed whenever the answer cannot be parsed or is out of range.

    Returns
    -------
    The converted value, guaranteed to satisfy low <= value <= high.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            # Only parsing failures are retried; KeyboardInterrupt and other
            # exceptions propagate so the cell can still be interrupted.
            print(error_message)
            continue
        if low <= value <= high:
            return value
        # Parsed correctly but outside the allowed range: tell the user why
        # they are being asked again.
        print(error_message)


# GPA
gpa = _ask_number(
    'Please input your undergraduate GPA: ',
    float, 0, 4.3,
    "Error. GPA must be between 0 and 4.3. Please try again.",
)

# LSAT
lsat = _ask_number(
    'Please input your LSAT score: ',
    int, 120, 180,
    'Error. LSAT score is an integer between 120 and 180. Please try again.',
)

def _ask_yes_no(prompt):
    """Prompt until the user answers yes or no.

    Parameters
    ----------
    prompt : str
        Text shown to the user by input().

    Returns
    -------
    str
        'YES' or 'NO' (upper-cased), which is the encoding the prediction cell
        converts to 1/0.
    """
    while True:
        # Surrounding whitespace is ignored so that e.g. "yes " is accepted.
        answer = input(prompt).strip().upper()
        if answer in ('YES', 'NO'):
            return answer
        # Explain the re-prompt instead of silently asking again.
        print('Error. Your answer must be yes or no. Please try again.')


# URM
urm = _ask_yes_no('Are you black, Hispanic, and/or American Indian? (yes/no) ')

# Fee waived
fee_waived = _ask_yes_no('Did you receive an application fee waiver? (yes/no) ')

# Non-traditional
non_trad = _ask_yes_no('Do you consider yourself a non-traditional student? (yes/no) ')

# International
intl = _ask_yes_no('Are you an international student? (yes/no) ')

### User Prediction

In [85]:
# Assemble the applicant's single-row feature vector piece by piece.

# Numeric answers pass through unchanged; 'YES'/'NO' answers become 1/0.
yes_no_codes = {'NO': 0, 'YES': 1}
answers = [gpa, lsat, urm, fee_waived, non_trad, intl]
base_features = [yes_no_codes.get(answer, answer) for answer in answers]

# Year dummies: the applicant is placed in the 2023 cycle.
year_flags = [int(name == 'year_2023') for name in years]

# School dummies: 1 for the chosen school, 0 for every other school.
school_flags = [int(name == school) for name in schools]

# columns[0] is skipped so that only the feature columns label the row.
feature_row = np.array(base_features + year_flags + school_flags).reshape(1, -1)
user = pd.DataFrame(feature_row, columns=columns[1:])

# Probability of the positive class, expressed as a whole-number percentage
# (truncated, not rounded).
admit_probability = model.predict_proba(user).flatten()[1]
prob = int(100 * admit_probability)
print('You have a ' + str(prob) + '% ' + 'chance of being admitted to ' + school.title() + ' for law school. Good luck!')

You have a 55% chance of being admitted to Harvard University for law school. Good luck!
