## Logistic Classifier

**Karina Huang, Lipika Ramaswamy**

---

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the prepared dataset, using the CSV's 'Unnamed: 0' column as the row index.
data = pd.read_csv(
    'data/data_final.csv',
    index_col=['Unnamed: 0'],
)
# Preview the first rows as a quick sanity check of the load.
data.head()

Unnamed: 0,id,sex,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,decile_score,score_text,is_recid,v_decile_score,v_score_text,is_violent_recid,two_year_recid
0,1,Male,69,Other,0,0,0,0,1,Low,0,1,Low,0,0
1,3,Male,34,African-American,0,0,0,0,3,Low,1,1,Low,1,1
2,4,Male,24,African-American,0,0,1,4,4,Low,1,3,Low,0,1
3,5,Male,23,African-American,0,1,0,1,8,High,0,6,Medium,0,0
4,6,Male,43,Other,0,0,0,2,1,Low,0,1,Low,0,0


In [6]:
# One-hot encode the categorical columns; every other column is passed through.
data_new = pd.get_dummies(
    data,
    columns=['sex', 'race'],
)

In [25]:
# Predictor columns: numeric features plus the dummy columns for sex and race.
predictors = [
    'age',
    'juv_misd_count',
    'priors_count',
    'sex_Female',
    'sex_Male',
    'race_African-American',
    'race_Asian',
    'race_Caucasian',
    'race_Hispanic',
    'race_Native American',
    'race_Other',
]

# Response columns, kept as one-element lists.
# The text labels are the active targets; the numeric alternatives
# ('decile_score' and 'v_decile_score') are currently not used.
genR = ['score_text']
vR = ['v_score_text']

In [26]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
train, test = train_test_split(
    data_new,
    test_size=0.20,
    random_state=221,
)

In [27]:
def getModel(df, pred, resp):
    """Fit a cross-validated logistic regression on the given columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both the predictor and response columns.
    pred : list of str
        Names of the predictor columns.
    resp : str or list of str
        Name of the response column (a one-element list is accepted).

    Returns
    -------
    LogisticRegressionCV
        The fitted classifier (5-fold CV, 'newton-cg' solver, seed 221).
    """
    x, y = getXY(df, pred, resp)
    # When `resp` is a list, getXY hands back a one-column frame; flatten it
    # so fit() receives the 1-D label vector it documents, instead of relying
    # on an implicit column-vector conversion.
    labels = np.ravel(y)
    m = LogisticRegressionCV(cv = 5, solver = 'newton-cg', random_state = 221)
    m.fit(x, labels)
    return m

In [28]:
def getXY(df, pred, resp):
    """Select the predictor and response columns from a data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    pred : column label or list of labels
        Selector for the predictor columns, passed to ``df[...]``.
    resp : column label or list of labels
        Selector for the response column(s), passed to ``df[...]``.

    Returns
    -------
    tuple
        ``(df[pred], df[resp])`` -- predictors first, response second.
    """
    return df[pred], df[resp]

In [29]:
# Classifier for the general risk label (response column list `genR`).
mG = getModel(df=train, pred=predictors, resp=genR)
# Classifier for the violence risk label (response column list `vR`).
mV = getModel(df=train, pred=predictors, resp=vR)

In [30]:
# Only the predictor frame (element 0 of getXY's return) is used for prediction.
# Each model is paired with its own response list so the calls mirror how the
# model was trained; previously two of the four calls named the other response.

# get test predictions
test_predG = mG.predict(getXY(test, predictors, genR)[0])
test_predV = mV.predict(getXY(test, predictors, vR)[0])

# get training predictions
train_predG = mG.predict(getXY(train, predictors, genR)[0])
train_predV = mV.predict(getXY(train, predictors, vR)[0])

In [31]:
# True labels for each split/target pair (element 1 of getXY's return).
y_test_general = getXY(test, predictors, genR)[1]
y_test_violence = getXY(test, predictors, vR)[1]
y_train_general = getXY(train, predictors, genR)[1]
y_train_violence = getXY(train, predictors, vR)[1]

# Accuracy on the held-out test split.
test_accG = accuracy_score(y_test_general, test_predG)
test_accV = accuracy_score(y_test_violence, test_predV)

# Accuracy on the training split.
train_accG = accuracy_score(y_train_general, train_predG)
train_accV = accuracy_score(y_train_violence, train_predV)

# Print the training block first, then the test block, separated by a blank line.
accuracy_report = [
    ('Training Prediction Accuracy Score: ', train_accG, train_accV),
    ('Test Prediction Accuracy Score: ', test_accG, test_accV),
]
for block_no, (heading, general_acc, violence_acc) in enumerate(accuracy_report):
    if block_no:
        print()
    print(heading)
    print('General Decile Score: ', general_acc)
    print('Violence Decile Score: ', violence_acc)

Training Prediction Accuracy Score: 
General Decile Score:  0.6303933460405475
Violence Decile Score:  0.7244844914226304

Test Prediction Accuracy Score: 
General Decile Score:  0.6160776160776161
Violence Decile Score:  0.7331947331947332


In [32]:
# True labels for each split/target pair (element 1 of getXY's return).
y_test_general = getXY(test, predictors, genR)[1]
y_test_violence = getXY(test, predictors, vR)[1]
y_train_general = getXY(train, predictors, genR)[1]
y_train_violence = getXY(train, predictors, vR)[1]

# Weighted-average F1 on the held-out test split.
test_f1G = f1_score(y_test_general, test_predG, average='weighted')
test_f1V = f1_score(y_test_violence, test_predV, average='weighted')

# Weighted-average F1 on the training split.
train_f1G = f1_score(y_train_general, train_predG, average='weighted')
train_f1V = f1_score(y_train_violence, train_predV, average='weighted')

# Print the training block first, then the test block, separated by a blank line.
f1_report = [
    ('Training Prediction F1 Score: ', train_f1G, train_f1V),
    ('Test Prediction F1 Score: ', test_f1G, test_f1V),
]
for block_no, (heading, general_f1, violence_f1) in enumerate(f1_report):
    if block_no:
        print()
    print(heading)
    print('General Decile Score: ', general_f1)
    print('Violence Decile Score: ', violence_f1)

Training Prediction F1 Score: 
General Decile Score:  0.5526109872634061
Violence Decile Score:  0.6681618897704493

Test Prediction F1 Score: 
General Decile Score:  0.531584603113242
Violence Decile Score:  0.6758784709760625
