In [6]:
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest
# Display settings: show (effectively) every row and never truncate cell text.
# `None` is the supported way to disable column-width truncation; the old `-1`
# sentinel has been deprecated since pandas 1.0.
pd.set_option('display.max_rows', 100000)
pd.set_option('display.max_colwidth', None)

# Load the violations data.
# NOTE(review): 'Unnamed: 0' is presumably a leftover index column from an
# earlier `to_csv` call - confirm. `errors='ignore'` keeps the cell runnable
# if the CSV is ever regenerated without that column.
file = 'Traffic_Violations_Final.csv'
df = pd.read_csv(file)
df = df.drop(columns='Unnamed: 0', errors='ignore')

# Reference population share per race category, used as the null-hypothesis
# proportion in the z-tests below (the six values sum to 1.0).
# NOTE(review): the source of these figures is not recorded here - presumably
# census shares for the jurisdiction covered by the data; cite the source.
p_prop = {
    'WHITE': .438,
    'BLACK': .197,
    'ASIAN': .156,
    'HISPANIC': .196,
    'NATIVE AMERICAN': .007,
    'OTHER': .006
}

# Filter to citations once and reuse the result for both the per-race counts
# and the grand total.
citations = df[df['Violation Type'] == 'Citation']
race = citations['Race'].value_counts()
# `total` counts every citation row; `value_counts` above ignores rows whose
# Race is missing, so the two only agree when Race has no nulls.
total = len(citations)

print('CITATION COUNT BY RACE')
print('--------------------------')
print(race)
print('--------------------------')
print(f'TOTAL CITATIONS: {total}')

# One-sample z-test per race: is the share of citations issued to that race
# different from its share of the reference population?
for var, count in race.items():
    if var not in p_prop:
        # A category present in the data but absent from the reference table
        # cannot be tested; report it instead of dying with a bare KeyError.
        print('--------------------------')
        print(f'{var}: no population proportion available, test skipped')
        continue

    prop = count/total
    # statsmodels defaults are kept: two-sided alternative, with the standard
    # error computed from the sample proportion (prop_var=False).
    stat, pval = proportions_ztest(count, total, value=p_prop[var])
    print('--------------------------')
    print(f'{var}:')
    print(f'     sample proportion: {prop:,.3f}')
    print(f'     population proportion: {p_prop[var]:,.3f}')
    print(f'     z-statistic = {stat:,.2f}')
    # With samples this large the p-value can underflow to exactly 0.0.
    print(f'     p-value = {pval}')

CITATION COUNT BY RACE
--------------------------
WHITE              215584
BLACK              208743
HISPANIC           155051
ASIAN              31442 
OTHER              29945 
NATIVE AMERICAN    1280  
Name: Race, dtype: int64
--------------------------
TOTAL CITATIONS: 642045
--------------------------
WHITE:
     sample proportion: 0.336
     population proportion: 0.438
     z-statistic = -173.44
     p-value = 0.0
--------------------------
BLACK:
     sample proportion: 0.325
     population proportion: 0.197
     z-statistic = 219.16
     p-value = 0.0
--------------------------
HISPANIC:
     sample proportion: 0.241
     population proportion: 0.196
     z-statistic = 85.18
     p-value = 0.0
--------------------------
ASIAN:
     sample proportion: 0.049
     population proportion: 0.156
     z-statistic = -397.39
     p-value = 0.0
--------------------------
OTHER:
     sample proportion: 0.047
     population proportion: 0.006
     z-statistic = 154.43
     p-value = 0.0