In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.max_columns = 50

In [3]:
def read_in_csv(file_path='./parking-geo.csv'):
    # let's be memory efficient when loading our data
    dtypes_dict = \
    {
        'ticket_number': np.int32,
        'violation_location': str,
        'license_plate_number': str,
        'license_plate_state': 'category',
        'license_plate_type': 'category',
        'zipcode': str,
        'violation_code': 'category',
        'violation_description': 'category',
        'unit': 'category',
        'unit_description': 'category',
        'vehicle_make': 'category',
        'fine_level1_amount': np.int32,
        'fine_level2_amount': np.int32,
        'current_amount_due': np.float64,
        'total_payments': np.float64,
        'ticket_queue': 'category',
        'notice_level': 'category',
        'hearing_disposition': 'category',
        'notice_number': np.int32,
        'dismissal_reason': str,
        'officer': str,
        'address': str,
        'license_hash': str,
        'year': np.int32,
        'month': 'category',
        'hour': 'category',
        'penalty': np.float64,
        'ward': 'category',
        'geocode_accuracy': np.float64,
        'geocode_accuracy_type': 'category',
        'geocoded_address': str,
        'geocoded_lng': str,
        'geocoded_lat': str,
        'geocoded_city': 'category',
        'geocoded_state': 'category'
    }
    #still better than strings
    parse_dates_list = \
    [
        'issue_date',
        'ticket_queue_date',
    ]
    
    # read csv into memory -- this takes quite a while
    df = pd.read_csv(file_path, dtype=dtypes_dict, parse_dates=parse_dates_list)
    return df

In [4]:
def calculate_summary_stats(df_raw, min_year = 1995, max_year = 2019):
    
    # private helper function
    def rank_series(series):
        out_series = series.rank(ascending=False)
        return out_series
    
    df_filtered = df_raw[
        (df_raw['year'] > min_year) & (df_raw['year'] < max_year) & 
        (df_raw['geocode_accuracy_type'].isin(['rooftop', 'range_interpolation', 'intersection', 'point'])) & 
        (df_raw['geocoded_city'] == 'Chicago')
    ]
    # not used
    # df_filtered_na = df_filtered[df_filtered['ward'].isnull()]
    df_filtered = df_filtered[df_filtered['ward'].notnull()]

    # calculate base dataframes
    df_dict = dict()
    df_dict['filtered'] = df_filtered
    df_dict['police_tickets'] = df_filtered[
        df_filtered['unit_description'].isin(['CPD', 'CPD-Other', 'CPD-Airport'])
    ]
    df_dict['contested_tickets'] = df_filtered[
        df_filtered['hearing_disposition'].isin(['Liable', 'Not Liable'])
    ]
    df_dict['paid_tickets'] = df_filtered[
        df_filtered['ticket_queue'] == 'Paid'
    ]
    df_dict['dismissed_tickets'] = df_filtered[
        df_filtered['ticket_queue'] == 'Dismissed'
    ]
    df_dict['seized_or_suspended_tickets'] = df_filtered[
        df_filtered['notice_level'].isin(['SEIZ', 'DLS'])
    ]
    df_dict['bankruptcy_tickets'] = df_filtered[
        df_filtered['ticket_queue'] == 'Bankruptcy'
    ]

    # group dataframes by ward
    gb_dict = dict()
    for key in df_dict:
        gb_dict[key] = df_dict[key].groupby('ward')

    # calculate the different stats
    out_dict = dict()
    ticket_count = gb_dict['filtered']['ticket_number'].count()
    out_dict['ticket_count'] = ticket_count
    out_dict['current_amount_due'] = gb_dict['filtered']['current_amount_due'].sum()
    out_dict['fine_level1_amount'] = gb_dict['filtered']['fine_level1_amount'].sum()
    out_dict['total_payments'] = gb_dict['filtered']['total_payments'].sum()
    out_dict['avg_per_ticket'] = out_dict['fine_level1_amount']/ticket_count
    out_dict['paid_pct'] = out_dict['total_payments']/(out_dict['current_amount_due']+out_dict['total_payments'])
    out_dict['police_ticket_count'] = gb_dict['police_tickets']['ticket_number'].count()
    out_dict['police_ticket_count_pct'] = out_dict['police_ticket_count'] / ticket_count
    out_dict['contested_ticket_count'] = gb_dict['contested_tickets']['ticket_number'].count()
    out_dict['contested_ticket_count_pct'] = out_dict['contested_ticket_count'] / ticket_count
    out_dict['paid_ticket_count'] = gb_dict['paid_tickets']['ticket_number'].count()
    out_dict['paid_ticket_count_pct'] = out_dict['paid_ticket_count'] / ticket_count
    out_dict['dismissed_ticket_count'] = gb_dict['dismissed_tickets']['ticket_number'].count()
    out_dict['dismissed_ticket_count_pct'] = out_dict['dismissed_ticket_count'] / ticket_count
    out_dict['seized_or_suspended_ticket_count'] = gb_dict['seized_or_suspended_tickets']['ticket_number'].count()
    out_dict['seized_or_suspended_ticket_count_pct'] = out_dict['seized_or_suspended_ticket_count'] / ticket_count
    out_dict['bankruptcy_ticket_count'] = gb_dict['bankruptcy_tickets']['ticket_number'].count()
    out_dict['bankruptcy_ticket_count_pct'] = out_dict['bankruptcy_ticket_count'] / ticket_count

    # calculate ranks; combine and format output dataframe
    df_out = pd.DataFrame()
    for key in out_dict:
        df_out[key] = out_dict[key]
        df_out[key+'_rank'] = rank_series(out_dict[key]).astype(int)
    df_out.index = df_out.index.astype(int)
    df_out = df_out.sort_index()
    
    return df_out

In [5]:
%%time
# takes quite a while...
df = read_in_csv()

CPU times: user 9min 15s, sys: 2min 30s, total: 11min 46s
Wall time: 12min 12s


In [6]:
%%time
df_1996to2018 = calculate_summary_stats(df, min_year=1995, max_year=2019)

CPU times: user 1min 22s, sys: 2min 39s, total: 4min 2s
Wall time: 4min 42s


In [7]:
%%time
df_2013to2017 = calculate_summary_stats(df, min_year=2012, max_year=2018)

CPU times: user 19.4 s, sys: 10.4 s, total: 29.9 s
Wall time: 24.4 s


In [8]:
df_1996to2018.to_csv('df_1996to2018.csv')
# df_1996to2018.to_csv()

In [9]:
df_2013to2017.to_csv('df_2013to2017.csv')
# df_2013to2017.to_csv()

In [10]:
df_check = pd.read_csv('./wardstotals (1).csv', index_col='ward').sort_index(ascending=True)
df_check5yr = pd.read_csv('./wardstotals5yr (1).csv', index_col='ward').sort_index(ascending=True)
df_1996to2018_check = df_check[df_1996to2018.columns.tolist()]
df_2013to2017_check = df_check5yr[df_2013to2017.columns.tolist()]

In [11]:
wardstotals_sql_minus_pandas = df_1996to2018_check - df_1996to2018
wardstotals5yr_sql_minus_pandas = df_2013to2017_check - df_2013to2017

In [12]:
df_1996to2018.head()

Unnamed: 0_level_0,ticket_count,ticket_count_rank,current_amount_due,current_amount_due_rank,fine_level1_amount,fine_level1_amount_rank,total_payments,total_payments_rank,avg_per_ticket,avg_per_ticket_rank,paid_pct,paid_pct_rank,police_ticket_count,police_ticket_count_rank,police_ticket_count_pct,police_ticket_count_pct_rank,contested_ticket_count,contested_ticket_count_rank,contested_ticket_count_pct,contested_ticket_count_pct_rank,paid_ticket_count,paid_ticket_count_rank,paid_ticket_count_pct,paid_ticket_count_pct_rank,dismissed_ticket_count,dismissed_ticket_count_rank,dismissed_ticket_count_pct,dismissed_ticket_count_pct_rank,seized_or_suspended_ticket_count,seized_or_suspended_ticket_count_rank,seized_or_suspended_ticket_count_pct,seized_or_suspended_ticket_count_pct_rank,bankruptcy_ticket_count,bankruptcy_ticket_count_rank,bankruptcy_ticket_count_pct,bankruptcy_ticket_count_pct_rank
ward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
1,1681635,8,38422080.0,17,98467185,7,90108420.0,6,58.554434,39,0.701066,7,584989,14,0.347869,48,117098,8,0.069633,37,1158544,6,0.688939,6,107759,8,0.06408,40,383301,9,0.227934,41,5791,26,0.003444,38
2,2076974,3,34255860.0,22,117031080,3,109131700.0,3,56.346916,46,0.761096,4,818518,4,0.394092,46,179764,3,0.086551,15,1479128,4,0.712155,5,176272,3,0.08487,14,388287,6,0.186948,47,7167,21,0.003451,36
3,1068060,15,39425370.0,14,66560465,13,56067860.0,14,62.319032,28,0.58714,29,657643,9,0.615736,13,95109,12,0.089048,9,631895,18,0.591629,29,92175,13,0.086301,12,363786,12,0.340604,16,15058,13,0.014098,16
4,1700508,7,44506650.0,9,97472810,8,87362650.0,8,57.319819,45,0.662494,14,975703,3,0.573771,20,149170,6,0.087721,11,1081707,8,0.636108,18,155889,6,0.091672,4,486627,4,0.286166,26,16270,12,0.009568,20
5,1066278,16,39099470.0,15,67806820,12,58263710.0,13,63.592065,24,0.598416,25,476558,23,0.446936,39,88310,15,0.082821,22,642266,17,0.602344,26,92194,12,0.086463,11,364347,11,0.3417,15,16854,11,0.015806,15


In [13]:
wardstotals_sql_minus_pandas

Unnamed: 0_level_0,ticket_count,ticket_count_rank,current_amount_due,current_amount_due_rank,fine_level1_amount,fine_level1_amount_rank,total_payments,total_payments_rank,avg_per_ticket,avg_per_ticket_rank,paid_pct,paid_pct_rank,police_ticket_count,police_ticket_count_rank,police_ticket_count_pct,police_ticket_count_pct_rank,contested_ticket_count,contested_ticket_count_rank,contested_ticket_count_pct,contested_ticket_count_pct_rank,paid_ticket_count,paid_ticket_count_rank,paid_ticket_count_pct,paid_ticket_count_pct_rank,dismissed_ticket_count,dismissed_ticket_count_rank,dismissed_ticket_count_pct,dismissed_ticket_count_pct_rank,seized_or_suspended_ticket_count,seized_or_suspended_ticket_count_rank,seized_or_suspended_ticket_count_pct,seized_or_suspended_ticket_count_pct_rank,bankruptcy_ticket_count,bankruptcy_ticket_count_rank,bankruptcy_ticket_count_pct,bankruptcy_ticket_count_pct_rank
ward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
1,0,0,2.673268e-05,0,0,0,-1.41263e-05,0,0.0,0,-1.787459e-13,0,0,0,5.5511150000000004e-17,0,0,0,0.0,0,0,0,1.110223e-16,0,0,0,1.387779e-17,0,0,0,-2.775558e-17,0,0,0,-4.336809e-19,0
2,0,0,1.467764e-05,0,0,0,-1.725554e-05,0,-7.105427e-15,0,-1.065814e-13,0,0,0,5.5511150000000004e-17,0,0,0,0.0,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,-4.336809e-19,0
3,0,0,2.803653e-05,0,0,0,1.446903e-05,0,-2.131628e-14,0,-1.0969e-13,0,0,0,1.110223e-16,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,-5.5511150000000004e-17,0,0,0,0.0,0
4,0,0,1.683086e-05,0,0,0,-8.404255e-06,0,2.842171e-14,0,-1.062483e-13,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,-3.469447e-18,0
5,0,0,2.440065e-05,0,0,0,1.393259e-05,0,7.105427e-15,0,-9.25926e-14,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,5.5511150000000004e-17,0,0,0,-3.469447e-18,0
6,0,0,3.805012e-05,0,0,0,1.320988e-05,0,5.684342e-14,0,-1.217915e-13,0,0,0,0.0,0,0,0,0.0,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,3.469447e-18,0
7,0,0,1.917034e-05,0,0,0,4.161149e-06,0,4.263256e-14,0,-1.054157e-13,0,0,0,-1.110223e-16,0,0,0,1.387779e-17,0,0,0,-5.5511150000000004e-17,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,0.0,0
8,0,0,1.595169e-05,0,0,0,6.012619e-06,0,1.421085e-14,0,-7.327472e-14,0,0,0,0.0,0,0,0,1.387779e-17,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,-1.110223e-16,0,0,0,0.0,0
9,0,0,-2.086163e-07,0,0,0,-5.885959e-07,0,-5.684342e-14,0,-5.77316e-15,0,0,0,1.110223e-16,0,0,0,-1.387779e-17,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,5.5511150000000004e-17,0,0,0,-3.469447e-18,0
10,0,0,-4.071742e-06,0,0,0,-6.705523e-07,0,4.263256e-14,0,4.884981e-14,0,0,0,1.110223e-16,0,0,0,0.0,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,-5.5511150000000004e-17,0,0,0,-1.734723e-18,0


In [14]:
wardstotals5yr_sql_minus_pandas

Unnamed: 0_level_0,ticket_count,ticket_count_rank,current_amount_due,current_amount_due_rank,fine_level1_amount,fine_level1_amount_rank,total_payments,total_payments_rank,avg_per_ticket,avg_per_ticket_rank,paid_pct,paid_pct_rank,police_ticket_count,police_ticket_count_rank,police_ticket_count_pct,police_ticket_count_pct_rank,contested_ticket_count,contested_ticket_count_rank,contested_ticket_count_pct,contested_ticket_count_pct_rank,paid_ticket_count,paid_ticket_count_rank,paid_ticket_count_pct,paid_ticket_count_pct_rank,dismissed_ticket_count,dismissed_ticket_count_rank,dismissed_ticket_count_pct,dismissed_ticket_count_pct_rank,seized_or_suspended_ticket_count,seized_or_suspended_ticket_count_rank,seized_or_suspended_ticket_count_pct,seized_or_suspended_ticket_count_pct_rank,bankruptcy_ticket_count,bankruptcy_ticket_count_rank,bankruptcy_ticket_count_pct,bankruptcy_ticket_count_pct_rank
ward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
1,0,0,-1.803041e-06,0,0,0,3.647059e-06,0,-1.421085e-14,0,6.117329e-14,0,0,0,0.0,0,0,0,0.0,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,0,4.336809e-19,0
2,0,0,4.190952e-07,0,0,0,-1.266599e-07,0,-2.842171e-14,0,-9.547918e-15,0,0,0,-2.775558e-17,0,0,0,1.387779e-17,0,0,0,-1.110223e-16,0,0,0,1.387779e-17,0,0,0,0.0,0,0,0,0.0,0
3,0,0,-4.082918e-06,0,0,0,-1.02818e-06,0,1.421085e-14,0,5.861978e-14,0,0,0,-1.110223e-16,0,0,0,1.387779e-17,0,0,0,-1.110223e-16,0,0,0,-2.775558e-17,0,0,0,2.775558e-17,0,0,0,0.0,0
4,0,0,-1.659617e-06,0,0,0,8.493662e-07,0,4.263256e-14,0,4.085621e-14,0,0,0,-5.5511150000000004e-17,0,0,0,0.0,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,-2.775558e-17,0,0,0,-1.734723e-18,0
5,0,0,-4.494563e-06,0,0,0,-2.194196e-06,0,5.684342e-14,0,4.318768e-14,0,0,0,0.0,0,0,0,-1.387779e-17,0,0,0,-1.110223e-16,0,0,0,1.387779e-17,0,0,0,0.0,0,0,0,0.0,0
6,0,0,1.996756e-06,0,0,0,-1.819804e-06,0,-4.263256e-14,0,-5.645484e-14,0,0,0,0.0,0,0,0,0.0,0,0,0,1.110223e-16,0,0,0,-1.387779e-17,0,0,0,-5.5511150000000004e-17,0,0,0,0.0,0
7,0,0,-4.813075e-06,0,0,0,-9.145588e-07,0,2.842171e-14,0,4.907186e-14,0,0,0,0.0,0,0,0,-2.775558e-17,0,0,0,-5.5511150000000004e-17,0,0,-1,0.0,0,0,0,0.0,0,0,0,6.938894e-18,0
8,0,0,-4.524365e-06,0,0,0,-1.210719e-06,0,4.263256e-14,0,4.091172e-14,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,5.5511150000000004e-17,0,0,0,0.0,0,0,0,0.0,0,0,0,0.0,0
9,0,0,-2.145767e-06,0,0,0,-2.039596e-07,0,-1.421085e-14,0,4.490852e-14,0,0,0,-1.110223e-16,0,0,0,-1.387779e-17,0,0,0,0.0,0,0,-1,1.387779e-17,0,0,0,0.0,0,0,0,0.0,0
10,0,0,-5.979091e-07,0,0,0,-1.518056e-07,0,0.0,0,1.659783e-14,0,0,0,0.0,0,0,0,1.387779e-17,0,0,0,-1.110223e-16,0,0,-1,0.0,0,0,0,0.0,0,0,0,-3.469447e-18,0


In [15]:
df_compare = pd.DataFrame()
df_compare['5yr_ticket_count_identical'] = ~wardstotals5yr_sql_minus_pandas['ticket_count'].astype(bool)
df_compare['all_yr_ticket_count_identical'] = ~wardstotals_sql_minus_pandas['ticket_count'].astype(bool)
df_compare

Unnamed: 0_level_0,5yr_ticket_count_identical,all_yr_ticket_count_identical
ward,Unnamed: 1_level_1,Unnamed: 2_level_1
1,True,True
2,True,True
3,True,True
4,True,True
5,True,True
6,True,True
7,True,True
8,True,True
9,True,True
10,True,True


In [16]:
# df_full_compare = pd.DataFrame()
# columns = df_1996to2018.columns.tolist()
# for column in columns:

In [17]:
wardstotals5yr_sql_minus_pandas.to_csv('./wardstotals5yr_sql_minus_pandas.csv')
wardstotals_sql_minus_pandas.to_csv('./wardstotals_sql_minus_pandas.csv')
df_compare.to_csv('./ticket_count_identical.csv')

In [31]:
# Top five types of tickets and counts per ward, 2013-2017

In [44]:
%%time

df_2013_2017 = df[
    (df['year'] > 2012) & (df['year'] < 2018) & 
    (df['geocode_accuracy_type'].isin(['rooftop', 'range_interpolation', 'intersection', 'point'])) & 
    (df['geocoded_city'] == 'Chicago')
]
df_2013_2017 = df_2013_2017[df_2013_2017['ward'].notnull()]
df_2013_2017 = df_2013_2017[['ticket_number','year','violation_code']]

CPU times: user 11.6 s, sys: 16.7 s, total: 28.2 s
Wall time: 31.7 s


In [79]:
%%time
gb_2013_2017 = df_2013_2017.groupby(['year','violation_code'])

CPU times: user 358 µs, sys: 26 µs, total: 384 µs
Wall time: 369 µs


In [83]:
top_5_list = []
for year, new_df in gb_2013_2017.count().groupby('year'):
    top_5_list.append(new_df.nlargest(5, columns='ticket_number'))

In [84]:
df_top_five_2013_2017 = pd.concat(top_5_list)

In [86]:
df_top_five_2013_2017.to_csv('./top_five_violations_2013_2017.csv')

In [18]:
# %%time
# # check out ward 7
# df_filtered_check = df[
#     (df['year'] > 1995) & (df['year'] < 2019) & 
#     (df['geocode_accuracy_type'].isin(['rooftop', 'range_interpolation', 'intersection', 'point'])) & 
#     (df['geocoded_city'] == 'Chicago')
# ]
# df_filtered_check = df_filtered_check[df_filtered_check['ward'].notnull()]
# df_filtered_check = df_filtered_check[df_filtered_check['ward'] == '7']

In [19]:
# df_filtered_check
# df_ward7check = read_in_csv('./ward7test.csv')
# df_ward7check

In [20]:
# list_ward7sql = sorted(df_ward7check['ticket_number'].astype(int).tolist())
# list_ward7pandas = sorted(df_filtered_check['ticket_number'].astype(int).tolist())
# extra_rows = list(set(list_ward7sql) - set(list_ward7pandas))
# df_ward7check_out = df_ward7check.set_index('ticket_number')