In [5]:
import pandas as pd

class AcsData:
    """Aggregate race counts from an ACS CSV extract by ``fips`` key.

    The pipeline reads the CSV, derives a ``fips`` key from the first five
    characters of ``GEO.id2``, renames the ``HD01_VD*`` columns to readable
    names, keeps only the requested columns, casts the counts to ``int`` and
    sums them per ``fips``.

    Every transformation step returns a *new* DataFrame and leaves the frame
    it was given untouched, so no step assigns into a slice of another frame
    (the pattern that triggers pandas' ``SettingWithCopyWarning``) and callers
    never see their input silently modified.
    """

    # Location used when no path is passed to the constructor.
    DEFAULT_FILEPATH = "data/acs.csv"

    # Source column code -> readable column name.
    COLUMN_NAMES = {
        'HD01_VD01': 'total_population',
        'HD01_VD03': 'white',
        'HD01_VD04': 'black',
        'HD01_VD05': 'native',
        'HD01_VD06': 'asian',
        'HD01_VD07': 'pacific_islander',
        'HD01_VD08': 'other',
    }

    def __init__(self, filepath=DEFAULT_FILEPATH):
        """Remember which CSV file ``load_data`` should read.

        Args:
            filepath: Path of the ACS CSV; defaults to ``"data/acs.csv"``.
        """
        self.filepath = filepath

    def main(self):
        """Return per-``fips`` totals plus each race's share of the total.

        Returns:
            DataFrame indexed by ``fips`` holding the summed
            ``total_population`` and race counts, plus one
            ``<race>_population`` column per race containing
            ``race / total_population`` (a ratio, despite the column name).
        """
        races = ['white', 'black', 'native', 'pacific_islander', 'other', 'asian']
        numeric_columns = ['total_population'] + races
        cols_to_keep = numeric_columns + ['fips']

        data = self.prepare_data(cols_to_keep, numeric_columns)

        # `summary` is a fresh frame produced by the aggregation, so adding
        # the ratio columns to it does not touch `data`.
        summary = data.groupby('fips').sum()
        for race in races:
            summary[race + '_population'] = summary[race] / summary['total_population']

        return summary

    def prepare_data(self, cols_to_keep, numeric_columns):
        """Load the CSV and run every cleaning step.

        Args:
            cols_to_keep: Column names (after renaming) to retain.
            numeric_columns: Column names (after renaming) to cast to ``int``.

        Returns:
            DataFrame restricted to ``cols_to_keep`` with ``numeric_columns``
            cast to ``int``.

        Raises:
            ValueError: If a value in ``numeric_columns`` cannot be converted
                to ``int`` (raised by ``DataFrame.astype``).
        """
        data = self.load_data()
        data = self.add_fips_column(data)
        data = self.rename_columns(data)
        data = self.drop_unwanted_columns(data, cols_to_keep)
        # astype with a mapping returns a new frame instead of assigning the
        # converted columns back into a column subset.
        return data.astype({column: int for column in numeric_columns})

    def drop_unwanted_columns(self, data, cols_to_keep):
        """Return an independent copy of ``data`` holding only ``cols_to_keep``."""
        return data[cols_to_keep].copy()

    def rename_columns(self, data):
        """Return a copy of ``data`` with the ``HD01_VD*`` codes renamed.

        Columns not listed in ``COLUMN_NAMES`` keep their names.
        """
        return data.rename(columns=self.COLUMN_NAMES)

    def add_fips_column(self, data):
        """Return a copy of ``data`` with a ``fips`` column appended.

        ``fips`` is the first five characters of ``GEO.id2`` rendered as a
        string.
        """
        # NOTE(review): this relies on GEO.id2 keeping any leading zero, i.e.
        # on the column being read as text rather than as a number - confirm
        # against the actual file.
        return data.assign(fips=data['GEO.id2'].astype('str').str[:5])

    def load_data(self):
        """Read the CSV at ``self.filepath`` and drop its first data row.

        Returns:
            An independent DataFrame containing every row except the first
            one after the header; the original index labels are preserved.
        """
        community_data = pd.read_csv(self.filepath, encoding="ISO-8859-1")
        # Presumably the first row is a second, descriptive header line -
        # TODO confirm. `.copy()` detaches the result from `community_data`
        # so later steps never write into a slice.
        return community_data.iloc[1:].copy()
    

In [21]:
class Stops:
    """Holds a stop-data CSV as a DataFrame in ``self.df``.

    The frame is read once at construction time with the ``location_raw``
    and ``county_name`` columns removed.

    NOTE(review): the cell output that follows the first use of this class
    looks like the tail of a truncated warning from the read - confirm
    whether ``read_csv`` needs explicit ``dtype``/``low_memory`` settings.
    """

    def __init__(self, filepath):
        """Read ``filepath`` and keep the resulting frame on the instance."""
        self.df = self.load_dataframe(filepath)

    def load_dataframe(self, filepath):
        """Read the CSV at ``filepath`` and return it without two columns.

        Raises:
            KeyError: If ``location_raw`` or ``county_name`` is absent
                (raised by ``DataFrame.drop``).
        """
        unwanted = ['location_raw', 'county_name']
        raw_frame = pd.read_csv(filepath)
        return raw_frame.drop(columns=unwanted)



In [22]:
# Load the CT stop-data CSV; Stops reads it eagerly and exposes the frame as `stops.df`.
filepath = 'data/stop_data/CT-clean.csv'
stops = Stops(filepath)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,id,state,stop_date,stop_time,county_fips,fine_grained_location,police_department,driver_gender,driver_age_raw,driver_age,...,violation_raw,violation,search_conducted,search_type_raw,search_type,contraband_found,stop_outcome,is_arrested,officer_id,stop_duration
0,CT-2013-00001,CT,2013-10-01,00:01,9001.0,"00000 N I 95 (WESTPORT, T158) X 18 LL",State Police,F,69,69.0,...,Speed Related,Speeding,False,,,False,Ticket,False,1000002754,1-15 min
1,CT-2013-00002,CT,2013-10-01,00:02,9013.0,rte 195 storrs,State Police,M,20,20.0,...,Moving Violation,Moving violation,False,,,False,Verbal Warning,False,1000001903,1-15 min
2,CT-2013-00003,CT,2013-10-01,00:07,9011.0,Rt 32/whippoorwill,State Police,M,34,34.0,...,Speed Related,Speeding,False,,,False,Ticket,False,1000002711,1-15 min
3,CT-2013-00004,CT,2013-10-01,00:10,9001.0,I-84,State Police,M,46,46.0,...,Speed Related,Speeding,False,,,False,Written Warning,False,113658284,1-15 min
4,CT-2013-00005,CT,2013-10-01,00:10,9003.0,"00000 W I 84 (EAST HARTFORD, T043)E.OF XT.56",State Police,M,30,30.0,...,Speed Related,Speeding,False,,,False,Ticket,False,830814942,1-15 min


In [29]:
# Alias the loaded frame, then list the distinct values of the raw race column.
df = stops.df
df['driver_race_raw'].unique()

array(['Black', 'White', 'Hispanic', 'Native American', 'Asian'],
      dtype=object)