In [18]:
import pandas as pd

class AcsData:
    def __init__(self,filepath="data/acs.csv"):
        self.filepath = filepath
        self.df = self.load_data()
        self.rename_columns()
        self.summary = self.create_summary()

    def create_summary(self):
        races = ['white', 'black', 'other', 'asian','hispanic']

        summary = self.df.groupby('fips').sum()
        for race in races:
            col_name = race + '_percentage'
            summary[col_name] = summary[race] / summary['total_population']

        return summary

    def rename_columns(self):
        fips = self.df['GEO.id2'].astype('str').str[:5]

        hispanic_values = ['HD01_VD12','HD01_VD13','HD01_VD14','HD01_VD15','HD01_VD16','HD01_VD17','HD01_VD18',
                           'HD01_VD19','HD01_VD20','HD01_VD21']

        race_cols = ['HD01_VD01','HD01_VD03','HD01_VD04','HD01_VD06']
        native_cols = ['HD01_VD05','HD01_VD07','HD01_VD08']

        combined_cols = hispanic_values + race_cols + native_cols
        df = self.df[combined_cols]
        df[combined_cols] = df[combined_cols].astype(int)
        df['fips'] = fips

        df['hispanic'] = df[hispanic_values].sum(axis=1)
        df['other'] = df[native_cols].sum(axis=1)

        df.drop(hispanic_values,axis=1,inplace=True)
        df.drop(native_cols, axis=1, inplace=True)
        col_names = {
            'HD01_VD01': 'total_population',
            'HD01_VD03': 'white',
            'HD01_VD04': 'black',
            'HD01_VD06': 'asian',
            'fips': 'fips',
            'hispanic':'hispanic',
            'other':'other'
        }

        df.rename(columns=col_names, inplace=True)
        self.df = df

    def load_data(self):
        community_data = pd.read_csv(self.filepath, encoding="ISO-8859-1")
        data = community_data[1:]
        return data

In [7]:
import pandas as pd

class Stop:
    def __init__(self,filepath):
        self.df = self.load_dataframe(filepath)
        self.summary = self.create_summary()

    def load_dataframe(self, filepath):
        df = pd.read_csv(filepath)
        cols_to_drop = ['location_raw', 'county_name','driver_race_raw']
        df = df.drop(cols_to_drop,axis=1)
        df['driver_race'] = df['driver_race'].str.lower()
        df['county_fips'] = df['county_fips'].astype(str)
        df = df[df['county_fips'].notna()]
        return df

    def create_summary(self):
        summary = self.df.groupby(['county_fips', 'driver_race']).agg('count')
        summary = summary[['id']]
        summary['individual'] = summary['id']
        summary = summary[['individual']]
        summary['percentage'] = summary['individual'] / summary['individual'].groupby(level=0).sum()
        return summary

In [8]:
filepath = 'data/stop_data/WY-clean.csv'
stops = Stop(filepath)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [9]:
df = stops.df
df.county_fips.unique()

array(['56025.0', '56039.0', '56037.0', '56031.0', '56035.0', '56021.0',
       '56029.0', '56023.0', '56041.0', '56015.0', '56027.0', '56009.0',
       '56003.0', '56019.0', '56011.0', '56007.0', '56045.0', '56017.0',
       '56001.0', '56005.0', 'nan', '56043.0', '56013.0', '56033.0'],
      dtype=object)

In [11]:
stops.summary

Unnamed: 0_level_0,Unnamed: 1_level_0,individual,percentage
county_fips,driver_race,Unnamed: 2_level_1,Unnamed: 3_level_1
56001.0,asian,312,0.017473
56001.0,black,499,0.027946
56001.0,hispanic,1178,0.065972
56001.0,other,42,0.002352
56001.0,white,15825,0.886257
56003.0,asian,37,0.014324
56003.0,black,13,0.005033
56003.0,hispanic,93,0.036005
56003.0,other,20,0.007743
56003.0,white,2420,0.936895


In [141]:
summary[56001.0]

driver_race
asian         312
black         499
hispanic     1178
other          42
white       15825
dtype: object

In [19]:
acs = AcsData()
acs_summary = acs.summary

  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/

In [20]:
acs_summary

Unnamed: 0_level_0,total_population,white,black,asian,hispanic,other,white_percentage,black_percentage,other_percentage,asian_percentage,hispanic_percentage
fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10001,171474,108515,40690,3401,23779,1461,0.632836,0.237295,0.008520,0.019834,0.138674
10003,551997,326993,130912,28383,106542,2435,0.592382,0.237161,0.004411,0.051419,0.193012
10005,211224,158057,25948,2379,39438,1463,0.748291,0.122846,0.006926,0.011263,0.186712
10010,55049,41663,10113,444,2851,355,0.756835,0.183709,0.006449,0.008066,0.051790
10030,199510,165950,18406,1325,17856,1533,0.831788,0.092256,0.007684,0.006641,0.089499
10039,0,0,0,0,0,0,,,,,
10059,26614,12212,12745,118,2300,244,0.458856,0.478883,0.009168,0.004434,0.086421
10070,22572,16876,4788,14,1004,81,0.747652,0.212121,0.003589,0.000620,0.044480
10090,57704,50582,899,92,10210,301,0.876577,0.015580,0.005216,0.001594,0.176937
10119,10552,2288,7967,84,26,85,0.216831,0.755023,0.008055,0.007961,0.002464
