# Crime data prep
---
- Cleaning records from source
- Refactoring
- Standardizing county names
- Generating new columns based on date (day of week, quarter, year)
- Combining the 97-15 data and 16-19 data

In [1]:
%run workspace.py

## Crime 16-19

In [2]:
head(read_raw('crime_16_19'))

9 cols x 1851996 rows


Unnamed: 0,pub_agency_name,county_name,incident_date,incident_hour,offense_name,crime_against,offense_category_name,offense_group,age_num
0,Westminster,JEFFERSON; ADAMS,08/26/2017,17.0,Aggravated Assault,Person,Assault Offenses,A,52.0
1,Westminster,JEFFERSON; ADAMS,11/22/2017,20.0,Aggravated Assault,Person,Assault Offenses,A,29.0
2,Westminster,JEFFERSON; ADAMS,12/28/2017,21.0,Motor Vehicle Theft,Property,Motor Vehicle Theft,A,


In [3]:
# Pull the 2016-2019 records into the column layout shared by both source tables.
# year / quarter / month / day_of_week are selected as NULL placeholders here and
# are filled in from the parsed date in the cleaning cell that follows.
query_16_19 = '''
SELECT
    NULL AS year,
    county_name AS county,
    pub_agency_name AS police_dept,
    incident_date AS date,
    NULL AS quarter,
    NULL AS month,
    NULL AS day_of_week,
    incident_hour AS hour,
    age_num AS age,
    crime_against,
    offense_name,
    offense_category_name AS offense_category
FROM crime_16_19
'''
df = read_raw(query_16_19)
head(df)

12 cols x 1851996 rows


Unnamed: 0,year,county,police_dept,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,,JEFFERSON; ADAMS,Westminster,08/26/2017,,,,17.0,52.0,Person,Aggravated Assault,Assault Offenses
1,,JEFFERSON; ADAMS,Westminster,11/22/2017,,,,20.0,29.0,Person,Aggravated Assault,Assault Offenses
2,,JEFFERSON; ADAMS,Westminster,12/28/2017,,,,21.0,,Property,Motor Vehicle Theft,Motor Vehicle Theft


In [4]:
# Filter: drop rows reported by the two listed agencies and rows with no agency.
# NOTE(review): presumably these two are excluded because they are statewide
# agencies that cannot be attributed to a single county -- confirm.
excluded_agencies = ['State Patrol', 'Colorado Bureau of Investigation']
keep_rows = ~df.police_dept.isin(excluded_agencies) & df.police_dept.notna()

# .copy() yields an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning.
df = df.loc[keep_rows].copy()

# Since we're focused on county and not police department, replace dual county
# police department county values (Ex: "JEFFERSON; ADAMS") with just the primary (Ex: "JEFFERSON")
df['county'] = df.county.str.split('; ').str[0].str.upper()

# Convert to datetime. The raw values in this table are MM/DD/YYYY strings
# (e.g. "08/26/2017"), so the format is stated explicitly rather than relying on
# the deprecated `infer_datetime_format` flag: parsing is deterministic, and a
# value in any other layout raises instead of being silently mis-parsed.
df['date'] = pd.to_datetime(df.date, format='%m/%d/%Y')

# Fill the placeholder columns from the parsed date.
# day_of_week follows the pandas convention: Monday=0 ... Sunday=6.
df['year'] = df.date.dt.year
df['quarter'] = df.date.dt.quarter
df['month'] = df.date.dt.month
df['day_of_week'] = df.date.dt.day_of_week

df_16_19 = df
head(df_16_19)

12 cols x 1845650 rows


Unnamed: 0,year,county,police_dept,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,2017,JEFFERSON,Westminster,2017-08-26,3,8,5,17.0,52.0,Person,Aggravated Assault,Assault Offenses
1,2017,JEFFERSON,Westminster,2017-11-22,4,11,2,20.0,29.0,Person,Aggravated Assault,Assault Offenses
2,2017,JEFFERSON,Westminster,2017-12-28,4,12,3,21.0,,Property,Motor Vehicle Theft,Motor Vehicle Theft


## Crime 97-15

In [5]:
head(read_raw('crime_97_15'))

10 cols x 4952282 rows


Unnamed: 0,agency_name,agency_type_name,city_name,primary_county,incident_hour,offense_name,crime_against,offense_category_name,age_num,incident_date
0,Lyons Police Department,City,Lyons,Boulder,,,,,,
1,Kremmling Police Department,City,Kremmling,Grand,,,,,,
2,Oak Creek Police Department,City,Oak Creek,Routt,,,,,,


In [6]:
# Pull the 1997-2015 records into the same column layout as the 2016-2019 query.
# County names are upper-cased in SQL; year / quarter / month / day_of_week are
# NULL placeholders that the following cleaning cell fills from the parsed date.
query_97_15 = '''
SELECT
    NULL AS year,
    UPPER(primary_county) AS county,
    agency_name AS police_dept,
    incident_date AS date,
    NULL AS quarter,
    NULL AS month,
    NULL AS day_of_week,
    incident_hour AS hour,
    age_num AS age,
    crime_against,
    offense_name,
    offense_category_name AS offense_category
FROM crime_97_15
'''
df = read_raw(query_97_15)
head(df)

12 cols x 4952282 rows


Unnamed: 0,year,county,police_dept,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,,BOULDER,Lyons Police Department,,,,,,,,,
1,,GRAND,Kremmling Police Department,,,,,,,,,
2,,ROUTT,Oak Creek Police Department,,,,,,,,,


In [7]:
# Filter, parse and enrich in one chain. Every step returns a new frame, so no
# explicit .copy() is needed to stay clear of SettingWithCopyWarning.
#   1. keep rows that are not from the two listed agencies and that have a date
#   2. parse the date strings
#   3. fill the placeholder columns from the parsed date (day_of_week: Monday=0)
# NOTE(review): `infer_datetime_format` is deprecated as of pandas 2.0; the raw
# date layout of this table is not visible here, so the flag is left as-is.
df = (
    df
    .loc[lambda d: ~d.police_dept.isin(['State Patrol', 'Colorado Bureau of Investigation'])
                   & d.date.notna()]
    .assign(date=lambda d: pd.to_datetime(d.date, infer_datetime_format=True))
    .assign(
        year=lambda d: d.date.dt.year,
        quarter=lambda d: d.date.dt.quarter,
        month=lambda d: d.date.dt.month,
        day_of_week=lambda d: d.date.dt.day_of_week,
    )
)

df_97_15 = df
head(df_97_15)

12 cols x 4925016 rows


Unnamed: 0,year,county,police_dept,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
24,1997,BOULDER,Longmont Police Department,1997-03-14,1,3,4,,15.0,Person,Fondling,Sex Offenses
25,1997,BOULDER,Longmont Police Department,1997-07-02,3,7,2,21.0,14.0,Property,Arson,Arson
26,1997,KIT CARSON,Kit Carson County Sheriff's Office,1997-01-20,1,1,0,22.0,58.0,Person,Simple Assault,Assault Offenses


## Combine into single dataset
---

In [8]:
# Stacking rows only lines up when both frames have the same columns in the same order
assert df_97_15.columns.tolist() == df_16_19.columns.tolist()

# Older records first, then the newer ones; ignore_index gives a fresh 0..n-1 index.
# police_dept was only needed for filtering, so it is dropped from the combined table.
df_all = (
    pd.concat([df_97_15, df_16_19], axis=0, ignore_index=True)
    .drop_cols('police_dept')
)
head(df_all)

11 cols x 6770666 rows


Unnamed: 0,year,county,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,1997,BOULDER,1997-03-14,1,3,4,,15.0,Person,Fondling,Sex Offenses
1,1997,BOULDER,1997-07-02,3,7,2,21.0,14.0,Property,Arson,Arson
2,1997,KIT CARSON,1997-01-20,1,1,0,22.0,58.0,Person,Simple Assault,Assault Offenses


In [9]:
# Persist the combined record-level table under the name 'crime_records';
# the aggregation section reads it back with read_main('crime_records').
write_main(df_all, 'crime_records')

6770666

---
---
---

# Aggregating crime data

`crime_agg_category.csv`: 32 columns (crime counts broken down by offense **category**)

`crime_agg_name.csv`: 54 columns (crime counts broken down by offense **name**)

### Creates two aggregated datasets. Both include:
- First, ALL values in `crime_against`, `offense_name`, and `offense_category` are renamed to shorter alternatives in *snake_case*. This prepares those columns for dummification (one-hot encoding), so that the resulting dummy columns get friendly names.
- Data is grouped by year and county, and include the following aggregated columns:
  - Crime count
  - Average age
  - Mode quarter (which quarter had the most crimes?)
  - Mode month (which month had the most crimes?)
  - Mode day of week (Monday–Sunday => 0–6, as returned by pandas `dt.day_of_week`)
  - Mode hour of day (military time)
  - The original `crime_against` column was dummified, and summed during aggregation, to show total crime counts for each:
    - `against_person`
    - `against_society`
    - `against_property`
    - `not_a_crime`

#### Additional columns in `crime_agg_category` data:
- The original `offense_category` column was dummified, and then summed during aggregation, showing total crime counts broken up by offense category

#### Additional columns in `crime_agg_name` data:
- The original `offense_name` column was dummified, and then summed during aggregation, showing total crime counts broken up by offense name.

In [10]:
%run workspace.py

In [12]:
# Reload the combined record-level table ('crime_records') as the starting point
# for the aggregation steps.
df_raw = read_main('crime_records')
head(df_raw)

11 cols x 6770666 rows


Unnamed: 0,year,county,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,1997,BOULDER,1997-03-14 00:00:00,1,3,4,,15.0,Person,Fondling,Sex Offenses
1,1997,BOULDER,1997-07-02 00:00:00,3,7,2,21.0,14.0,Property,Arson,Arson
2,1997,KIT CARSON,1997-01-20 00:00:00,1,1,0,22.0,58.0,Person,Simple Assault,Assault Offenses


#### Remap all values in categorical columns based on excel sheet
- We created 3 tables by hand in Excel (one sheet each in `crime_renaming_map.xlsx`) that rename EACH value in `offense_name`, `offense_category` and `crime_against`. This was needed so that the dummy columns created later get friendly names.

In [13]:
def read_sht(sheet):
    """Load one sheet of the hand-made renaming workbook (columns OLD and NEW)."""
    return pd.read_excel('crime_renaming_map.xlsx', sheet_name=sheet)


def remap_values(values, mapping):
    """Rename every value of `values` using the OLD -> NEW pairs in `mapping`.

    Series.map() turns any value that has no key in the lookup dict into NaN.
    Such rows would later drop out of the dummy columns without any notice, so
    values that were present before the rename but are NaN afterwards are
    reported with a warning (the rename itself is unchanged).

    Parameters
    ----------
    values : pd.Series
        Column to rename.
    mapping : pd.DataFrame
        Sheet with an OLD column (current value) and a NEW column (replacement).

    Returns
    -------
    pd.Series
        The renamed column, NaN where no replacement was found.
    """
    import warnings  # local import: keeps this cell self-contained

    renamed = values.map(dict(zip(mapping.OLD, mapping.NEW)))
    unmapped = values[values.notna() & renamed.isna()].unique()
    if len(unmapped):
        warnings.warn(
            f'{values.name}: {len(unmapped)} distinct value(s) were mapped to NaN '
            f'(missing from, or blank in, the renaming workbook): {list(unmapped)}'
        )
    return renamed


name = read_sht('offense_name')
cat = read_sht('offense_category')
against = read_sht('crime_against')

df = df_raw.copy()
df['offense_name'] = remap_values(df.offense_name, name)
df['offense_category'] = remap_values(df.offense_category, cat)
df['crime_against'] = remap_values(df.crime_against, against)
df_refactored = df
display(name, cat, against, df_refactored)

Unnamed: 0,OLD,NEW
0,Simple Assault,assault_simple
1,Intimidation,intimidation
2,Fondling,fondling
3,Rape,rape
4,Impersonation,impersonation
5,Robbery,robbery
6,Arson,arson
7,Destruction/Damage/Vandalism of Property,property_damage
8,Theft From Motor Vehicle,theft_from_vehicle
9,Burglary/Breaking & Entering,burglary


Unnamed: 0,OLD,NEW
0,Assault Offenses,assault
1,Sex Offenses,sex_offense
2,Fraud Offenses,fraud
3,Robbery,robbery
4,Arson,arson
5,Destruction/Damage/Vandalism of Property,property_damage
6,Larceny/Theft Offenses,larceny_theft
7,Burglary/Breaking & Entering,burglary
8,Homicide Offenses,homicide
9,Drug/Narcotic Offenses,drug


Unnamed: 0,OLD,NEW
0,Person,against_person
1,Property,against_property
2,Society,against_society
3,Not a Crime,not_a_crime


Unnamed: 0,year,county,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,1997,BOULDER,1997-03-14 00:00:00,1,3,4,,15.0,against_person,fondling,sex_offense
1,1997,BOULDER,1997-07-02 00:00:00,3,7,2,21.0,14.0,against_property,arson,arson
2,1997,KIT CARSON,1997-01-20 00:00:00,1,1,0,22.0,58.0,against_person,assault_simple,assault
3,1997,KIT CARSON,1997-01-18 00:00:00,1,1,5,,21.0,against_property,other_larceny,larceny_theft
4,1997,KIT CARSON,1997-03-31 00:00:00,1,3,0,,,against_property,property_damage,property_damage
...,...,...,...,...,...,...,...,...,...,...,...
6770661,2019,BOULDER,2019-10-16 00:00:00,4,10,2,21.0,,against_property,other_larceny,larceny_theft
6770662,2019,BOULDER,2019-10-16 00:00:00,4,10,2,21.0,,against_property,other_larceny,larceny_theft
6770663,2019,ARAPAHOE,2019-06-01 00:00:00,2,6,5,18.0,20.0,against_property,shoplifting,larceny_theft
6770664,2019,ADAMS,2019-01-21 00:00:00,1,1,0,12.0,15.0,against_property,property_damage,property_damage


### Aggregated datasets
1. Version 1: includes `offense_category` dummy sums
2. Version 2: includes `offense_name` dummy sums

In [14]:
# Grouping keys used by every aggregation below: one output row per (year, county)
INDEX = ['year', 'county']
# Independent copy of the renamed record-level frame
df = df_refactored.copy()

def dummies_special(df, include:str, exclude:str, index=None) -> pd.DataFrame:
    """Count records per group for every value of `crime_against` and `include`.

    The two categorical columns are one-hot encoded (dummy columns are named
    after the raw values, with no prefix) and the dummies are summed per group,
    giving one count column per distinct value.

    Only the grouping keys and the two encoded columns are selected before
    encoding, so the full record-level frame is never copied with columns that
    would be dropped again right away, and extra columns in `df` cannot leak
    into the sums.

    Parameters
    ----------
    df : pd.DataFrame
        Record-level frame holding the grouping keys, `crime_against` and `include`.
    include : str
        Second categorical column to encode (besides `crime_against`).
    exclude : str
        Unused. Kept so existing calls keep working; the column is left out
        automatically because only the required columns are selected.
    index : list of str, optional
        Grouping keys. Defaults to the notebook-level `INDEX`.

    Returns
    -------
    pd.DataFrame
        One row per group: the grouping keys followed by the summed dummies.
    """
    keys = list(INDEX if index is None else index)
    needed = df[keys + ['crime_against', include]]
    return (
        pd.get_dummies(needed,
            columns=['crime_against', include],
            prefix="", prefix_sep=""
        )
        .groupby(keys)
        .sum()
        .reset_index()
    )

# One summed-dummy table per breakdown: by offense category and by offense name
dum_cat = dummies_special(df, include='offense_category', exclude='offense_name')
dum_name = dummies_special(df, include='offense_name', exclude='offense_category')

In [15]:
# Convert these to modes
mode_cols = ['quarter', 'month', 'day_of_week', 'hour']
df_modes = df[INDEX + mode_cols]

# Why this helper exists: aggregating with pandas.Series.mode leaves mixed cell
# values. When a group has several tied modes, the cell holds a numpy.ndarray with
# all of them, and that array can even be EMPTY, so it cannot be indexed blindly.
# first_in_list() collapses every such cell to a single float.
def first_in_list(x):
    """Collapse one aggregated-mode cell to a single float.

    Parameters
    ----------
    x : numpy.ndarray or scalar
        Cell value: an array of modes, or a single number.

    Returns
    -------
    float
        The first element of a non-empty array (any shape, including 0-d),
        NaN for an empty array, or `float(x)` for a scalar.
    """
    if isinstance(x, np.ndarray):
        if x.size > 0:
            # .flat also works for 0-d arrays, where x[0] raises IndexError
            return float(x.flat[0])
        return np.nan
    return float(x)

# Take the mode of every column within each (year, county) group.
# The aggregation returns one scalar per group and column: Series.mode() yields the
# tied modes in ascending order, and first_in_list() keeps the first one (NaN when
# there is no mode at all). Returning the scalar directly avoids the intermediate
# object-dtype frame with mixed scalar/ndarray cells and the follow-up
# DataFrame.applymap() pass, which pandas deprecated in 2.1.
df_modes = (df_modes
    .groupby(INDEX)
    .agg(lambda values: first_in_list(values.mode().to_numpy()))
    .reset_index()
    .rename(columns={c: f'{c}_mode' for c in mode_cols})
)

head(df_modes)

6 cols x 1397 rows


Unnamed: 0,year,county,quarter_mode,month_mode,day_of_week_mode,hour_mode
0,1997,ADAMS,1.0,3.0,0.0,17.0
1,1997,ALAMOSA,3.0,8.0,5.0,18.0
2,1997,ARAPAHOE,3.0,8.0,4.0,18.0


In [16]:
# Per (year, county): number of records (non-null dates) and mean age.
# Named aggregation produces the final column names directly.
df_count = df.groupby(INDEX, as_index=False).agg(cr_count=('date', 'count'))
df_avg = df.groupby(INDEX, as_index=False).agg(age_avg=('age', 'mean'))
head(df_count, df_avg)

3 cols x 1397 rows


Unnamed: 0,year,county,cr_count
0,1997,ADAMS,22947
1,1997,ALAMOSA,404
2,1997,ARAPAHOE,37555


3 cols x 1397 rows


Unnamed: 0,year,county,age_avg
0,1997,ADAMS,24.582071
1,1997,ALAMOSA,27.098901
2,1997,ARAPAHOE,25.209156


#### Stitching everything together

In [17]:
# Numerical aggregations (counts, averages, modes) joined on the grouping keys.
# Note that `df` is rebound here: from this point on it is the aggregated frame.
df = pd.merge(
    pd.merge(df_count, df_avg, on=INDEX),
    df_modes,
    on=INDEX,
)
head(df)

8 cols x 1397 rows


Unnamed: 0,year,county,cr_count,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode
0,1997,ADAMS,22947,24.582071,1.0,3.0,0.0,17.0
1,1997,ALAMOSA,404,27.098901,3.0,8.0,5.0,18.0
2,1997,ARAPAHOE,37555,25.209156,3.0,8.0,4.0,18.0


### Bring in total county population by year. This is needed to calculate crime rates per 100,000 residents

In [18]:
# Total population per county and year, limited to the years covered by the
# crime records (1997-2019); the `total` column is exposed as `pop`.
population_query = '''
SELECT
    year,
    county,
    total as pop
from county_population
WHERE year >= 1997 AND year <= 2019
'''
pop = read_main(population_query)

head(pop)

3 cols x 1472 rows


Unnamed: 0,year,county,pop
0,1997,ADAMS,167740.0
1,1997,ALAMOSA,7173.0
2,1997,ARAPAHOE,225524.0


In [19]:
# Summed aggregations for dummies: 2 versions
def create_merged_version(df, dummy_df) -> pd.DataFrame:
    """Attach one summed-dummy table and the population to the numeric aggregates.

    Both joins are inner merges on `INDEX`. The population comes from the
    notebook-level `pop` frame (only read here, so no `global` statement is
    needed), and its `pop` column is moved to position 2, right after the keys.
    """
    with_dummies = df.merge(dummy_df, on=INDEX)
    with_population = with_dummies.merge(pop, on=INDEX)
    return with_population.move_col('pop', 2)
# Same numeric aggregates, once with the category dummies and once with the name dummies
df_cat = create_merged_version(df=df, dummy_df=dum_cat)
df_name = create_merged_version(df=df, dummy_df=dum_name)
head(df_cat, df_name)

33 cols x 1397 rows


Unnamed: 0,year,county,pop,cr_count,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,167740.0,22947,24.582071,1.0,3.0,0.0,17.0,3047,...,50,8023,1,5467,14,189,316,245,1317,274
1,1997,ALAMOSA,7173.0,404,27.098901,3.0,8.0,5.0,18.0,101,...,0,165,0,73,0,0,9,0,6,4
2,1997,ARAPAHOE,225524.0,37555,25.209156,3.0,8.0,4.0,18.0,4568,...,249,14345,0,5856,198,238,434,421,2537,695


55 cols x 1397 rows


Unnamed: 0,year,county,pop,cr_count,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,167740.0,22947,24.582071,1.0,3.0,0.0,17.0,3047,...,1323,3,245,1232,1733,53,1293,1317,274,0
1,1997,ALAMOSA,7173.0,404,27.098901,3.0,8.0,5.0,18.0,101,...,0,0,0,14,7,10,1,6,4,1
2,1997,ARAPAHOE,225524.0,37555,25.209156,3.0,8.0,4.0,18.0,4568,...,4134,0,421,2283,3867,72,677,2537,695,2


In [21]:
INDEX = ['year', 'county']

# Columns that are identical in the category and the name version of the data
BASE_COLS = [
    'pop', 'cr_count', 'age_avg',
    'quarter_mode', 'month_mode', 'day_of_week_mode', 'hour_mode',
    'against_person', 'against_property', 'against_society', 'not_a_crime',
]

# Base table = keys + shared columns, with the overall crime rate per 100,000
# residents inserted at position 5.
crimes_per_100k = df_cat.cr_count / df_cat['pop'] * 100_000
df_base = df_cat.copy()[INDEX + BASE_COLS].insert_at(5, 'cr_rate', crimes_per_100k)

# Per-100,000 rates for the three crime_against groups (not_a_crime gets no rate)
for target in ('against_person', 'against_property', 'against_society'):
    df_base[f'{target}_rate'] = df_base[target] / df_base['pop'] * 100_000

# The shared columns now live in df_base, so strip them from both breakdown tables.
# NOTE(review): df_cat / df_name are rebound here, so re-running this cell fails
# (df_cat no longer has BASE_COLS) until the merge cell above is run again.
df_cat, df_name = (frame.drop(columns=BASE_COLS) for frame in (df_cat, df_name))

head(df_base, df_cat, df_name)

17 cols x 1397 rows


Unnamed: 0,year,county,pop,cr_count,age_avg,cr_rate,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person,against_property,against_society,not_a_crime,against_person_rate,against_property_rate,against_society_rate
0,1997,ADAMS,167740.0,22947,24.582071,13680.100155,1.0,3.0,0.0,17.0,3047,17766,2134,0,1816.501729,10591.391439,1272.206987
1,1997,ALAMOSA,7173.0,404,27.098901,5632.231981,3.0,8.0,5.0,18.0,101,264,39,0,1408.057995,3680.468423,543.705563
2,1997,ARAPAHOE,225524.0,37555,25.209156,16652.329686,3.0,8.0,4.0,18.0,4568,28573,4414,0,2025.505046,12669.605009,1957.219631


22 cols x 1397 rows


Unnamed: 0,year,county,arson,assault,bribery,burglary,drug,embezzlement,extortion,forgery,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,86,2672,2,1931,1845,26,5,244,...,50,8023,1,5467,14,189,316,245,1317,274
1,1997,ALAMOSA,1,92,0,13,35,0,0,3,...,0,165,0,73,0,0,9,0,6,4
2,1997,ARAPAHOE,138,3872,0,3200,3514,191,12,672,...,249,14345,0,5856,198,238,434,421,2537,695


44 cols x 1397 rows


Unnamed: 0,year,county,arson,assault_aggravated,assault_simple,bribery,burglary,credit_card_machine_fraud,drug_equipment,drug_narcotic,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,86,602,1954,2,1931,44,725,1120,...,1323,3,245,1232,1733,53,1293,1317,274,0
1,1997,ALAMOSA,1,14,72,0,13,0,8,27,...,0,0,0,14,7,10,1,6,4,1
2,1997,ARAPAHOE,138,671,2956,0,3200,194,916,2598,...,4134,0,421,2283,3867,72,677,2537,695,2


### Calculate rates and separate them

In [22]:
def add_rates(df):
    """Express every count column of `df` as a rate per 100,000 residents.

    A column counts as a count column when its name does not appear in the
    notebook-level `df_base` (which excludes the `INDEX` keys). Each one is
    divided by `df_base['pop']` and multiplied by 100,000; the division is
    matched row by row on the frame index, so `df` and `df_base` must share
    the same index.

    Returns a new frame holding the `INDEX` columns followed by one
    `<column>_rate` column per count column, in the original column order.
    """
    count_cols = [col for col in df.columns if col not in df_base.columns]
    rates = (
        df[count_cols]
        .div(df_base['pop'], axis=0)
        .mul(100_000)
        .add_suffix('_rate')
    )
    return pd.concat([df[INDEX], rates], axis=1)

# Rate versions of both breakdown tables
df_cat_rate, df_name_rate = (add_rates(counts) for counts in (df_cat, df_name))

# Population is no longer needed in the base table once the rates exist.
# NOTE(review): add_rates() reads df_base['pop'], so after this drop the cell
# cannot be re-run without first rebuilding df_base.
df_base = df_base.drop(columns='pop')

In [23]:
# Rate / descriptive view of the base table: rates, average age and the mode columns
df_base_rate = df_base.loc[:, INDEX + [
    'cr_rate', 'age_avg',
    'quarter_mode', 'month_mode', 'day_of_week_mode', 'hour_mode',
    'against_person_rate', 'against_property_rate', 'against_society_rate',
]]

# Raw-count view of the base table
df_base_count = df_base.loc[:, INDEX + [
    'cr_count',
    'against_person', 'against_property', 'against_society', 'not_a_crime',
]]

In [24]:
head(df_base_rate)

11 cols x 1397 rows


Unnamed: 0,year,county,cr_rate,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person_rate,against_property_rate,against_society_rate
0,1997,ADAMS,13680.100155,24.582071,1.0,3.0,0.0,17.0,1816.501729,10591.391439,1272.206987
1,1997,ALAMOSA,5632.231981,27.098901,3.0,8.0,5.0,18.0,1408.057995,3680.468423,543.705563
2,1997,ARAPAHOE,16652.329686,25.209156,3.0,8.0,4.0,18.0,2025.505046,12669.605009,1957.219631


In [25]:
head(df_base_count)

7 cols x 1397 rows


Unnamed: 0,year,county,cr_count,against_person,against_property,against_society,not_a_crime
0,1997,ADAMS,22947,3047,17766,2134,0
1,1997,ALAMOSA,404,101,264,39,0
2,1997,ARAPAHOE,37555,4568,28573,4414,0


In [26]:
head(df_cat)

22 cols x 1397 rows


Unnamed: 0,year,county,arson,assault,bribery,burglary,drug,embezzlement,extortion,forgery,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,86,2672,2,1931,1845,26,5,244,...,50,8023,1,5467,14,189,316,245,1317,274
1,1997,ALAMOSA,1,92,0,13,35,0,0,3,...,0,165,0,73,0,0,9,0,6,4
2,1997,ARAPAHOE,138,3872,0,3200,3514,191,12,672,...,249,14345,0,5856,198,238,434,421,2537,695


In [27]:
head(df_cat_rate)

22 cols x 1397 rows


Unnamed: 0,year,county,arson_rate,assault_rate,bribery_rate,burglary_rate,drug_rate,embezzlement_rate,extortion_rate,forgery_rate,...,kidnapping_rate,larceny_theft_rate,porn_rate,property_damage_rate,prostitution_rate,robbery_rate,sex_offense_rate,stolen_property_rate,vehicle_theft_rate,weapon_law_rate
0,1997,ADAMS,51.269822,1592.941457,1.192321,1151.18636,1099.916537,15.500179,2.980804,145.463217,...,29.808036,4782.997496,0.596161,3259.210683,8.34625,112.674377,188.386789,146.059378,785.143675,163.348039
1,1997,ALAMOSA,13.941168,1282.587481,0.0,181.235188,487.940889,0.0,0.0,41.823505,...,0.0,2300.292765,0.0,1017.705284,0.0,0.0,125.470514,0.0,83.64701,55.764673
2,1997,ARAPAHOE,61.190827,1716.890442,0.0,1418.917721,1558.149022,84.691651,5.320941,297.972721,...,110.409535,6360.742094,0.0,2596.619429,87.795534,105.532005,192.440716,186.676363,1124.935705,308.171192


In [28]:
head(df_name)

44 cols x 1397 rows


Unnamed: 0,year,county,arson,assault_aggravated,assault_simple,bribery,burglary,credit_card_machine_fraud,drug_equipment,drug_narcotic,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,86,602,1954,2,1931,44,725,1120,...,1323,3,245,1232,1733,53,1293,1317,274,0
1,1997,ALAMOSA,1,14,72,0,13,0,8,27,...,0,0,0,14,7,10,1,6,4,1
2,1997,ARAPAHOE,138,671,2956,0,3200,194,916,2598,...,4134,0,421,2283,3867,72,677,2537,695,2


In [29]:
head(df_name_rate)

44 cols x 1397 rows


Unnamed: 0,year,county,arson_rate,assault_aggravated_rate,assault_simple_rate,bribery_rate,burglary_rate,credit_card_machine_fraud_rate,drug_equipment_rate,drug_narcotic_rate,...,shoplifting_rate,sodomy_rate,stolen_property_rate,theft_from_building_rate,theft_from_vehicle_rate,theft_from_vending_machine_rate,vehicle_part_theft_rate,vehicle_theft_rate,weapon_law_rate,wire_fraud_rate
0,1997,ADAMS,51.269822,358.888756,1164.898057,1.192321,1151.18636,26.231072,432.216526,667.700012,...,788.720639,1.788482,146.059378,734.470013,1033.146536,31.596518,770.835817,785.143675,163.348039,0.0
1,1997,ALAMOSA,13.941168,195.176356,1003.764115,0.0,181.235188,0.0,111.529346,376.411543,...,0.0,0.0,0.0,195.176356,97.588178,139.411683,13.941168,83.64701,55.764673,13.941168
2,1997,ARAPAHOE,61.190827,297.52931,1310.725244,0.0,1418.917721,86.021887,406.165197,1151.983824,...,1833.06433,0.0,186.676363,1012.309111,1714.673383,31.925649,300.18978,1124.935705,308.171192,0.886824


### Output

In [30]:
# Persist the six aggregated tables, all keyed by (year, county):
# base counts / base rates, then counts and rates per offense category and per offense name.
write_main(df_base_count, 'crime_agg_base_count')
write_main(df_base_rate, 'crime_agg_base_rate')
write_main(df_cat, 'crime_agg_category')
write_main(df_name, 'crime_agg_name')
write_main(df_cat_rate, 'crime_agg_category_rate')
write_main(df_name_rate, 'crime_agg_name_rate')

1397