In [1]:
import pandas as pd

In [2]:
# Location of the raw calls-for-service CSV export (directory + file name).
data_path = (
    '/Users/josiah/nashville_data/data/'
    'Metro_Nashville_Police_Department_Calls_for_Service.csv'
)

In [3]:
%%capture --no-display

# Load the whole CSV at `data_path` into a dataframe.
df = pd.read_csv(filepath_or_buffer=data_path)

In [4]:
# Drop the columns that are unwanted / not needed for the rest of the notebook.
drop_columns = [
    'Tencode Description',
    'Tencode Suffix',
    'Tencode Suffix Description',
    'Disposition Description',
    'Block',
    'Street Name',
    'Sector',
    'Zone',
    'RPA',
    'Mapped Location',
]

# Removes the columns from `df` itself (in place).
df.drop(columns=drop_columns, inplace=True)

In [14]:
# Remove the alpha portion (a single trailing letter) of the disposition code
# for simplicity.
def _strip_alpha_suffix(code):
    """Return ``code`` without its last character when that character is a letter.

    Missing values are returned unchanged: ``str(NaN)`` is ``'nan'``, which ends
    in a letter, so without this guard a missing code would be rewritten to the
    string ``'na'``. Codes that do not end in a letter are also returned
    unchanged, keeping their original type.
    """
    if pd.isna(code):
        return code
    text = str(code)
    # text[-1:] is '' for an empty string, whereas text[-1] would raise IndexError.
    return text[:-1] if text[-1:].isalpha() else code


df['Disposition Code'] = df['Disposition Code'].apply(_strip_alpha_suffix)

In [15]:
# Split the calls into theft, vehicle, and violent dataframes by 'Tencode'.
# The three code lists below are matched, in order, to the three names on the left.
theft, vehicle, violent = (
    df.loc[df['Tencode'].isin(codes)]
    for codes in (
        [50, 70, 71, 72, 1000],                                      # theft
        [32, 45, 46, 93],                                            # vehicle
        [13, 51, 52, 53, 54, 57, 66, 68, 83, 4000, 6000, 9000],      # violent
    )
)

In [16]:
def save(df_, name, data_dir='/Users/josiah/nashville_data/data'):
    """Write a dataframe to ``<data_dir>/<name>.csv`` without the index column.

    Args:
        df_: The dataframe to write.
        name: File name without the ``.csv`` extension.
        data_dir: Directory to write into. Defaults to the original
            hard-coded data directory, so existing ``save(df_, name)`` calls
            behave exactly as before.
    """
    path = f'{data_dir}/{name}.csv'
    df_.to_csv(path, index=False)

In [17]:
def clean_df(df_, name):
    """Clean a dataframe and save the result as ``<name>.csv``.

    Steps:
        * Convert 'Call Received' to datetime.
        * Remove duplicate 'Event Number' rows, keeping the one with the
          fewest NaNs (the first such row when several tie).
        * Sort from oldest to newest 'Call Received'.
        * Save the cleaned dataframe to a .csv via ``save``.

    The cleaning is done on a new dataframe, so ``df_`` is not modified. This
    also avoids pandas' SettingWithCopyWarning when ``df_`` is a slice of
    another dataframe.

    Notes:
        * Rows whose 'Event Number' is missing are not kept, because
          ``groupby`` skips NaN group keys by default.
        * The row selection uses index labels, so it assumes the index of
          ``df_`` is unique.

    Args:
        df_: Dataframe with 'Call Received' and 'Event Number' columns.
        name: File name (without extension) passed on to ``save``.
    """
    # assign() returns a new frame instead of writing into the caller's one.
    cleaned = df_.assign(**{'Call Received': pd.to_datetime(df_['Call Received'])})
    # Number of non-null cells per row; a higher count means fewer NaNs.
    non_null_counts = cleaned.notnull().sum(axis=1)
    # For each event number, the index label of its most complete row.
    best_rows = non_null_counts.groupby(cleaned['Event Number']).idxmax()
    cleaned = cleaned.loc[best_rows].sort_values('Call Received')
    save(cleaned, name)

In [18]:
%%capture --no-display

# Clean and save each category dataframe under its own file name.
for _frame, _label in ((theft, 'theft'), (vehicle, 'vehicle'), (violent, 'violent')):
    clean_df(_frame, _label)