# Assignment One

## Problem 1

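This notebook contains the code for Problem 1: it pulls Chicago community-area and crime data for 2017 and 2018 and summarizes the crimes by type and by community area.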

In [1]:
import pandas as pd
import requests
import json
import matplotlib.pyplot as plt
from IPython.display import Image

## Functions 

In [2]:
def pull_community(link):
    '''
    This function pulls in the community area records for Chicago and keeps
    the number and name of each area.

    link: API link returning a JSON list of community-area records, each with
        'area_num_1' and 'community' fields

    returns: dataframe with one row per record and two columns,
        'community_area' (area number) and 'name' (community name)

    raises: requests.HTTPError if the API responds with an error status
    '''

    # Same timeout as pull_data so a stalled connection cannot hang forever.
    response = requests.get(link, timeout = 10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    response.raise_for_status()

    rows = [
        {'community_area': comm['area_num_1'], 'name': comm['community']}
        for comm in response.json()
    ]

    return pd.DataFrame(rows)

    
def pull_data(api_link, inc = 50000):
    '''
    This function pulls data from the api_link page by page and organizes it
    into a pandas dataframe.

    api_link: string containing api url (without a query string; the paging
        parameters are appended as '?$limit=...&$offset=...')
    inc: increment for data pull since api only returns (max) 50000 items at a time

    returns: dataframe with one row per record returned by the api

    raises: ValueError if inc is not positive or a page is not a JSON list;
        requests.HTTPError if the api responds with an error status
    '''

    # The offset only advances by inc, so a non-positive value can never page
    # forward through the data.
    if inc <= 0:
        raise ValueError('inc must be a positive integer')

    off = 0

    full = []

    while True:
        url = f'{api_link}?$limit={inc}&$offset={off}'
        response = requests.get(url, timeout = 10)
        # Stop on 4xx/5xx rather than treating an error body as a data page.
        response.raise_for_status()
        data = response.json()

        if not data: #empty response
            break

        # A non-list payload (e.g. a JSON object) is truthy, so without this
        # check its keys would be appended and the loop would never end.
        if not isinstance(data, list):
            raise ValueError(f'expected a list of records from {url}')

        full.extend(data)
        off += inc

    return pd.DataFrame(full)

def avg_crime_nhood(*args):
    '''
    This function counts crimes per community area and crime type for each
    year and, when several years are given, averages the counts across years.

    args: dataframes for each year; each needs 'id', 'year', 'community_area'
        and 'primary_type' columns. The year label is taken from the first
        'year' value in each dataframe.

    return: dataframe with 'community_area', 'type' and one '<year> Total'
        column per input. With two or more inputs an 'Average' column
        (rounded to 1 decimal) is added; with exactly two inputs a
        'Percent Change' column (first to second input, rounded to 1 decimal)
        is added as well. Area/type combinations missing from a year count
        as 0, so 'Percent Change' is not finite when the first count is 0.

    raises: ValueError if no dataframe is passed
    '''
    if not args:
        raise ValueError('avg_crime_nhood() needs at least one dataframe')

    keys = ['community_area', 'type']
    all_years = []

    for df in args:
        yr = df['year'].unique()[0]
        by_nhood = df.groupby(['community_area', 'primary_type']).count()[['id']]
        by_nhood = by_nhood.reset_index()
        by_nhood.columns = keys + [f'{yr} Total']
        all_years.append(by_nhood)

    final_counts = all_years[0]
    if len(all_years) == 1:
        return final_counts

    # Always merge on BOTH keys: merging on community_area alone would pair
    # every crime type of one year with every crime type of the next.
    for by_nhood in all_years[1:]:
        final_counts = pd.merge(final_counts, by_nhood, on = keys, how='outer')
    # A combination absent from a year means zero recorded crimes that year.
    final_counts = final_counts.fillna(0)

    # Select the year columns by position before 'Average' is appended.
    year_totals = final_counts.iloc[:, len(keys):]
    final_counts['Average'] = year_totals.mean(axis=1).round(1)

    # Percent change is only defined between exactly two years.
    if len(all_years) == 2:
        col1 = final_counts.columns[2]
        col2 = final_counts.columns[3]
        final_counts['Percent Change'] = round((final_counts[col2]/final_counts[col1] -1)*100,1)

    return final_counts

def num_crimes_type(*args):
    '''
    This function counts the crimes of each type in every dataframe inputted
    and places the per-year counts side by side.

    *args: dataframes containing crime records (each row is a crime); each
        needs 'id', 'year' and 'primary_type' columns. The year label is
        taken from the first 'year' value in each dataframe.

    return: dataframe with a 'Type' column and one '<year> Total' column per
        input. Only types present in every input are kept (inner merge).
        With exactly two inputs a 'Percent Change' column (first to second
        input, rounded to 1 decimal) is added.

    raises: ValueError if no dataframe is passed
    '''
    if not args:
        raise ValueError('num_crimes_type() needs at least one dataframe')

    all_years = []

    for df in args:
        yr = df['year'].unique()[0]
        by_type = df.groupby('primary_type').count()[['id']]
        by_type = by_type.reset_index()
        by_type.columns = ['Type', f'{yr} Total']
        all_years.append(by_type)

    # Fold the per-year tables together one at a time; with a single input
    # the loop body never runs and that table is returned unchanged.
    final_counts = all_years[0]
    for by_type in all_years[1:]:
        final_counts = pd.merge(final_counts, by_type, on = 'Type')

    # Percent change is only defined between exactly two years.
    if len(all_years) == 2:
        col1 = final_counts.columns[1]
        col2 = final_counts.columns[2]
        final_counts['Percent Change'] = round((final_counts[col2]/final_counts[col1]-1)*100,1)

    return final_counts


def mk_table(data, filename = 'table.png', dpi = 800):
    '''
    This function makes a matplotlib table from a pandas dataframe and saves
    it as an image.

    data: pandas dataframe; each row becomes one table row and the column
        names become the header
    filename: filename to save table png
    dpi = resolution

    return: None
    '''
    plt.figure(figsize=(8,6))
    plt.axis('off')

    # Hand matplotlib plain nested lists rather than pandas Series rows, so
    # cell lookup does not depend on integer (positional) indexing of a
    # Series, which pandas has deprecated.
    cell_text = data.values.tolist()

    tab = plt.table(cellText=cell_text,
        colLabels=list(data.columns),
        loc='center')

    # Fix the font size; automatic sizing is switched off first so the
    # explicit size below is the one used.
    tab.auto_set_font_size(False)
    tab.set_fontsize(5)

    plt.savefig(filename, dpi = dpi)

def mk_bar(df, x_col, y_col, title, filename = 'bar.png', dpi = 500):
    '''
    This function makes a horizontal bar chart from a dataframe and saves it
    as an image. Bars are sorted by y_col in ascending order.

    df: dataframe
    x_col: name of column for the x argument of the plot (the bar labels)
    y_col: name of column for the y argument of the plot (the bar lengths)
    title: chart title
    filename: filename to save barchart png
    dpi = resolution

    return: None
    '''

    df = df.sort_values(y_col, ascending=True)
    df.plot.barh(x=x_col, y=y_col, legend = None)
    plt.title(title)
    # Small tick labels so many bar labels fit on the axis.
    plt.tick_params(axis='y', which='major', labelsize=6.5)
    # Honor the caller's dpi; it used to be ignored in favor of a fixed 400.
    plt.savefig(filename, bbox_inches='tight', dpi=dpi)
In [3]:
# Links to use: JSON endpoints on the City of Chicago data portal.

# Passed to pull_community() to get the community-area numbers and names.
community_areas = 'https://data.cityofchicago.org/resource/igwz-8jzy.json'
# Passed to pull_data() to download the crime records; the variable names
# label these as the 2017 and 2018 datasets.
crime_2017_api = 'https://data.cityofchicago.org/resource/d62x-nvdr.json'
crime_2018_api = 'https://data.cityofchicago.org/resource/3i3m-jwuy.json'

In [4]:
# Community-area lookup; despite the name, pull_community returns a dataframe.
nhood_dict = pull_community(community_areas)
# One dataframe of crime records per dataset (2017 and 2018 links).
df1 = pull_data(crime_2017_api)
df2 = pull_data(crime_2018_api)

# Per-type crime counts for both dataframes plus the percent change between them.
x = num_crimes_type(df1, df2)
# Upper-case the first letter of each type name and lower-case the rest.
x['Type']=x['Type'].str.capitalize()

In [None]:
# Crime counts per community area and crime type for both years.
z = avg_crime_nhood(df1, df2)

# Scratch lines kept from the original cell (not executed):
#z=pd.merge(nhood_dict, z, on = 'community_area')
# z2 = z[['community_area', '2017 Total', '2018 Total']]
# #z2.plot.scatter(x='2017 Total', y='2018 Total')
# #z1.sort_values('Percent Change', ascending=False)
# z3 = z1[z1['community_area']=='1'].sort_values('2018 Total', ascending=False)

#make pie chart of the 3?
# For every community area, keep the row of the crime type with the largest
# '2018 Total' and store it as a plain dict.
dict_n = [
    dict(
        z[z['community_area'] == area]
        .sort_values('2018 Total', ascending=False)
        .iloc[0]
    )
    for area in z['community_area'].unique()
]

# One row per community area: its top crime type by 2018 count.
a = pd.DataFrame(dict_n)
a['type'].unique()
a

In [None]:
#biggest pct increase: concealed carry license
#biggest pct decrease: other narcotics
# Rank the crime types by percent change, largest first.
vals = x.sort_values('Percent Change', ascending=False)
high = vals.iloc[0]   # first row after sorting (largest percent change)
low = vals.iloc[-1]   # last row after sorting

# Upper-case the type names again; presumably this matches the raw labels in
# z['type'], since x['Type'] was capitalized in an earlier cell (verify).
hightype = high['Type'].upper()
lowtype = low['Type'].upper()
#z[z['type']==hightype]
# Show the per-community-area rows for the type at the bottom of the ranking.
z[z['type']== lowtype]


In [None]:
# z3 is not assigned anywhere else in this notebook, so this cell raised a
# NameError. Rebuild it from z following the commented-out draft in the cell
# that creates z (the z1 used there is also undefined, so z is used instead).
# NOTE(review): confirm this was the intent, and that community_area values
# are strings such as '1'.
z3 = z[z['community_area'] == '1'].sort_values('2018 Total', ascending=False)
# Row 10 (position 9) of community area 1's crime types ranked by 2018 count.
dict(z3.iloc[9])

In [None]:
# Save the crimes-by-type dataframe as a table image, table_by_type.png.
mk_table(x, 'table_by_type.png')

In [None]:
# Save a horizontal bar chart of percent change per crime type, bar_type_chg.png.
mk_bar(x, 'Type', 'Percent Change', 'Percent Change in Crimes by Type (2017 to 2018)', 'bar_type_chg.png')

In [None]:
# Load table_by_type.png (the filename passed to mk_table) as an IPython Image for display.
Image('table_by_type.png')

In [None]:
# Load bar_type_chg.png (the filename passed to mk_bar) as an IPython Image for display.
Image('bar_type_chg.png')

In [None]:
# Crime types ordered by their '2018 Total' count, largest first.
x.sort_values('2018 Total', ascending=False)