In [159]:
# Load some data

import os
import requests

def get_table_url(table_name, base_url=os.environ['NEWSROOMDB_URL']):
    return '{}table/json/{}'.format(os.environ['NEWSROOMDB_URL'], table_name)

def get_table_data(table_name):
    url = get_table_url(table_name)
    
    try:
        r = requests.get(url)
        return r.json()
    except:
        print("Request failed. Probably because the response is huge.  We should fix this.")
        return get_table_data(table_name)
    
homicides_raw = get_table_data('homicides')

In [160]:
# Pull homicides into a dataframe

import pandas as pd

homicides = pd.DataFrame(homicides_raw)

## Filter to year-to-day, 2015/2016 homicides only

In [161]:
# Add a column that contains date and time together and is a Python datetime
from datetime import datetime

def get_datetime(row):
    if not row['Occ Date']:
        return None
    
    dt_str = row['Occ Date']
    fmt_str = "%Y-%m-%d"
    if row['Occ Time']:
        dt_str = dt_str + " " + row['Occ Time']
        fmt_str += " %H:%M"
        
    return datetime.strptime(dt_str, fmt_str)

homicides['datetime'] = homicides.apply(get_datetime, axis=1)
homicides['year'] = homicides.apply(lambda row: row['datetime'].year, axis=1)

In [162]:
# Filter to homicides on or before this day

from datetime import datetime
day_of_year = datetime.now().timetuple().tm_yday

def get_day_of_year(row):
    try:
        return row['datetime'].timetuple().tm_yday
    except ValueError:
        return None

homicides['day_of_year'] = homicides.apply(get_day_of_year, axis=1)

homicides_with_day_of_year = homicides[homicides['day_of_year'] != None]

def get_month(row):
    return row['datetime'].month

homicides_with_day_of_year['month'] = homicides_with_day_of_year.apply(get_month, axis=1)

homicides_to_day = homicides_with_day_of_year[homicides_with_day_of_year['day_of_year'] <= day_of_year]

In [163]:
# Normalize district numbers

def normalize_district_number(d):
    try:
        return int(d)
    except ValueError:
        return d

homicides_to_day['District Number'] = homicides_to_day['District Number'].apply(normalize_district_number)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [164]:
# Get only 2015, 2016 homicides

homicides_to_day_2015 = homicides_to_day[homicides_to_day['year'] == 2015]
homicides_to_day_2016 = homicides_to_day[homicides_to_day['year'] == 2016]

## Median Age

In [165]:
from IPython.display import display
import numpy as np
import pandas as pd

median_age_2015 = homicides_to_day_2015['Age'][homicides_to_day_2015['Age'].apply(np.isreal)].median()
median_age_2016 = homicides_to_day_2016['Age'][homicides_to_day_2016['Age'].apply(np.isreal)].median()

median_age = pd.DataFrame({
    2015: pd.Series([median_age_2015], index=['Median age']),
    2016: pd.Series([median_age_2016], index=['Median age']),   
})
display(median_age)


Unnamed: 0,2015,2016
Median age,25.0,26.0


## Oldest/youngest

In [166]:
from IPython.display import display
import numpy as np
import pandas as pd

youngest_2015 = homicides_to_day_2015['Age'][homicides_to_day_2015['Age'].apply(np.isreal)].min()
youngest_2016 = homicides_to_day_2016['Age'][homicides_to_day_2016['Age'].apply(np.isreal)].min()
oldest_2015 = homicides_to_day_2015['Age'][homicides_to_day_2015['Age'].apply(np.isreal)].max()
oldest_2016 = homicides_to_day_2016['Age'][homicides_to_day_2016['Age'].apply(np.isreal)].max()

oldest_youngest = pd.DataFrame({
    2015: pd.Series([youngest_2015, oldest_2015], index=['Youngest victim age', 'Oldest victim age']), 
    2016: pd.Series([youngest_2016, oldest_2016], index=['Youngest victim age', 'Oldest victim age']),         
})
display(oldest_youngest)

Unnamed: 0,2015,2016
Youngest victim age,1,2
Oldest victim age,79,84


## By police district

In [167]:
from IPython.display import display

by_police_district = pd.DataFrame({
    2015: homicides_to_day_2015.groupby('District Number').size(),
    2016: homicides_to_day_2016.groupby('District Number').size(),
})

by_police_district['change'] = by_police_district[2016] - by_police_district[2015]
by_police_district['percent change'] = (((by_police_district[2016] - by_police_district[2015]) / by_police_district[2015]) * 100)

display(by_police_district)

Unnamed: 0,2015,2016,change,percent change
1.0,4.0,8,4.0,100.0
2.0,14.0,19,5.0,35.714286
3.0,15.0,29,14.0,93.333333
4.0,26.0,21,-5.0,-19.230769
5.0,19.0,23,4.0,21.052632
6.0,22.0,35,13.0,59.090909
7.0,25.0,55,30.0,120.0
8.0,19.0,39,20.0,105.263158
9.0,20.0,35,15.0,75.0
10.0,23.0,25,2.0,8.695652


## By month

In [168]:
from IPython.display import display

by_month = pd.DataFrame({
    2015: homicides_to_day_2015.groupby('month').size(),
    2016: homicides_to_day_2016.groupby('month').size(),
})

by_month['change'] = by_month[2016] - by_month[2015]
by_month['percent change'] = (((by_month[2016] - by_month[2015]) / by_month[2015]) * 100)

display(by_month)

Unnamed: 0_level_0,2015,2016,change,percent change
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,32,57.0,25.0,78.125
2.0,20,45.0,25.0,125.0
3.0,35,49.0,14.0,40.0
4.0,35,41.0,6.0,17.142857
5.0,49,68.0,19.0,38.77551
6.0,49,72.0,23.0,46.938776
7.0,54,71.0,17.0,31.481481
8.0,57,90.0,33.0,57.894737
9.0,2,,,


## By weekend

In [169]:
from datetime import datetime, timedelta
from dateutil import rrule
from dateutil import parser
from dateutil import relativedelta
from IPython.display import display

today = datetime.now().date()
fridays_2015 = rrule.rrule(rrule.WEEKLY, byweekday=relativedelta.FR(1), dtstart=datetime(2015,1,1), count=52)
fridays_2016 = rrule.rrule(rrule.WEEKLY, byweekday=relativedelta.FR(1), dtstart=datetime(2016,1,1), count=52)

day_of_year = datetime.now().timetuple().tm_yday
fridays_2015_to_date = [d.date() for d in fridays_2015 if d.timetuple().tm_yday <= day_of_year]
fridays_2016_to_date = [d.date() for d in fridays_2016 if d.timetuple().tm_yday <= day_of_year]

def get_weekends(fridays):
    weekends = []
    for friday in fridays:
        friday_dt = datetime(year=friday.year, month=friday.month, day=friday.day, hour=15, minute=0)
        monday_dt = (friday_dt + timedelta(days=3)).replace(hour=6, minute=0)
        weekends.append((friday_dt, monday_dt))
        
    return weekends
    
weekends_2015_to_date = get_weekends(fridays_2015_to_date)
weekends_2016_to_date = get_weekends(fridays_2016_to_date)

def is_weekend(timestamp):
    """Does the timestamp fall between Friday 3 p.m. and Monday 6 a.m."""
    if not timestamp:
        return False
    
    day_of_week = timestamp.weekday()
    
    if day_of_week > 0 and day_of_week < 4:
        return False
    
    if day_of_week == 4:
        # Friday
        
        # Same day, 3 p.m.
        start = datetime(timestamp.year, timestamp.month, timestamp.day, 15)
        
        return timestamp >= start
    
    if day_of_week == 0:
        # Monday
        
        # Same day, 6 a.m.
        end = datetime(timestamp.year, timestamp.month, timestamp.day, 6)
        
        return timestamp < end
        
    return True

homicides_to_day_2015_weekend = homicides_to_day_2015[homicides_to_day_2015.apply(lambda r: is_weekend(r['datetime']), axis=1)]
homicides_to_day_2016_weekend = homicides_to_day_2016[homicides_to_day_2016.apply(lambda r: is_weekend(r['datetime']), axis=1)]

def get_weekend_2015(row):
    for i, weekend in enumerate(weekends_2015_to_date):
        weekend_start, weekend_end = weekend
        if row['datetime'] >= weekend_start and row['datetime'] <= weekend_end:
            return i + 1
        
def get_weekend_2016(row):
    for i, weekend in enumerate(weekends_2016_to_date):
        weekend_start, weekend_end = weekend
        if row['datetime'] >= weekend_start and row['datetime'] <= weekend_end:
            return i + 1
        
homicides_to_day_2015_weekend['weekend'] = homicides_to_day_2015_weekend.apply(get_weekend_2015, axis=1)
homicides_to_day_2016_weekend['weekend'] = homicides_to_day_2016_weekend.apply(get_weekend_2016, axis=1)

by_weekend = pd.DataFrame({
    2015: homicides_to_day_2015_weekend.groupby('weekend').size(),
    2016: homicides_to_day_2016_weekend.groupby('weekend').size(),
})

by_weekend['change'] = by_weekend[2016] - by_weekend[2015]
by_weekend['percent change'] = (((by_weekend[2016] - by_weekend[2015]) / by_weekend[2015]) * 100)

display(by_weekend)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,2015,2016,change,percent change
weekend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2.0,2,0.0,0.0
2,1.0,7,6.0,600.0
3,4.0,4,0.0,0.0
4,5.0,5,0.0,0.0
5,4.0,9,5.0,125.0
6,2.0,4,2.0,100.0
7,2.0,7,5.0,250.0
8,3.0,5,2.0,66.666667
9,,2,,
10,5.0,1,-4.0,-80.0
