In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.graphics.gofplots import qqplot
from matplotlib.patches import Patch

In [2]:
# Offense-description strings that this notebook groups together as violent crime.
# NOTE(review): presumably compared against an offense-description column of the
# crime data in later cells (not visible here) — confirm the spellings against the
# dataset, since these abbreviated literals would have to match it exactly.
violent_crime = [
    'AGG ROBBERY/DEADLY WEAPON',
    'ROBBERY BY ASSAULT',
    'AGG ASLT W/MOTOR VEH FAM/DAT V',
    'AGG ASLT STRANGLE/SUFFOCATE',
    'AGG ASSAULT',
    'AGG ASLT ENHANC STRANGL/SUFFOC',
    'RAPE',
    'DEADLY CONDUCT',
    'AGG ASSAULT FAM/DATE VIOLENCE',
    'AGG RAPE OF A CHILD',
    'AGG RAPE',
    'ROBBERY BY THREAT',
    'AGG ROBBERY BY ASSAULT',
    'RAPE OF A CHILD',
    'AGG ASSAULT WITH MOTOR VEH',
    'MURDER',
    'AGG ASSAULT ON PUBLIC SERVANT',
    'DEADLY CONDUCT FAM/DATE VIOL',
    'MANSLAUGHTER'
]

In [3]:
# Read the three raw CSV inputs (paths are relative to the notebook's working
# directory): crime/housing records, weather observations, and zip-code data.
austin_crime, austin_weather, population_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/crime-housing-austin-2015.csv',
        'data/austin_weather.csv',
        'data/AustinZipCodes.csv',
    )
)

# Columns of austin_crime that are run through percentage_to_float below.
percentage_columns = [
    'Populationbelowpovertylevel',
    'Non-WhiteNon-HispanicorLatino',
    'HispanicorLatinoofanyrace',
    'Populationwithdisability',
    'Unemployment',
    'Largehouseholds(5+members)',
    'Homesaffordabletopeopleearninglessthan$50000',
    'Rentalsaffordabletopeopleearninglessthan$25000',
    'Rent-restrictedunits',
    'HousingChoiceVoucherholders',
    'Percentageofrentalunitsinpoorcondition',
    'Percentchangeinnumberofhousingunits2000-2012',
    'Changeinpercentageofpopulationbelowpoverty2000-2012',
    'Changeinmedianrent2000-2012',
    'Changeinmedianhomevalue2000-2012',
    'Percentageofhomeswithin1/4-mioftransitstop',
    'Percentageofhousingandtransportationcoststhatistransportation-related',
]

# Columns of austin_crime that are run through currency_to_float below.
currency_columns = [
    'Medianhouseholdincome',
    'Medianrent',
    'Medianhomevalue',
    'Averagemonthlytransportationcost',
]

# Setup some functions to convert percentages and currency to floats
def percentage_to_float(value):
    """Convert a percentage string such as ``'12.5%'`` to a fraction (``0.125``).

    Leading and trailing ``%`` characters are stripped, the remainder is parsed
    with ``float`` and divided by 100.

    Args:
        value: The cell value to convert, normally a string.

    Returns:
        The fraction as a float, or ``value`` unchanged when it cannot be
        converted (e.g. NaN/None, which have no ``strip`` method, or text that
        is not numeric once the ``%`` is removed).
    """
    try:
        return float(value.strip('%')) / 100
    # Only swallow conversion failures; a bare ``except`` would also trap
    # KeyboardInterrupt/SystemExit and make the cell impossible to interrupt.
    except (AttributeError, TypeError, ValueError):
        return value

def currency_to_float(value):
    """Convert a currency string such as ``'$1,234.50'`` to a float (``1234.5``).

    Every ``$`` and ``,`` character is removed before the remainder is parsed
    with ``float``.

    Args:
        value: The cell value to convert, normally a string.

    Returns:
        The amount as a float, or ``value`` unchanged when it cannot be
        converted (e.g. NaN/None, which have no ``replace`` method, or text
        that is not numeric once the symbols are removed).
    """
    try:
        return float(value.replace('$', '').replace(',', ''))
    # Only swallow conversion failures; a bare ``except`` would also trap
    # KeyboardInterrupt/SystemExit and make the cell impossible to interrupt.
    except (AttributeError, TypeError, ValueError):
        return value
    
# Convert the formatted text columns of austin_crime to numbers, pairing each
# column group with its converter (percentages first, then currency amounts).
for columns, converter in (
    (percentage_columns, percentage_to_float),
    (currency_columns, currency_to_float),
):
    for col in columns:
        austin_crime[col] = austin_crime[col].apply(converter)

In [4]:
# Attach a daily high and low temperature to every crime record.

# Collapse the weather table to one row per 'Date' value: the maximum
# 'TempHighF' and the minimum 'TempLowF' observed for that date.
high_low_temp = (
    austin_weather
    .groupby('Date')
    .agg({'TempHighF': 'max', 'TempLowF': 'min'})
    .reset_index()
)

# Parse both join keys with pd.to_datetime so they are compared as dates
# rather than in whatever form the CSVs stored them.
high_low_temp['Date'] = pd.to_datetime(high_low_temp['Date'])
austin_crime['Report_Date'] = pd.to_datetime(austin_crime['Report_Date'])

# NOTE: merge() defaults to an inner join, so crime rows whose Report_Date has
# no matching weather date are dropped. Because the key names differ, the
# weather 'Date' column is kept in the result alongside 'Report_Date'.
austin_crime = austin_crime.merge(high_low_temp, left_on='Report_Date', right_on='Date')
