# Factors Driving Austin Housing Prices
## Join Crime Index to Housing Data
Ricardo Dunia (duniarh)
<br/>
Kevin John Cherian Joseph (kc43529)
<br/>
Arnob Mallick (am93746)

In [1]:
import multiprocessing as mp
import pandas as pd
import math
import csv

from datetime import datetime

In [2]:
# Read the two raw inputs: the housing data and the crime data.
housing_df = pd.read_csv(
    filepath_or_buffer='data/raw_austin_housing_data.csv',
    low_memory=False,
)
crime_df = pd.read_csv(
    filepath_or_buffer='data/raw_crime_data.csv',
    low_memory=False,
)

In [3]:
# great-circle distance between two (lat, long) points via the Haversine formula
def distance(origin, destination):
    """
    Return the great-circle (Haversine) distance between two points.

    The Earth is modelled as a sphere with a radius of 6371 km.

    Parameters
    ----------
    origin : tuple of float
        (lat, long) in degrees
    destination : tuple of float
        (lat, long) in degrees

    Returns
    -------
    distance_in_km : float

    Examples
    --------
    >>> munich = (48.1372, 11.5756)
    >>> berlin = (52.5186, 13.4083)
    >>> round(distance(munich, berlin), 1)
    504.2
    """
    earth_radius_km = 6371
    start_lat, start_lon = origin
    end_lat, end_lon = destination

    # sines of the half-differences, cosines of the two latitudes
    sin_half_dlat = math.sin(math.radians(end_lat - start_lat) / 2)
    sin_half_dlon = math.sin(math.radians(end_lon - start_lon) / 2)
    cos_start_lat = math.cos(math.radians(start_lat))
    cos_end_lat = math.cos(math.radians(end_lat))

    # haversine of the central angle between the two points
    haversine = (sin_half_dlat * sin_half_dlat +
                 cos_start_lat * cos_end_lat * sin_half_dlon * sin_half_dlon)
    central_angle = 2 * math.atan2(math.sqrt(haversine),
                                   math.sqrt(1 - haversine))
    return earth_radius_km * central_angle

In [4]:
# constants

# column / key names used when reading the crime data and the housing data
crime_type_key = 'Highest Offense Description'
crime_index_key = 'Crime Index'
lat_key = 'latitude'
lon_key = 'longitude'

# one pre-computed crime grid file exists per year, for
# year_start .. year_start + year_count - 1
year_start = 2015
year_count = 5


def _read_geo_crime_csv(year):
    """Return all rows of ./data/geo_crime_<year>.csv as a list of string lists.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the rows have been read (the handle used to be left open), and with
    ``newline=''`` as the csv module documentation recommends.
    """
    with open('./data/geo_crime_' + str(year) + '.csv', newline='') as csv_file:
        return list(csv.reader(csv_file))


# crime_data[i] holds the rows of the grid file for year_start + i
crime_data = [
    _read_geo_crime_csv(year)
    for year in range(year_start, year_start + year_count)]
unique_crime_types = crime_df[crime_type_key].unique().tolist()

# The bounding box below is split into seg_lat x seg_lon grid cells.
seg_lat = 200
seg_lon = 200
# bounding box in degrees
min_lat = 30.0727239
max_lat = 30.5193782
min_lon = -98.0158212
max_lon = -97.4053586
# height / width of a single grid cell, in degrees
del_lat = (max_lat - min_lat) / seg_lat
del_lon = (max_lon - min_lon) / seg_lon

In [6]:
# add a zero-initialised column that will hold the aggregated crime index
housing_df[crime_index_key] = 0

# Subjective weight scale: maps every incident type to the integer weight
# its occurrences are multiplied by when the crime index is summed up.
incident_weights = {
    'AGG ROBBERY/DEADLY WEAPON': 10,
    'BURGLARY OF VEHICLE': 10,
    'FORGERY AND PASSING': 2,
    'POSS OF DRUG PARAPHERNALIA': 8,
    'AUTO THEFT': 10,
    'BURGLARY OF RESIDENCE': 12,
    'POSS MARIJUANA': 6,
    'TERRORISTIC THREAT': 16,
    'PUBLIC INTOXICATION': 8,
    'FAMILY DISTURBANCE': 5,
    'DWI': 5,
    'THEFT': 10,
    'VIOL STATE LAW - OTHER': 2,
    'THEFT BY SHOPLIFTING': 4,
    'CRIMINAL MISCHIEF': 4,
    'DWI  .15 BAC OR ABOVE': 4,
    'DWI - CHILD PASSENGER': 4,
    'DOC DISPLAY GUN/DEADLY PUB PLC': 8,
    'DWI 2ND': 10,
    'CRIMINAL TRESPASS': 12,
    'HARASSMENT': 10,
    'EVADING / FOOT': 6,
    'RESISTING ARREST OR SEARCH': 10,
    'CRED CARD ABUSE - OTHER': 4,
    'STALKING': 14,
    'AIRPORT - FEDERAL VIOL': 3,
    'VOCO SOLICITATION PROHIBIT': 7,
    'AGG ASSAULT': 16,
    'THEFT FROM AUTO': 10,
    'ASSAULT W/INJURY-FAM/DATE VIOL': 8,
    'CRIMINAL TRESPASS/HOTEL': 4,
    'POSS CONTROLLED SUB/NARCOTIC': 4,
    'PAROLE VIOL': 10,
    'ASSAULT WITH INJURY': 12,
    'RUNAWAY CHILD': 4,
    'VIOL CITY ORDINANCE - OTHER': 4,
    'DISTURBANCE - OTHER': 1,
    'THEFT OF BICYCLE': 8,
    'CRED CARD ABUSE BY FORGERY': 2,
    'SUSPICIOUS PERSON': 12,
    'COUNTERFEITING': 2,
    'FRAUD FILING FINANCE STATEMENT': 1,
    'FEDERAL VIOL/OTHER': 8,
    'IDENTITY THEFT': 2,
    'FRAUD - OTHER': 1,
    'ASSAULT BY THREAT': 6,
    'BURGLARY NON RESIDENCE': 6,
    'ASSAULT BY CONTACT': 10,
    'DRIVING WHILE INTOX / FELONY': 8,
    'SEXUAL ASSAULT W/ OBJECT': 16,
    'DAMAGE CITY PROP': 12,
    'ROBBERY BY ASSAULT': 14,
    'CRASH/FAIL STOP AND RENDER AID': 1,
    'DATING DISTURBANCE': 2,
    'FAILURE TO IDENTIFY': 2,
    'THEFT OF SERVICE': 1,
    'CUSTODY ARREST TRAFFIC WARR': 3,
    'VIOL GLASS CONTAINER': 5,
    'WARRANT ARREST NON TRAFFIC': 8,
    'VIOL CITY ORDINANCE - CURFEW': 8,
    'PROTECTIVE ORDER': 6,
    'URINATING IN PUBLIC PLACE': 12,
    'DOC FIGHTING': 12,
    'POSS OF PROHIBITED WEAPON': 12,
    'GRAFFITI': 16,
    'FALSE ALARM OR REPORT': 1,
    'POSS SYNTHETIC MARIJUANA': 1,
    'PROSTITUTION': 18,
    'SOLICITATION - BEGGING': 18,
    'VIOL OF EMERG PROTECTIVE ORDER': 6,
    'AGG ASSAULT FAM/DATE VIOLENCE': 6,
    'DEBIT CARD ABUSE': 1,
    'EVADING VEHICLE': 4,
    'LIQUOR LAW VIOLATION/OTHER': 2,
    'VIOL OF PROTECTIVE ORDER': 2,
    'INJURY TO CHILD (CARE/CUSTODY)': 6,
    'ASSAULT BY CONTACT FAM/DATING': 5,
    'ARSON': 10,
    'THEFT FROM BUILDING': 10,
    'LITTERING': 15,
    'POSS CONTROLLED SUB/OTHER': 2,
    'THEFT FROM PERSON': 13,
    'AGG ASSAULT WITH MOTOR VEH': 12,
    'DEL CONTROLLED SUB/SYN NARC': 5,
    'VOCO - ALCOHOL  CONSUMPTION': 5,
    'ENTICING A CHILD': 18,
    'BURG NON RESIDENCE SHEDS': 6,
    'BOMB THREAT': 12,
    'THEFT OF METAL': 3,
    'FORGERY BY MAKING': 1,
    'FAMILY DISTURBANCE/PARENTAL': 6,
    'THEFT OF LICENSE PLATE': 12,
    'VIOL OF CAMPING ORDINANCE': 20,
    'CHILD ENDANGERMENT- ABANDONMEN': 4,
    'THEFT OF AUTO PARTS': 6,
    'CHILD CUSTODY INTERFERE': 5,
    'WEAPON VIOL - OTHER': 10,
    'AIRPORT PLACES WEAPON PROHIBIT': 4,
    'RECKLESS DAMAGE': 8,
    'UNLAWFUL CARRYING WEAPON': 8,
    'GAMBLING PROMOTION': 10,
    'BURGLARY OF COIN-OP MACHINE': 12,
    'RAPE': 18,
    'INDECENT EXPOSURE': 17,
    'UNAUTHORIZED USE VEH-EXPIRED': 14,
    'HARASSMENT ONLINE': 2,
    'AGG ASLT STRANGLE/SUFFOCATE': 16,
    'VIOL OF PARK CURFEW': 6,
    'POSS OF ALCOHOL - AGE 17 TO 20': 4,
    'DEL CONTROLLED SUB/NARCOTIC': 4,
    'VIOL CITY ORDINANCE - SMOKING': 1,
    'DOC ABUSE OR THREAT': 1,
    'SEXUAL ASSAULT OF CHILD/OBJECT': 20,
    'ROBBERY BY THREAT': 18,
    'RENTAL CAR/FAIL TO RETURN': 1,
    'CRIMINAL TRESPASS/TRANSIENT': 4,
    'VIOL CITY ORDINANCE - DOG': 1,
    'VIOL WATER SAFETY ACT': 1,
    'MANF CONTROLLED SUB- SYN NARC': 1,
    'TERRORISTIC THREAT-FAM/DAT VIO': 5,
    'CRUELTY TO ANIMALS': 3,
    'ASSAULT BY THREAT FAM/DATING': 5,
    'FORGERY BY ALTERATION': 1,
    'APPLIC TO REVOKE PROBATION': 0,
    'POSS CONTROLLED SUB/SYN NARC': 1,
    'HARASSMENT OF A PUBLIC SERVANT': 1,
    'BREACH OF COMPUTER SECURITY': 1,
    'AGG ASLT W/MOTOR VEH FAM/DAT V': 4,
    'DWI - DRUG RECOGNITION EXPERT': 6,
    'CIVIL DISTURBANCE/DEMO': 6,
    'POSS DANG DRUG': 2,
    'FALSE REPORT TO PEACE OFFICER': 1,
    'VIOL CITY ORDINANCE - SOUND': 12,
    'VIOL STAY AWAY ORDER': 1,
    'STATUTORY RAPE OF CHILD': 18,
    'AGG ASLT ENHANC STRANGL/SUFFOC': 16,
    'THEFT BY CHECK': 1,
    'FAILURE TO REG AS SEX OFFENDER': 12,
    'DAMAGE CITY VEHICLE': 4,
    'VIOL CITY ORDINANCE -  GAME RM': 3,
    'POSS CRIMINAL INSTRUMENT': 2,
    'FELONY ENHANCEMENT/ASSLT W/INJ': 5,
    'PROWLER': 16,
    'DOC EXPOSURE': 4,
    'DEADLY CONDUCT': 14,
    'FORGERY - OTHER': 1,
    'THEFT OF TRAILER': 5,
    'AIRPORT - BREACH OF SECURITY': 2,
    'BOATING WHILE INTOXICATED': 1,
    'INJURY TO CHILD': 3,
    'INJ TO ELDERLY   FAM/DATE VIOL': 3,
    'GAMBLING': 1,
    'VIOL CITY ORDINANCE - WRECKER': 4,
    'EXPLOSIVE ORDNANCE DISPOSAL': 6,
    'ASSAULT  CONTACT-SEXUAL NATURE': 10,
    'DOC UNREASONABLE NOISE': 12,
    'THEFT- APPROPRIATE STOLEN PROP': 8,
    'INTER EMERG PHONECALL FAM/DATE': 1,
    'POSS/PROMO CHILD PORNOGRAPHY': 3,
    'SIT AND LIE ORDINANCE VIOL': 17,
    'INTERFERENCE PUBLIC DUTIES': 2,
    'DEL OF ALCOHOL TO MINOR': 16,
    'CRASH/INTOXICATION ASSAULT': 8,
    'OBTAIN CONTROLLED SUB BY FRAUD': 2,
    'INTERFERE W PO SERVICE ANIMALS': 2,
    'DUI - AGE 17 TO 20': 2,
    'THEFT CATALYTIC CONVERTER': 12,
    'DEPENDENT AND NEGLECTED CHILD': 4,
    'VIOL OF COURT ORDER-NON EPO-PO': 1,
    'AMPLIFIED MUSIC / VEHICLE': 10,
    'TAKE WEAPON FRM POLICE OFFICER': 10,
    'VOCO AMPLIFIED MUSIC/VEHICLE': 12,
    'DOC DISCHARGE GUN - PUB PLACE': 14,
    'PUBLIC LEWDNESS': 14,
    'EVADING / VEHICLE PURSUIT': 10,
    'IMMIGRATION HOLD/ARREST': 8,
    'DOC DISCHARGE GUN - PUB ROAD': 10,
    'AGG SEXUAL ASSAULT CHILD/OBJEC': 18,
    'NUISANCE ABATEMENT': 4,
    'MAKING TOBACCO AVAIL TO MINOR': 8,
    'ABUSE OF 911': 1,
    'ASSAULT ON PUBLIC SERVANT': 8,
    'DEL OF DANG DRUG': 1,
    'FRAUD-CARD SKIMMER': 6,
    'RETALIATION': 5,
    'BURG OF RES - SEXUAL NATURE': 14,
    'INDECENCY WITH A CHILD/CONTACT': 10,
    'ATT BURGLARY OF RESIDENCE': 16,
    'POSS OF FIREARM BY FELON': 14,
    'SLEEPING IN PUBLIC PLACE': 14,
    'INVASIVE VISUAL RECORDING': 3,
    'AGG KIDNAPPING': 12,
    'INDECENCY WITH CHILD/EXPOSURE': 12,
    'DISCLOS/PROMO INTIMATE VISUAL': 12,
    'ONLINE SOLICITATION OF A MINOR': 1,
    'POCKET PICKING': 12,
    'INJURY DISABLED INDIVIDUAL': 1,
    'VIOL OF BOND CONDITIONS': 8,
    'RAPE OF A CHILD': 20,
    'EXPIRED-EVADING ARREST': 10,
    'AGG RAPE OF A CHILD': 22,
    'IMPERSONATING PUBLIC SERVANT': 6,
    'PURCHASING PROSTITUTION': 8,
    'ONLINE IMPERSONATION': 1,
    'INTERFERING W/EMERG PHONE CALL': 1,
    'FALSE STATEMENT -OBTAIN CREDIT': 1,
    'DOC ABUSIVE LANGUAGE': 1,
    'BURG OF RES - FAM/DATING ASLT': 12,
    'AGG PROMOTION OF PROSTITUTION': 18,
    'CONTROLLED SUB VIOL - OTHER': 4,
    'VIOL TEMP EX PARTE  ORDER': 2,
    'ILLEGAL LABELLING OF RECORDING': 2,
    'INJURY TO ELDERLY PERSON': 1,
    'AGG PERJURY': 4,
    'ATT AUTO THEFT': 14,
    'FORCED SODOMY': 16,
    'MONEY LAUNDERING': 4,
    'DEL MARIJUANA': 4,
    'AGG RAPE': 22,
    'KIDNAPPING': 20,
    'TAMPERING WITH EVIDENCE': 4,
    'DOC WINDOW PEEPING-RESIDENCE': 16,
    'TRAFFICKING OF PERSONS': 22,
    'PIGEON DROP': 4,
    'TERRORISTIC THREAT-MASS CASLTY': 30,
    'CAPITAL MURDER': 20,
    'HINDERING PROCEEDING': 4,
    'THEFT OF HEAVY EQUIPMENT': 8,
    'AGG SEXUAL ASSAULT W OBJECT': 18,
    'BESTIALITY': 8,
    'PROBATION VIOL': 8,
    'TOBACCO VIOL - UNDER AGE 17': 2,
    'HINDER SECURED CREDITORS': 2,
    'BAIL JUMPING/FAIL TO APPEAR': 4,
    'CRED CARD ABUSE - EXPIR-CANCEL': 1,
    'BANK KITING': 2,
    'EXPIRED-ATT KIDNAPPING': 10,
    'DOC OFFENSIVE GESTURE': 3,
    'MURDER': 12,
    'UNLAWFUL CARRY-LIC HOLDER': 6,
    'HINDERING APPREHENSION': 8,
    'AGG ROBBERY BY ASSAULT': 12,
    'UNLAWFUL RESTRAINT FAM/DAT VIO': 4,
    'FRAUD DESTRUCTION OF A WRITING': 1,
    'DUMPING REFUSE NEAR HIGHWAY': 1,
    'TAMPERING WITH GOV RECORD': 1,
    'ESCAPE FROM CUSTODY': 8,
    'UNLAWFUL RESTRAINT': 4,
    'CRIMINAL MISCHIEF BY ARSON': 4,
    'EXPIRED-INJURY CHILD - ELDERLY': 2,
    'DANG DRUG VIOL - OTHER': 12,
    'AGG ASSAULT ON PUBLIC SERVANT': 12,
    'FORGERY- CERTIFICATE OF TITLE': 1,
    'SEXTING DEPICTING A MINOR': 5,
    'ASSAULT ON PEACE OFFICER': 12,
    'PROMOTION OF PROSTITUTION': 18,
    'DISRUPTING MEETING/PROCESSION': 8,
    'TRUANCY': 8,
    'INJ/CHILD FV (NO CARE/CUSTODY)': 4,
    'EXPIRED-ATT AGG KIDNAPPING': 4,
    'PUBLIC INTOX-SOBERING CENTER': 12,
    'MISREP AGE BY MINOR': 4,
    'POSS OF ALCOHOL-AGE 16 & UNDER': 4,
    'DEADLY CONDUCT FAM/DATE VIOL': 10,
    'FORGERY OF IDENTIFICATION': 2,
    'PERJURY': 2,
    'TELECOMMUNICATION CRIMES/OTHER': 1,
    'CRIMINAL TRESPASS/IN VEHICLE': 4,
    'IDENTITY THEFT-TAX RETURNS': 1,
    'PURSE SNATCHING': 4,
    'INHALANT ABUSE': 2,
    'DEL CONTROLLED SUB/OTHER': 2,
    'SMUGGLING ILLEGAL ALIEN': 17,
    'CRIMINAL SOLICITATION OF MINOR': 12,
    'EXPLOITATION OF CHILD/ELDERLY': 12,
    'THEFT BY FALSE PRETEXT/BUNCO': 8,
    'DUI - AGE 16 AND UNDER': 6,
    'CONT SEX ABUSE OF CHILD': 18,
    'CRASH/INTOX MANSLAUGHTER': 10,
    'MISAPPLY FIDUCIARY PROP': 2,
    'THEFT BY EXTORTION': 2,
    'DOC WINDOW PEEPING - HOTEL': 4,
    'DISTRIB HARMFUL MATERIAL MINOR': 4,
    'DEL SYNTHETIC MARIJUANA': 2,
    'HARBORING RUNAWAY CHILD': 4,
    'TOBACCO VIOL - AGE 17': 4,
    'VIOL CITY ORDINANCE -FIREWORK': 1,
    'TAMPERING WITH ID NUMBER': 1,
    'AGG KIDNAPPING FAM VIO': 4,
    'CRIMES AGAINST ELDERLY': 8,
    'EXPIRED-ATT CAPITAL MURDER': 4,
    'COMPELLING PROSTITUTION': 18,
    'AGG FORCED SODOMY OF CHILD': 18,
    'OBTAIN DANG DRUG BY FRAUD': 2,
    'MISUSE OF OFFICIAL INFO': 2,
    'INJ TO DISABLED  FAM/DATE VIOL': 3,
    'THEFT BY PUBLIC SERVANT': 6,
    'ABUSE OF OFFICIAL CAPACITY': 4,
    'FIREARMS ON SCHOOL PROP': 6,
    'OFFICIAL OPPRESSION': 12,
    'VIOL CIVIL RIGHTS PRISONER': 10,
    'TAMPERING WITH WITNESS': 4,
    'UNLAWFUL INTERCEPTION': 4,
    'VIOL OF PRISONERS RIGHT': 4,
    'SERIOUS INJURY TO A CHILD': 4,
    'DOC CREATING NOXIOUS ODOR': 2,
    'THEFT OF TELECOMMUNICATION SRV': 2,
    'ILLUMIN AIRCRAFT INTENSE LIGHT': 2,
    'TRADEMARK COUNTERFEITING': 1,
    'CRIMINAL SOLICITATION': 3,
    'SEXUAL PERFORMANCE BY CHILD': 5,
    'DISPOSAL OF SOLID WASTE': 2,
    'THEFT BY EMBEZZLEMENT': 2,
    'UCW LICENSE PREMISE': 1,
    'POSS OF PRESCRIPTION FORM': 2,
    'TAMPERING WITH CONSUMER PROD': 1,
    'EXPIRED-ATT SEXUAL ASSAULT': 14,
    'DELIVERY OF PRESCRIPTION FORM': 1,
    'ABUSE OF CORPSE': 4,
    'OFFICIAL MISCONDUCT': 2,
    'FORCED SODOMY OF CHILD': 18,
    'MANSLAUGHTER': 22,
    'FALSE ID AS A PEACE OFFICER': 8,
    'ATT BURGLARY NON RESIDENCE': 8,
    'VIOL CITY ORDINANCE - BOOTING': 6,
    'JUSTIFIED HOMICIDE': 18,
    'EXPIRED-ATT RAPE': 10,
    'AGG SODOMY': 16,
    'VIOL CITY ORDINANCE - AIRPORT': 2,
    'AGG FORCED SODOMY': 16,
    'ABANDONED REFRIGERATOR': 6,
    'COMMUNICATING GAMBLING INFO': 8,
    'POSSESSION OF FORGED WRITING': 1,
    'RECKLESS CONDUCT': 2,
    'AIRPORT - CRIMINAL TRESPASS': 3,
    'VIOL CITY ORDINANCE - TAXI': 1,
    'CRASH/MURDER': 8,
    'DISRUPTION OF CLASSES': 2,
    'MANF CONTROLLED SUB - OTHER': 1,
    'EXPIRED-ATT OB DANG DRUG FRAUD': 2,
    'POSS OF GAMBLING PARAPHERNALIA': 2,
    'KEEPING GAMBLING PLACE': 2,
    'DISRUPTIVE ACTS AT SCHOOLS': 2,
    'BRIBERY': 1,
    'PRACTICE MEDICINE W/OUT LICENS': 10,
    'ARSON WITH BODILY INJURY': 10,
    'VIOL PO / SEXUAL ASLT VICTIM': 6,
    'CRIMINAL CONSPIRACY': 1,
    'CRIM NEG HOMICIDE/NON TRAFFIC': 6,
    'THEFT/TILL TAPPING': 1,
    'FALSE REPORT TO CPS': 2,
    'EXPIRED-SOLICITATION OF CHILD': 6,
    'IMPROPER CONTACT-SEX ASLT VICT': 8,
    'HAZING': 2,
    'CONTEMPT OF COURT': 1,
    'ASSAULT - SCHOOL PERSONNEL': 10,
    'DRINKING AFTER CURFEW': 6,
    'SALE OR PURCHASE OF CHILD': 8,
    'OBSCENE DISPLAY - DISTRIBUTION': 16,
    'DOC WINDOW PEEPING - PUB AREA': 14,
    'KIDNAPPING FAM VIO': 14,
    'VIOL CITY ORDINANCE - TITLE 10': 1,
    'CRASH/MANSLAUGHTER': 4,
    'ISSUANCE OF BAD CHECK': 1,
    'FAIL DISPLAY HANDGUN LICENSE': 8,
    'SALE OF LIQ WITHOUT PERMIT': 10,
    'EXPIRED-ATT OBT CONT SUB FRAUD': 1,
    'AIRPORT - BOMB THREAT': 3,
    'CONTRIBUTE DELINQUENCY MINOR': 6,
    'ATT THEFT': 12,
    'DOMESTIC VIOLENCE/ALARM': 8,
    'EXPIRED-ATT ROBBERY BY ASSAULT': 8,
    'SECURING EXEC-DOC BY DECEPTION': 1,
    'PROHIBITED SEX CONDUCT-INCEST': 10,
    'OBSCENITY': 16,
    'EXPIRED-ATT AGG ROBBERY/WEAP': 12,
    'BWI-EXPIRED USE 2110': 1,
    'STAY AWAY ORDER': 1,
    'DESECRATION VENERATED OBJECT': 6,
    'EXPIRED-ATT AGG SEXUAL ASSAULT': 8,
    'FICTITIOUS NAME': 1,
    'CRIMINAL NONSUPPORT': 1,
    'POSS OF GAMBLING EQUIPMENT': 1,
    'ATTACK ON ASSISTANCE ANIMAL': 3,
    'DESERTION': 1,
    'AWOL': 1,
    'EXPIRED-ATT/TAKE WEAP FROM OFF': 6,
    'EXP-VIOL CITY ORDINANCE - TAXI': 1,
    'EXPIRED-ATT SEXUAL ASULT CHILD': 8,
    'GIFT TO PUBLIC SERVANT': 2,
    'ILLEGAL TRANSPORTATION OF LIQ': 6,
    'VIOL STATE MASSAGE REGULATIONS': 1,
    'RIOT': 10,
    'COERCION OF PUBLIC SERVANT': 2,
    'ATT ARSON': 6,
    'EXPIRED-ATT MURDER': 6,
    'EXPIREDATT AGG SEX ASSLT CHILD': 8,
    'POSS OF LIQ ON SCHOOL PROP': 8,
    'SALE OF LIQ IN PROHIB AREA': 8,
    'FAILURE TO REPORT CHILD ABUSE': 1,
    'EXPIRED-ATTAGGFORCESODOMYCHILD': 6,
    'EXPIRED-ATT RAPE OF A CHILD': 10,
    'AIDING SUICIDE': 3,
    'EXPIRED - DEADLY ASSAULT': 12,
    'BOMB THREAT - AIRCRAFT': 3,
    'COMMERCIAL BRIBERY': 1,
    'EXPIRED-ATT FORCED SODOMY': 8,
    'LOITERING ON SCHOOL PROP': 8,
    'AIRPORT - SUSPICIOUS PERSON': 3,
    'LOITERING IN PUBLIC PARK': 8,
}

In [7]:
# Search window around each house: how many grid cells must be visited in
# every direction so that roughly desired_range_km is covered.
desired_range_km = 3
years_to_use = list(range(2015, 2020))

# size in km of one grid cell along each axis, measured at the
# (min_lat, min_lon) corner of the grid
lat_step_km, lon_step_km = (
    distance((min_lat, min_lon), neighbour)
    for neighbour in ((min_lat + del_lat, min_lon),
                      (min_lat, min_lon + del_lon))
)
avg_step_km = (lat_step_km + lon_step_km) / 2

# cell offsets -num_steps .. +num_steps, applied to both grid axes
num_steps = math.ceil(desired_range_km / avg_step_km)
search_range = range(-num_steps, num_steps + 1)

# function that builds crime data for the house
def calculate_crime_index(house):
    """
    Sum the weighted crime counts of the grid cells surrounding one house.

    Every grid cell whose (lat, lon) cell offsets from the house's own cell
    are both in ``search_range`` is visited. For each year in
    ``years_to_use`` the per-type counts stored for that cell in
    ``crime_data`` are multiplied by the matching ``incident_weights`` entry
    and added up.

    Parameters
    ----------
    house : tuple
        ``(index, row)`` pair as yielded by ``DataFrame.iterrows()``. Only
        ``house[1]`` is read; it must provide the ``lat_key``, ``lon_key``
        and ``'idx'`` entries.

    Returns
    -------
    dict
        ``{'idx': <row 'idx'>, 'crime_indices': {crime_index_key: <total>}}``.
        The total is 0 when the house lies outside the grid's bounding box
        or when either coordinate is NaN.

    Raises
    ------
    KeyError
        If a crime type with a positive count has no entry in
        ``incident_weights``.
    """
    row = house[1]
    house_lat = row[lat_key]
    house_lon = row[lon_key]

    # Skip houses that are not in our bounds. This is written as a positive
    # range test so that NaN coordinates (for which every comparison is
    # False) are skipped as well instead of reaching math.floor() below,
    # which raises ValueError on NaN.
    in_bounds = (min_lat <= house_lat <= max_lat and
                 min_lon <= house_lon <= max_lon)
    if not in_bounds:
        return {'idx': row['idx'], 'crime_indices': {crime_index_key: 0}}

    # find the grid index the house is in
    idx_lat = math.floor((house_lat - min_lat) / del_lat)
    idx_lon = math.floor((house_lon - min_lon) / del_lon)

    # rows of every selected year's grid file, looked up once per house
    yearly_grids = [crime_data[year - year_start] for year in years_to_use]

    # iterate through all neighboring grid squares to find crime
    crime_total = 0
    for radius_lat in search_range:
        search_lat_idx = idx_lat + radius_lat
        # check if we're past the edge of the grid
        if search_lat_idx < 0 or search_lat_idx >= seg_lat:
            continue

        for radius_lon in search_range:
            search_lon_idx = idx_lon + radius_lon
            if search_lon_idx < 0 or search_lon_idx >= seg_lon:
                continue

            # Row of this cell in each grid file; +1 skips row 0, which is
            # read as the header below. The index does not depend on the
            # year, so it is computed once per cell.
            # NOTE(review): this assumes the rows are ordered with the
            # longitude index as the major key. In that layout the stride
            # would normally be seg_lat; using seg_lon is only equivalent
            # while seg_lat == seg_lon - confirm against the code that
            # wrote the geo_crime_*.csv files.
            lat_lon_idx = (search_lon_idx * seg_lon) + search_lat_idx + 1

            # iterate through precomputed crime indices
            for grid in yearly_grids:
                header = grid[0]
                cell_row = grid[lat_lon_idx]
                # The first three columns are not crime counts and are
                # skipped (presumably cell identifiers - TODO confirm).
                for column in range(3, len(header)):
                    crime_count = int(cell_row[column])
                    if crime_count > 0:
                        crime_total += crime_count * incident_weights[header[column]]

    return {'idx': row['idx'], 'crime_indices': {crime_index_key: crime_total}}

In [8]:
# create an index column so each worker result can be matched back to its row
housing_df['idx'] = housing_df.index

# log the data size
print('Looking at ' + str(len(housing_df)) + ' houses.')

# parallelize computation
# Use half of the reported CPUs, but never fewer than one worker: on a
# single-CPU machine int(cpu_count() / 2) is 0 and mp.Pool(0) raises
# ValueError.
worker_count = max(1, mp.cpu_count() // 2)
count = 0
with mp.Pool(worker_count) as pool:
    results = pool.imap(calculate_crime_index, housing_df.iterrows(), chunksize=50)
    for count, result in enumerate(results, start=1):
        # the label of the row this result belongs to, looked up once
        row_label = housing_df.index[result['idx']]
        # add each returned value onto the matching (zero-initialised) column
        for crime_key, crime_value in result['crime_indices'].items():
            housing_df.at[row_label, crime_key] = \
                housing_df.at[row_label, crime_key] + crime_value
        # "\r" keeps the progress counter on a single console line
        print('\rProcessing house no: {}.'.format(count), end="\r")

Looking at 61818 houses.
Processing house no: 61818.

In [9]:
# Write the housing data, now carrying the crime index column, to a new CSV.
# NOTE(review): the helper 'idx' column added for the parallel step is still
# part of housing_df here and is written out too - confirm that is intended.
housing_df.to_csv('./data/raw_austin_housing_data_w_crime.csv')