In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
file = open("dbCredentials.txt","r")
lines = file.readlines()
dbCredentials = {"host" : "localhost", \
                 "uname" : "root", \
                 "password" : "password", \
                 "dbName" : "ChicagoData",
                }
for line in lines:
    vals = line.split("=")
    if(len(vals) == 2):
        if(vals[0] in dbCredentials):
            dbCredentials[vals[0]] = vals[1].replace("\n","")

In [3]:
conn = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user=dbCredentials["uname"],
                               pw=dbCredentials["password"],
                               db=dbCredentials["dbName"]))

In [4]:
crimeDF = pd.read_sql("select * from "+dbCredentials["dbName"]+".Crimes", conn);
pd.set_option('display.expand_frame_repr', False)
censusDF = pd.read_sql("select * from "+dbCredentials["dbName"]+".Census", conn);
pd.set_option('display.expand_frame_repr', False)

In [5]:
from datetime import datetime
format = '%Y-%m-%d %H:%M:%S'
crimeDF["hour"]=crimeDF.date.apply(lambda row: datetime.strptime(row, format).strftime("%H"))
crimeDF['month']=crimeDF.date.apply(lambda row: datetime.strptime(row, format).strftime("%m"))
crimeDF["weekday"]=crimeDF.date.apply(lambda row: datetime.strptime(row, format).strftime("%A"))
print(crimeDF.shape)
crimeDF.head(2)

(1573164, 15)


Unnamed: 0,date,block,primarytype,locationdescription,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,weekday
0,2015-09-05 13:30:00,043XX S WOOD ST,BATTERY,RESIDENCE,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,Saturday
1,2015-09-04 11:30:00,008XX N CENTRAL AVE,THEFT,CTA BUS,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,Friday


In [6]:
crimeDF.drop(["date"],axis=1,inplace=True)
crimeDF.head(2)

Unnamed: 0,block,primarytype,locationdescription,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,weekday
0,043XX S WOOD ST,BATTERY,RESIDENCE,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,Saturday
1,008XX N CENTRAL AVE,THEFT,CTA BUS,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,Friday


In [7]:
print(crimeDF['primarytype'].unique())

['BATTERY' 'THEFT' 'NARCOTICS' 'ASSAULT' 'BURGLARY' 'ROBBERY'
 'OTHER OFFENSE' 'CRIMINAL DAMAGE' 'WEAPONS VIOLATION'
 'DECEPTIVE PRACTICE' 'CRIMINAL TRESPASS' 'MOTOR VEHICLE THEFT'
 'SEX OFFENSE' 'INTERFERENCE WITH PUBLIC OFFICER'
 'OFFENSE INVOLVING CHILDREN' 'PUBLIC PEACE VIOLATION' 'PROSTITUTION'
 'GAMBLING' 'CRIM SEXUAL ASSAULT' 'LIQUOR LAW VIOLATION' 'ARSON'
 'STALKING' 'KIDNAPPING' 'INTIMIDATION' 'HOMICIDE'
 'CONCEALED CARRY LICENSE VIOLATION' 'NON - CRIMINAL' 'HUMAN TRAFFICKING'
 'OBSCENITY' 'CRIMINAL SEXUAL ASSAULT' 'PUBLIC INDECENCY'
 'OTHER NARCOTIC VIOLATION' 'NON-CRIMINAL'
 'NON-CRIMINAL (SUBJECT SPECIFIED)' 'RITUALISM']


In [8]:
"""
0 - not severe
1 - severe
"""
def convertToSevereAndNonSevere(row):
    if row['primarytype'] in ['THEFT','NARCOTICS','CRIMINAL DAMAGE','DECEPTIVE PRACTICE','MOTOR VEHICLE THEFT',\
                             'INTERFERENCE WITH PUBLIC OFFICER','PROSTITUTION','GAMBLING','LIQUOR LAW VIOLATION',\
                             'STALKING','CONCEALED CARRY LICENSE VIOLATION','NON - CRIMINAL','OBSCENITY',\
                             'PUBLIC INDECENCY','OTHER NARCOTIC VIOLATION']:
        return 0
    else:
        return 1

In [9]:
crimeDF['severity'] = crimeDF.apply(lambda row: convertToSevereAndNonSevere(row), axis=1)
crimeDF.head(2)

Unnamed: 0,block,primarytype,locationdescription,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,weekday,severity
0,043XX S WOOD ST,BATTERY,RESIDENCE,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,Saturday,1
1,008XX N CENTRAL AVE,THEFT,CTA BUS,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,Friday,0


In [10]:
"""
0 - RESIDENTIAL AREA
1 - BUSINESS AREA
2 - VEHICLE
3 - PUBLIC BUILDINGS
4 - PUBLIC AREA
5 - GOVERNMENT LOCATONS
6 - PUBLIC TRANSPORT
7 - OTHER
"""
def convertlocationToIndoorOutdoor(row):

    if row['locationdescription'] in ['RESIDENCE', 'APARTMENT', 'CHA APARTMENT', 'RESIDENCE PORCH/HALLWAY', \
                                      'RESIDENCE-GARAGE','RESIDENTIAL YARD (FRONT/BACK)', \
                                      'DRIVEWAY - RESIDENTIAL', 'HOUSE','RESIDENCE - YARD (FRONT / BACK)',\
                                      'RESIDENCE - GARAGE','ROOMING HOUSE','ELEVATOR', 'NURSING HOME', 'NURSING / RETIREMENT HOME',\
                                      'HOTEL','RESIDENCE - PORCH / HALLWAY','MOTEL','COLLEGE / UNIVERSITY - RESIDENCE HALL',\
                                      'POOLROOM','HOTEL / MOTEL','PORCH','POOL ROOM']:
        """RESIDENTIAL AREA"""
        return 0
    if row['locationdescription'] in ['BARBERSHOP', 'COMMERCIAL / BUSINESS OFFICE', 'CURRENCY EXCHANGE', \
                                      'DEPARTMENT STORE', 'RESTAURANT','ATHLETIC CLUB', 'TAVERN/LIQUOR STORE', \
                                      'SMALL RETAIL STORE', 'HOTEL/MOTEL', 'GAS STATION','AUTO / BOAT / RV DEALERSHIP', \
                                      'CONVENIENCE STORE', 'BANK', 'BAR OR TAVERN', 'DRUG STORE',\
                                      'GROCERY FOOD STORE', 'CAR WASH', 'SPORTS ARENA/STADIUM', 'DAY CARE CENTER',\
                                      'MOVIE HOUSE/THEATER','APPLIANCE STORE', 'CLEANING STORE', 'PAWN SHOP', \
                                      'FACTORY/MANUFACTURING BUILDING', 'ANIMAL HOSPITAL','BOWLING ALLEY', \
                                      'SAVINGS AND LOAN', 'CREDIT UNION', 'KENNEL', 'GARAGE/AUTO REPAIR', \
                                      'LIQUOR STORE','GAS STATION DRIVE/PROP.', 'OFFICE', 'BARBER SHOP/BEAUTY SALON','FUNERAL PARLOR',\
                                      'SCHOOL - PRIVATE GROUNDS','CTA PARKING LOT / GARAGE / OTHER PROPERTY','CLEANERS/LAUNDROMAT'\
                                      'HOSPITAL BUILDING / GROUNDS', 'PUBLIC HIGH SCHOOL' ,'FACTORY / MANUFACTURING BUILDING', \
                                      'FACTORY', 'SCHOOL - PRIVATE BUILDING', 'RETAIL STORE','CLUB','TAVERN / LIQUOR STORE',\
                                      'MEDICAL / DENTAL OFFICE', 'MOVIE HOUSE / THEATER']:
        """BUSINESS AREA"""
        return 1
    if row['locationdescription'] in ['VEHICLE NON-COMMERCIAL', 'AUTO', 'VEHICLE - OTHER RIDE SHARE SERVICE (E.G., UBER, LYFT)', \
                                      'TAXICAB','VEHICLE-COMMERCIAL', 'VEHICLE - DELIVERY TRUCK', \
                                      'VEHICLE-COMMERCIAL - TROLLEY BUS','VEHICLE-COMMERCIAL - ENTERTAINMENT/PARTY BUS',\
                                      'VEHICLE - COMMERCIAL','VEHICLE - OTHER RIDE SERVICE','DELIVERY TRUCK',\
                                      'TAXI CAB', 'VEHICLE - OTHER RIDE SHARE SERVICE (LYFT, UBER, ETC.)',\
                                      'VEHICLE - COMMERCIAL: ENTERTAINMENT / PARTY BUS', 'TRUCK','TRAILER']:
        """VEHICLE"""
        return 2
    if row['locationdescription'] in ['HOSPITAL BUILDING/GROUNDS', 'NURSING HOME/RETIREMENT HOME', \
                                      'SCHOOL, PUBLIC, BUILDING','CHURCH/SYNAGOGUE/PLACE OF WORSHIP', \
                                      'SCHOOL, PUBLIC, GROUNDS', 'SCHOOL, PRIVATE, BUILDING',\
                                      'MEDICAL/DENTAL OFFICE', 'LIBRARY', 'COLLEGE/UNIVERSITY RESIDENCE HALL', \
                                      'YMCA', 'HOSPITAL','SCHOOL - PUBLIC BUILDING','COLLEGE / UNIVERSITY - GROUNDS',\
                                      'EXPRESSWAY EMBANKMENT', 'SCHOOL - PRIVATE GROUNDS','RAILROAD PROPERTY',\
                                      'COACH HOUSE','CHURCH']:
        """PUBLIC BUILDINGS"""
        return 3
    if row['locationdescription'] in ['STREET', 'PARKING LOT/GARAGE(NON.RESID.)', 'SIDEWALK', 'PARK PROPERTY', \
                                      'ALLEY', 'CEMETARY','CHA HALLWAY/STAIRWELL/ELEVATOR', 'CHA PARKING LOT/GROUNDS', \
                                      'COLLEGE/UNIVERSITY GROUNDS', 'BRIDGE','SCHOOL, PRIVATE, GROUNDS', \
                                      'FOREST PRESERVE', 'LAKEFRONT/WATERFRONT/RIVERBANK', 'PARKING LOT', 'DRIVEWAY',\
                                      'HALLWAY', 'YARD', 'CHA GROUNDS', 'RIVER BANK', 'STAIRWELL', 'CHA PARKING LOT',\
                                      'AIRPORT TERMINAL UPPER LEVEL - SECURE AREA','OTHER RAILROAD PROP / TRAIN DEPOT',\
                                      'AIRPORT TERMINAL LOWER LEVEL - SECURE AREA', 'AIRPORT BUILDING NON-TERMINAL - SECURE AREA',\
                                      'AIRPORT BUILDING NON-TERMINAL - SECURE AREA', 'AIRPORT EXTERIOR - NON-SECURE AREA',\
                                      'AIRPORT PARKING LOT', 'AIRPORT TERMINAL LOWER LEVEL - NON-SECURE AREA',\
                                      'AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA', 'AIRPORT VENDING ESTABLISHMENT',\
                                      'AIRPORT TERMINAL MEZZANINE - NON-SECURE AREA','CTA PROPERTY','CTA "L" PLATFORM',\
                                      'VACANT LOT', 'VACANT LOT / LAND','VACANT LOT/LAND', 'LAGOON','HIGHWAY / EXPRESSWAY','SEWER',\
                                      'LIVERY STAND OFFICE','SCHOOL YARD','SCHOOL - PUBLIC GROUNDS',\
                                      'PUBLIC GRAMMAR SCHOOL', 'SPORTS ARENA / STADIUM', 'CHA BREEZEWAY', 'DUMPSTER',\
                                      'CHA PARKING LOT / GROUNDS','RIVER', 'JUNK YARD/GARBAGE DUMP', \
                                      'PARKING LOT / GARAGE (NON RESIDENTIAL)','TAVERN', 'VESTIBULE', 'TRUCKING TERMINAL',\
                                      'CHURCH / SYNAGOGUE / PLACE OF WORSHIP', 'CHURCH PROPERTY', \
                                      'LAKE','LAKEFRONT / WATERFRONT / RIVERBANK','HIGHWAY/EXPRESSWAY']:
        """PUBLIC AREA"""
        return 4
    if row['locationdescription'] in ['POLICE FACILITY/VEH PARKING LOT', 'GOVERNMENT BUILDING/PROPERTY', \
                                      'FEDERAL BUILDING', 'JAIL / LOCK-UP FACILITY','FIRE STATION', \
                                      'GOVERNMENT BUILDING','CHA ELEVATOR','GOVERNMENT BUILDING / PROPERTY',\
                                      'OTHER RAILROAD PROPERTY / TRAIN DEPOT','COUNTY JAIL', 'CHA STAIRWELL',\
                                      'POLICE FACILITY / VEHICLE PARKING LOT','CHA PLAY LOT','CHA LOBBY',\
                                      'CHA HALLWAY']:
        """GOVERNMENT LOCATONS"""
        return 5
    if row['locationdescription'] in ['AIRPORT TERMINAL UPPER LEVEL - NON-SECURE AREA', 'CTA PLATFORM', \
                                      'CTA STATION', 'CTA BUS STOP', 'CTA TRAIN', 'CTA BUS', 'CTA GARAGE / OTHER PROPERTY',\
                                      'AIRCRAFT', 'OTHER COMMERCIAL TRANSPORTATION', \
                                      'AIRPORT EXTERIOR - SECURE AREA', 'AIRPORT TRANSPORTATION SYSTEM (ATS)','CTA TRACKS - RIGHT OF WAY', \
                                      'AIRPORT/AIRCRAFT', 'BOAT/WATERCRAFT','CTA "L" TRAIN','PRAIRIE','CHA HALLWAY / STAIRWELL / ELEVATOR'\
                                      ]:
        """PUBLIC TRANSPORT"""
        return 6
    if row['locationdescription'] in ['OTHER', 'ABANDONED BUILDING', 'WAREHOUSE', 'ATM (AUTOMATIC TELLER MACHINE)',\
                                      'CONSTRUCTION SITE', 'NEWSSTAND','COIN OPERATED MACHINE',\
                                      'HORSE STABLE','FARM', 'GARAGE', 'WOODED AREA', 'GANGWAY', 'BASEMENT',\
                                      'OTHER (SPECIFY)']:
        """OTHER"""
        return 7
    else:
        return 8

In [11]:
crimeDF['location'] = crimeDF.apply(lambda row: convertlocationToIndoorOutdoor(row), axis=1)
crimeDF.head(2)

Unnamed: 0,block,primarytype,locationdescription,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,weekday,severity,location
0,043XX S WOOD ST,BATTERY,RESIDENCE,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,Saturday,1,0
1,008XX N CENTRAL AVE,THEFT,CTA BUS,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,Friday,0,6


In [12]:
"""
0 - sunday
1 - monday
2 - tuesday
3 - wednesday
4 - thursday
5 - friday
6 - saturday
"""
def encodeDays(row):
    if row['weekday'] == "Sunday":
        return 0
    if row['weekday'] == "Monday":
        return 1
    if row['weekday'] == "Tuesday":
        return 2
    if row['weekday'] == "Wednesday":
        return 3
    if row['weekday'] == "Thursday":
        return 4
    if row['weekday'] == "Friday":
        return 5
    if row['weekday'] == "Saturday":
        return 6

In [13]:
crimeDF['day'] = crimeDF.apply(lambda row: encodeDays(row), axis=1)
crimeDF.head(2)

Unnamed: 0,block,primarytype,locationdescription,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,weekday,severity,location,day
0,043XX S WOOD ST,BATTERY,RESIDENCE,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,Saturday,1,0,6
1,008XX N CENTRAL AVE,THEFT,CTA BUS,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,Friday,0,6,5


In [14]:
crimeDF.drop(["block","primarytype","locationdescription","weekday"],axis=1,inplace=True)
crimeDF.head(2)

Unnamed: 0,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,severity,location,day
0,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,1,0,6
1,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,0,6,5


In [48]:
censusDF.head(5)
beforeAddingCensus = crimeDF.copy()

In [16]:
crimeDF["income"]=crimeDF["communityarea"].apply(lambda val: censusDF.loc[censusDF['communityareanumber'] == val]["percapitaincome"].values[0])

In [17]:
crimeDF["belowpoverty"]=crimeDF["communityarea"].apply(lambda val: censusDF.loc[censusDF['communityareanumber'] == val]["percenthouseholdsbelowpoverty"].values[0])

In [18]:
crimeDF["unemployed"]=crimeDF["communityarea"].apply(lambda val: censusDF.loc[censusDF['communityareanumber'] == val]["percentaged16aboveunemployed"].values[0])

In [19]:
crimeDF["educated"]=crimeDF["communityarea"].apply(lambda val: censusDF.loc[censusDF['communityareanumber'] == val]["percentaged25abovewithouthighschooldiploma"].values[0])

In [20]:
crimeDF["workingage"]=crimeDF["communityarea"].apply(lambda val: censusDF.loc[censusDF['communityareanumber'] == val]["percentagedunder18orover64"].values[0])

In [21]:
crimeDF.head(5)

Unnamed: 0,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,severity,location,day,income,belowpoverty,unemployed,educated,workingage
0,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,1,0,6,12765,29.0,23.0,41.5,38.9
1,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,0,6,5,15957,28.6,22.6,24.4,37.9
2,1,0,14.0,35,21,2015,41.937405765,-87.71664968700001,12,9,0,4,6,20039,15.3,9.2,24.7,31.0
3,0,1,15.0,28,25,2015,41.881903443,-87.755121152,13,9,1,0,6,15957,28.6,22.6,24.4,37.9
4,0,0,6.0,21,71,2015,41.744378879,-87.658430635,10,9,1,0,6,15528,27.6,28.3,18.5,41.9


In [50]:
beforeAddingCensus=beforeAddingCensus.drop(["income","belowpoverty","unemployed","educated","workingage"], 1)
beforeAddingCensus.head(5)

Unnamed: 0,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,severity,location,day
0,0,1,9.0,12,61,2015,41.815117282,-87.669999562,13,9,1,0,6
1,0,0,15.0,29,25,2015,41.895080471,-87.765400451,11,9,0,6,5
2,1,0,14.0,35,21,2015,41.937405765,-87.71664968700001,12,9,0,4,6
3,0,1,15.0,28,25,2015,41.881903443,-87.755121152,13,9,1,0,6
4,0,0,6.0,21,71,2015,41.744378879,-87.658430635,10,9,1,0,6


In [145]:
dbCredentials = {"host" : "localhost", \
                 "uname" : "root", \
                 "password" : "adminpass", \
                 "dbName2" : "ChicagoDataForMining",
                }
for line in lines:
    vals = line.split("=")
    if(len(vals) == 2):
        if(vals[0] in dbCredentials):
            dbCredentials[vals[0]] = vals[1].replace("\n","")
print(dbCredentials)

{'host': 'localhost', 'uname': 'root', 'password': 'adminpass', 'dbName2': 'ChicagoDataForMining'}


In [146]:
import mysql.connector
mydb = mysql.connector.connect(
  host=dbCredentials["host"],
  user=dbCredentials["uname"],
  password=dbCredentials["password"]
)

mycursor = mydb.cursor(buffered=True)

mycursor.execute("CREATE DATABASE IF NOT EXISTS " + dbCredentials["dbName2"])
mycursor.execute("USE " + dbCredentials["dbName2"])

In [147]:
mycursor.execute("CREATE TABLE IF NOT EXISTS CrimesWithHardshipIndex (\
    arrest int,\
    domestic int,\
    district int,\
    ward int,\
    communityarea int,\
    year int,\
    latitude FLOAT(10),\
    longitude FLOAT(10),\
    hour int,\
    month int,\
    severity int,\
    location int,\
    day int,\
    income int,\
    belowpoverty FLOAT(10),\
    unemployed int,\
    educated FLOAT(10),\
    workingage FLOAT(10))"\
)

mycursor.execute("CREATE TABLE IF NOT EXISTS CrimesWithoutHardshipIndex (\
    arrest int,\
    domestic int,\
    district int,\
    ward int,\
    communityarea int,\
    year int,\
    latitude FLOAT(10),\
    longitude FLOAT(10),\
    hour int,\
    month int,\
    severity int,\
    location int,\
    day int)"\
)


In [148]:
from sqlalchemy import create_engine

conn = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user=dbCredentials["uname"],
                               pw=dbCredentials["password"],
                               db=dbCredentials["dbName2"]))

In [149]:
beforeAddingCensus.to_sql('CrimesWithoutHardshipIndex', con = conn, if_exists = 'append', chunksize = 1000, index= False)
crimeDF.to_sql('CrimesWithHardshipIndex', con = conn, if_exists = 'append', chunksize = 1000, index= False)

In [150]:
beforeDF = pd.read_sql("select * from "+dbCredentials["dbName2"]+".CrimesWithoutHardshipIndex", conn);
pd.set_option('display.expand_frame_repr', False)
afterDF = pd.read_sql("select * from "+dbCredentials["dbName2"]+".CrimesWithHardshipIndex", conn);
pd.set_option('display.expand_frame_repr', False)

In [151]:
beforeDF.head()

Unnamed: 0,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,severity,location,day
0,0,1,9.0,12,61,2015,41.8151,-87.67,13,9,1,0,6
1,0,0,15.0,29,25,2015,41.8951,-87.7654,11,9,0,6,5
2,1,0,14.0,35,21,2015,41.9374,-87.7167,12,9,0,4,6
3,0,1,15.0,28,25,2015,41.8819,-87.7551,13,9,1,0,6
4,0,0,6.0,21,71,2015,41.7444,-87.6584,10,9,1,0,6


In [152]:
afterDF.head()

Unnamed: 0,arrest,domestic,district,ward,communityarea,year,latitude,longitude,hour,month,severity,location,day,income,belowpoverty,unemployed,educated,workingage
0,0,1,9.0,12,61,2015,41.8151,-87.67,13,9,1,0,6,12765,29.0,23,41.5,38.9
1,0,0,15.0,29,25,2015,41.8951,-87.7654,11,9,0,6,5,15957,28.6,23,24.4,37.9
2,1,0,14.0,35,21,2015,41.9374,-87.7167,12,9,0,4,6,20039,15.3,9,24.7,31.0
3,0,1,15.0,28,25,2015,41.8819,-87.7551,13,9,1,0,6,15957,28.6,23,24.4,37.9
4,0,0,6.0,21,71,2015,41.7444,-87.6584,10,9,1,0,6,15528,27.6,28,18.5,41.9


In [144]:
mycursor.execute("DROP DATABASE IF EXISTS " + dbCredentials["dbName2"])