In [1]:
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import LabelEncoder

pd.set_option("display.max_rows", None, "display.max_columns", None)

In [2]:
gainesville_crime_data = "C:/Users/szieg/Repositories/FinalProject/SLZ/Gainesville_Crime.csv"
classifications = "C:/Users/szieg/Repositories/FinalProject/Gainesville_Crime/Classifications.csv"
moonphase = "C:/Users/szieg/Repositories/FinalProject/MoonPhase/MoonPhases.csv"

In [3]:
# Read Gainesville_Crime.csv
gainesville_df = pd.read_csv(gainesville_crime_data)
gainesville_df.head()

Unnamed: 0,ID,IncidentType,Report Date,Offense Date,Report Hour of Day,Report Day of Week,Offense Hour of Day,Offense Day of Week,City,State,Address,Latitude,Longitude,Location
0,221009134,Assist Other Agency,7/2/2021 1:00,7/2/2021 1:00,1,Friday,1,Friday,GAINESVILLE,FL,300 BLK SW WILLISTON RD,29.620543,-82.328759,POINT (-82.328759 29.620542999999998)
1,221009267,Domestic Aggravated Battery,7/4/2021 22:37,7/4/2021 21:24,22,Sunday,21,Sunday,GAINESVILLE,FL,100 BLK NW 39TH AVE,29.688534,-82.326069,POINT (-82.326069 29.688534000000004)
2,221009201,Assist Other Agency,7/3/2021 12:31,7/3/2021 12:25,12,Saturday,12,Saturday,GAINESVILLE,FL,200 BLK SE 16TH AVE,29.634039,-82.326408,POINT (-82.326408 29.634038999999998)
3,121009277,Assist Citizen,7/5/2021 3:27,7/5/2021 3:27,3,Monday,3,Monday,GAINESVILLE,FL,500 BLK NW 8TH AVE,29.659423,-82.329994,POINT (-82.329994 29.659423)
4,221009177,Warrant Arrest,7/2/2021 18:51,7/2/2021 18:51,18,Friday,18,Friday,GAINESVILLE,FL,1500 BLK N MAIN ST,29.66577,-82.324505,POINT (-82.324505 29.66577)


In [4]:
# Rename column headers for Gainesville crime

gainesville_df.rename(columns = {'IncidentType': 'CFS',
                                 'Report Date' : 'reportDate',
                                 'Offense Date' : 'offenseDate',
                                 'Report Hour of Day' : 'reportHour',
                                 'Report Day of Week' : 'reportDOW',
                                 'Offense Hour of Day' : 'offenseHour',
                                 'Offense Day of Week' : 'offenseDOW',
                                 'City' : 'city',
                                 'State' : 'state',
                                 'Address' : 'address',
                                 'Latitude' : 'latitude',
                                 'Longitude' : 'longitude',
                                 'Location' : 'location'
                                                            
                                }, inplace = True)


gainesville_df.head()

Unnamed: 0,ID,CFS,reportDate,offenseDate,reportHour,reportDOW,offenseHour,offenseDOW,city,state,address,latitude,longitude,location
0,221009134,Assist Other Agency,7/2/2021 1:00,7/2/2021 1:00,1,Friday,1,Friday,GAINESVILLE,FL,300 BLK SW WILLISTON RD,29.620543,-82.328759,POINT (-82.328759 29.620542999999998)
1,221009267,Domestic Aggravated Battery,7/4/2021 22:37,7/4/2021 21:24,22,Sunday,21,Sunday,GAINESVILLE,FL,100 BLK NW 39TH AVE,29.688534,-82.326069,POINT (-82.326069 29.688534000000004)
2,221009201,Assist Other Agency,7/3/2021 12:31,7/3/2021 12:25,12,Saturday,12,Saturday,GAINESVILLE,FL,200 BLK SE 16TH AVE,29.634039,-82.326408,POINT (-82.326408 29.634038999999998)
3,121009277,Assist Citizen,7/5/2021 3:27,7/5/2021 3:27,3,Monday,3,Monday,GAINESVILLE,FL,500 BLK NW 8TH AVE,29.659423,-82.329994,POINT (-82.329994 29.659423)
4,221009177,Warrant Arrest,7/2/2021 18:51,7/2/2021 18:51,18,Friday,18,Friday,GAINESVILLE,FL,1500 BLK N MAIN ST,29.66577,-82.324505,POINT (-82.324505 29.66577)


In [5]:
# Filter Gainesville_Crimes.csv for date range
start_date = '01-01-2018'
end_date = '12-31-2021'
gainesville_df['offenseDate'] = pd.to_datetime(gainesville_df['offenseDate']) 
date_range = (gainesville_df['offenseDate'] > start_date) & (gainesville_df['offenseDate'] <= end_date)
gainesville_df = gainesville_df.loc[date_range]
gainesville_df.head()


Unnamed: 0,ID,CFS,reportDate,offenseDate,reportHour,reportDOW,offenseHour,offenseDOW,city,state,address,latitude,longitude,location
0,221009134,Assist Other Agency,7/2/2021 1:00,2021-07-02 01:00:00,1,Friday,1,Friday,GAINESVILLE,FL,300 BLK SW WILLISTON RD,29.620543,-82.328759,POINT (-82.328759 29.620542999999998)
1,221009267,Domestic Aggravated Battery,7/4/2021 22:37,2021-07-04 21:24:00,22,Sunday,21,Sunday,GAINESVILLE,FL,100 BLK NW 39TH AVE,29.688534,-82.326069,POINT (-82.326069 29.688534000000004)
2,221009201,Assist Other Agency,7/3/2021 12:31,2021-07-03 12:25:00,12,Saturday,12,Saturday,GAINESVILLE,FL,200 BLK SE 16TH AVE,29.634039,-82.326408,POINT (-82.326408 29.634038999999998)
3,121009277,Assist Citizen,7/5/2021 3:27,2021-07-05 03:27:00,3,Monday,3,Monday,GAINESVILLE,FL,500 BLK NW 8TH AVE,29.659423,-82.329994,POINT (-82.329994 29.659423)
4,221009177,Warrant Arrest,7/2/2021 18:51,2021-07-02 18:51:00,18,Friday,18,Friday,GAINESVILLE,FL,1500 BLK N MAIN ST,29.66577,-82.324505,POINT (-82.324505 29.66577)


In [6]:
drop_columns = ['reportDate', 'reportHour', 'reportDOW','city','state','location','address']
gainesville_df = gainesville_df.drop(drop_columns, axis = 1)
gainesville_df.head()

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,latitude,longitude
0,221009134,Assist Other Agency,2021-07-02 01:00:00,1,Friday,29.620543,-82.328759
1,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,Sunday,29.688534,-82.326069
2,221009201,Assist Other Agency,2021-07-03 12:25:00,12,Saturday,29.634039,-82.326408
3,121009277,Assist Citizen,2021-07-05 03:27:00,3,Monday,29.659423,-82.329994
4,221009177,Warrant Arrest,2021-07-02 18:51:00,18,Friday,29.66577,-82.324505


In [22]:
le = LabelEncoder()

gainesville_numeric_df = gainesville_df

gainesville_numeric_df['offenseDOW'] = le.fit_transform(gainesville_df['offenseDOW'])

gainesville_numeric_df['offenseMonth'] = gainesville_df['offenseDate'].dt.month
gainesville_numeric_df['offenseYear'] = gainesville_df['offenseDate'].dt.year
gainesville_numeric_df['offenseDate'] = gainesville_df['offenseDate'].dt.date
gainesville_numeric_df.head()


Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,latitude,longitude,offenseMonth,offenseYear
0,221009134,Assist Other Agency,2021-07-02,1,0,29.620543,-82.328759,7,2021
1,221009267,Domestic Aggravated Battery,2021-07-04,21,3,29.688534,-82.326069,7,2021
2,221009201,Assist Other Agency,2021-07-03,12,2,29.634039,-82.326408,7,2021
3,121009277,Assist Citizen,2021-07-05,3,1,29.659423,-82.329994,7,2021
4,221009177,Warrant Arrest,2021-07-02,18,0,29.66577,-82.324505,7,2021


In [8]:
#gainesville_numeric_df= gainesville_numeric_df.drop(['offenseDate'], axis=1)
#gainesville_numeric_df.head()

In [9]:
# Read Classifications.csv
classifications_df = pd.read_csv(classifications)
classifications_df.head()

Unnamed: 0,CFS,CFS_Type,Classification
0,Driving Under the Influence,Alcohol,Government
1,Poss. of Alcohol Under 21 Yoa,Alcohol,Government
2,All Other Liquor Law Viol.,Alcohol,Government
3,Alcohol Beverage-possess by Person Under 21 Yoa,Alcohol,Government
4,Assault (police Officer Aggravated),Assault,Person


In [23]:
le = LabelEncoder()

classification_numeric_df = classifications_df

classification_numeric_df['CFS_Type'] = le.fit_transform(classifications_df['CFS_Type'])
classification_numeric_df['Classification'] = le.fit_transform(classifications_df['Classification'])
classification_numeric_df.head(50)

Unnamed: 0,CFS,CFS_Type,Classification
0,Driving Under the Influence,0,0
1,Poss. of Alcohol Under 21 Yoa,0,0
2,All Other Liquor Law Viol.,0,0
3,Alcohol Beverage-possess by Person Under 21 Yoa,0,0
4,Assault (police Officer Aggravated),1,2
5,Domestic Aggravated Assualt,1,2
6,Assault (aggravated),1,2
7,Domestic Assault,1,2
8,Assault (simple),1,2
9,Verbal Threats,1,2


In [11]:
#classification_numeric_df = pd.get_dummies(classifications_df, columns=['CFS_Type','Classification'])

#classification_numeric_df.head()


In [12]:
#Read MoonPhases.csv
# convert fullDate values from object to datetime
moonPhase_df = pd.read_csv(moonphase, sep=',')
moonPhase_df['fullDate'] = pd.to_datetime(moonPhase_df['fullDate'])
moonPhase_df = moonPhase_df[['month','day','year','fullDate','DOW','moonPhase']]

moonPhase_df.head()

Unnamed: 0,month,day,year,fullDate,DOW,moonPhase
0,1,16,2018,2018-01-16,Tuesday,New Moon
1,2,15,2018,2018-02-15,Thursday,New Moon
2,3,17,2018,2018-03-17,Saturday,New Moon
3,4,15,2018,2018-04-15,Sunday,New Moon
4,5,15,2018,2018-05-15,Tuesday,New Moon


In [13]:
le = LabelEncoder()

moonPhase_numeric_df = moonPhase_df

moonPhase_numeric_df['DOW'] = le.fit_transform(moonPhase_df['DOW'])


moonPhase_numeric_df.head()

Unnamed: 0,month,day,year,fullDate,DOW,moonPhase
0,1,16,2018,2018-01-16,5,New Moon
1,2,15,2018,2018-02-15,4,New Moon
2,3,17,2018,2018-03-17,2,New Moon
3,4,15,2018,2018-04-15,3,New Moon
4,5,15,2018,2018-05-15,5,New Moon


In [14]:
#moonPhase_numeric_df = pd.get_dummies(moonPhase_df, columns=['DOW','moonPhase'])
#moonPhase_numeric_df.fillna(0)      
#moonPhase_numeric_df.head()

In [15]:
# Merge Gainesville Crime with Classification for CFS_Type and Classifications

gainesville_classified_df = pd.merge(gainesville_numeric_df, classification_numeric_df, how= "inner", on=["CFS"])

gainesville_classified_df.head()

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,latitude,longitude,offenseMonth,offenseYear,CFS_Type,Classification
0,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,3,29.688534,-82.326069,7,2021-07-04,2,2
1,221009608,Domestic Aggravated Battery,2021-07-11 22:54:00,22,3,29.632687,-82.387148,7,2021-07-11,2,2
2,221009391,Domestic Aggravated Battery,2021-07-07 19:12:00,19,6,29.640249,-82.29939,7,2021-07-07,2,2
3,221009308,Domestic Aggravated Battery,2021-07-06 07:26:00,7,5,29.641625,-82.398242,7,2021-07-06,2,2
4,221011388,Domestic Aggravated Battery,2021-08-16 17:25:00,17,1,29.688534,-82.326069,8,2021-08-16,2,2


In [16]:
# Merged the MoonPhases data with Gainesville Crime and Classifications

gainesville_classified_moon_df = pd.merge(gainesville_classified_df, moonPhase_df, how='left', left_on='offenseDate', right_on='fullDate')

gainesville_classified_moon_df.head(10)

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,latitude,longitude,offenseMonth,offenseYear,CFS_Type,Classification,month,day,year,fullDate,DOW,moonPhase
0,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,3,29.688534,-82.326069,7,2021-07-04,2,2,,,,NaT,,
1,221009608,Domestic Aggravated Battery,2021-07-11 22:54:00,22,3,29.632687,-82.387148,7,2021-07-11,2,2,,,,NaT,,
2,221009391,Domestic Aggravated Battery,2021-07-07 19:12:00,19,6,29.640249,-82.29939,7,2021-07-07,2,2,,,,NaT,,
3,221009308,Domestic Aggravated Battery,2021-07-06 07:26:00,7,5,29.641625,-82.398242,7,2021-07-06,2,2,,,,NaT,,
4,221011388,Domestic Aggravated Battery,2021-08-16 17:25:00,17,1,29.688534,-82.326069,8,2021-08-16,2,2,,,,NaT,,
5,221011524,Domestic Aggravated Battery,2021-08-19 07:30:00,7,4,29.631246,-82.319771,8,2021-08-19,2,2,,,,NaT,,
6,221012057,Domestic Aggravated Battery,2021-08-28 16:24:00,16,2,29.704114,-82.372561,8,2021-08-28,2,2,,,,NaT,,
7,221012231,Domestic Aggravated Battery,2021-08-31 23:53:00,23,5,29.684413,-82.305793,8,2021-08-31,2,2,,,,NaT,,
8,221012341,Domestic Aggravated Battery,2021-09-02 19:04:00,19,4,29.696642,-82.384909,9,2021-09-02,2,2,,,,NaT,,
9,221013249,Domestic Aggravated Battery,2021-09-19 19:35:00,19,3,29.616533,-82.367391,9,2021-09-19,2,2,,,,NaT,,


In [17]:
#gainesville_classified_moon_df.replace(np.nan,0)
gainesville_classified_moon_df = gainesville_classified_moon_df.replace(np.nan,0)


gainesville_classified_moon_df.head()

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,latitude,longitude,offenseMonth,offenseYear,CFS_Type,Classification,month,day,year,fullDate,DOW,moonPhase
0,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,3,29.688534,-82.326069,7,2021-07-04,2,2,0.0,0.0,0.0,0,0.0,0
1,221009608,Domestic Aggravated Battery,2021-07-11 22:54:00,22,3,29.632687,-82.387148,7,2021-07-11,2,2,0.0,0.0,0.0,0,0.0,0
2,221009391,Domestic Aggravated Battery,2021-07-07 19:12:00,19,6,29.640249,-82.29939,7,2021-07-07,2,2,0.0,0.0,0.0,0,0.0,0
3,221009308,Domestic Aggravated Battery,2021-07-06 07:26:00,7,5,29.641625,-82.398242,7,2021-07-06,2,2,0.0,0.0,0.0,0,0.0,0
4,221011388,Domestic Aggravated Battery,2021-08-16 17:25:00,17,1,29.688534,-82.326069,8,2021-08-16,2,2,0.0,0.0,0.0,0,0.0,0


In [18]:
summary = gainesville_classified_moon_df.groupby(['moonPhase','CFS'])[['CFS']].count()
summary.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,CFS
moonPhase,CFS,Unnamed: 2_level_1
0,Affray,144
0,Aircraft Incident,2
0,All Other Liquor Law Viol.,2
0,Animal Attack,52
0,Animal Cruelty,15


In [19]:
#df = pd.DataFrame(gainesville_classified_df.CFS_Type.unique(), columns=['UniqueCFSType'])

#df.head(230)

In [20]:
summary.to_csv('summarydf.csv')

In [21]:
#writer = pd.ExcelWriter('dfSummary.xlsx', engine = 'xlsxwriter')

#gainesville_classified_moon_df.to_excel(writer, sheet_name = '1', index = True)
#writer.save()