In [1]:
# Third-party dependencies used by this notebook
import pandas as pd
import numpy as np

# Lift pandas' display truncation: passing None removes the row and column limits
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

### Read Data from Datasources

In [2]:
# Locations of the three input CSV files; all of them live under one project directory
project_dir = "C:/Users/szieg/Repositories/FinalProject"

gainesville_crime_data = project_dir + "/Gainesville_Crime.csv"
classifications = project_dir + "/Gainesville_Crime/Classifications.csv"
moonphase = project_dir + "/MoonPhase/MoonPhases.csv"

In [3]:
# Load the raw Gainesville crime records into a DataFrame and preview the first five rows
gainesville_df = pd.read_csv(filepath_or_buffer=gainesville_crime_data)
gainesville_df.head(n=5)

Unnamed: 0,ID,Incident Type,Report Date,Offense Date,Report Hour of Day,Report Day of Week,Offense Hour of Day,Offense Day of Week,City,State,Address,Latitude,Longitude,Location
0,221009134,Assist Other Agency,7/2/2021 1:00,7/2/2021 1:00,1,Friday,1,Friday,GAINESVILLE,FL,300 BLK SW WILLISTON RD,29.620543,-82.328759,POINT (-82.328759 29.620542999999998)
1,221009267,Domestic Aggravated Battery,7/4/2021 22:37,7/4/2021 21:24,22,Sunday,21,Sunday,GAINESVILLE,FL,100 BLK NW 39TH AVE,29.688534,-82.326069,POINT (-82.326069 29.688534000000004)
2,221009201,Assist Other Agency,7/3/2021 12:31,7/3/2021 12:25,12,Saturday,12,Saturday,GAINESVILLE,FL,200 BLK SE 16TH AVE,29.634039,-82.326408,POINT (-82.326408 29.634038999999998)
3,121009277,Assist Citizen,7/5/2021 3:27,7/5/2021 3:27,3,Monday,3,Monday,GAINESVILLE,FL,500 BLK NW 8TH AVE,29.659423,-82.329994,POINT (-82.329994 29.659423)
4,221009177,Warrant Arrest,7/2/2021 18:51,7/2/2021 18:51,18,Friday,18,Friday,GAINESVILLE,FL,1500 BLK N MAIN ST,29.66577,-82.324505,POINT (-82.324505 29.66577)


### Clean Gainesville Crime data

In [4]:
# Map the original CSV headers to the shorter column names used by the later cells
column_renames = {
    'Incident Type': 'CFS',
    'Report Date': 'reportDate',
    'Offense Date': 'offenseDate',
    'Report Hour of Day': 'reportHour',
    'Report Day of Week': 'reportDOW',
    'Offense Hour of Day': 'offenseHour',
    'Offense Day of Week': 'offenseDOW',
    'City': 'city',
    'State': 'state',
    'Address': 'CFSaddress',
    'Latitude': 'CFSlatitude',
    'Longitude': 'CFSlongitude',
    'Location': 'CFSlocation',
}

# Apply the mapping directly to gainesville_df (mutates the frame), then preview it
gainesville_df.rename(columns=column_renames, inplace=True)

gainesville_df.head()

Unnamed: 0,ID,CFS,reportDate,offenseDate,reportHour,reportDOW,offenseHour,offenseDOW,city,state,CFSaddress,CFSlatitude,CFSlongitude,CFSlocation
0,221009134,Assist Other Agency,7/2/2021 1:00,7/2/2021 1:00,1,Friday,1,Friday,GAINESVILLE,FL,300 BLK SW WILLISTON RD,29.620543,-82.328759,POINT (-82.328759 29.620542999999998)
1,221009267,Domestic Aggravated Battery,7/4/2021 22:37,7/4/2021 21:24,22,Sunday,21,Sunday,GAINESVILLE,FL,100 BLK NW 39TH AVE,29.688534,-82.326069,POINT (-82.326069 29.688534000000004)
2,221009201,Assist Other Agency,7/3/2021 12:31,7/3/2021 12:25,12,Saturday,12,Saturday,GAINESVILLE,FL,200 BLK SE 16TH AVE,29.634039,-82.326408,POINT (-82.326408 29.634038999999998)
3,121009277,Assist Citizen,7/5/2021 3:27,7/5/2021 3:27,3,Monday,3,Monday,GAINESVILLE,FL,500 BLK NW 8TH AVE,29.659423,-82.329994,POINT (-82.329994 29.659423)
4,221009177,Warrant Arrest,7/2/2021 18:51,7/2/2021 18:51,18,Friday,18,Friday,GAINESVILLE,FL,1500 BLK N MAIN ST,29.66577,-82.324505,POINT (-82.324505 29.66577)


In [5]:
# Keep only offenses dated from start_date through end_date, with BOTH calendar days included.
start_date = '01-01-2018'
end_date = '12-31-2021'
date_bound_format = '%m-%d-%Y'  # explicit month-day-year so the bounds cannot be misread

# Convert the offense timestamps from text to datetime64 so they can be compared
# NOTE(review): the format is inferred here; the preview rows look like 'M/D/YYYY H:MM' - confirm
# before pinning an explicit format.
gainesville_df['offenseDate'] = pd.to_datetime(gainesville_df['offenseDate'])

# A bare date parses to midnight. Comparing "<= end_date" would therefore discard every offense
# that happened after 00:00 on the last day, so the upper bound is the start of the following day
# and is compared with "<". The lower bound uses ">=" so offenses stamped exactly at midnight on
# the first day are kept.
range_start = pd.to_datetime(start_date, format=date_bound_format)
range_end_exclusive = pd.to_datetime(end_date, format=date_bound_format) + pd.Timedelta(days=1)

date_range = (gainesville_df['offenseDate'] >= range_start) & (gainesville_df['offenseDate'] < range_end_exclusive)

# .copy() gives an independent frame, so re-running this cell does not assign into a slice
gainesville_df = gainesville_df.loc[date_range].copy()

# Last expression of the cell: confirm that offenseDate is now datetime64[ns]
gainesville_df.dtypes

ID                       int64
CFS                     object
reportDate              object
offenseDate     datetime64[ns]
reportHour               int64
reportDOW               object
offenseHour              int64
offenseDOW              object
city                    object
state                   object
CFSaddress              object
CFSlatitude            float64
CFSlongitude           float64
CFSlocation             object
dtype: object

In [6]:
# Remove the columns that none of the later cells use, then preview the slimmer frame
columns_to_drop = ['reportDate', 'reportHour', 'reportDOW', 'city', 'state', 'CFSlocation']
gainesville_df = gainesville_df.drop(columns=columns_to_drop)
gainesville_df.head()


Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,CFSaddress,CFSlatitude,CFSlongitude
0,221009134,Assist Other Agency,2021-07-02 01:00:00,1,Friday,300 BLK SW WILLISTON RD,29.620543,-82.328759
1,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,Sunday,100 BLK NW 39TH AVE,29.688534,-82.326069
2,221009201,Assist Other Agency,2021-07-03 12:25:00,12,Saturday,200 BLK SE 16TH AVE,29.634039,-82.326408
3,121009277,Assist Citizen,2021-07-05 03:27:00,3,Monday,500 BLK NW 8TH AVE,29.659423,-82.329994
4,221009177,Warrant Arrest,2021-07-02 18:51:00,18,Friday,1500 BLK N MAIN ST,29.66577,-82.324505


### Export Gainesville Crime data as CSV

In [7]:
#Create CSV file for gainesville_df data
#gainesville_df.to_csv('CallsForService.csv',index=False)

### Read, clean, and export Classifications data as CSV

In [8]:
# Load the lookup table that pairs each CFS value with a CFS_Type and a Classification
classifications_df = pd.read_csv(filepath_or_buffer=classifications)
classifications_df.head(n=5)

Unnamed: 0,CFS,CFS_Type,Classification
0,Driving Under the Influence,Alcohol,Government
1,Poss. of Alcohol Under 21 Yoa,Alcohol,Government
2,All Other Liquor Law Viol.,Alcohol,Government
3,Alcohol Beverage-possess by Person Under 21 Yoa,Alcohol,Government
4,Assault (police Officer Aggravated),Assault,Person


In [9]:
#Create CSV file for classifications_df data
#classifications_df.to_csv('Classification.csv',index=False)

### Read, clean, and export Moon Phase data as CSV

In [10]:
# Load MoonPhases.csv and convert its fullDate column from text (object) to datetime64
moonPhase_df = pd.read_csv(moonphase).assign(
    fullDate=lambda frame: pd.to_datetime(frame['fullDate'])
)

# Only the final expression of a cell is rendered, so this preview is computed but not shown
moonPhase_df.head()

# Rendered output: confirms fullDate is datetime64[ns]
moonPhase_df.dtypes

date                 object
month                 int64
day                   int64
year                  int64
fullDate     datetime64[ns]
DOW                  object
time                 object
moonPhase            object
dtype: object

In [11]:
#Create CSV file for moonPhase_df data
#moonPhase_df.to_csv('MoonPhase.csv',index=False)

### Merge datasets

In [14]:
# Attach CFS_Type and Classification to each crime record by matching on the shared CFS column.
# Inner join: records whose CFS value has no row in classifications_df are left out of the result.
gainesville_classified_df = gainesville_df.merge(classifications_df, on="CFS", how="inner")

gainesville_classified_df.head()

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,CFSaddress,CFSlatitude,CFSlongitude,CFS_Type,Classification
0,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,Sunday,100 BLK NW 39TH AVE,29.688534,-82.326069,Battery,Person
1,221009608,Domestic Aggravated Battery,2021-07-11 22:54:00,22,Sunday,4200 BLK SW 21ST PL,29.632687,-82.387148,Battery,Person
2,221009391,Domestic Aggravated Battery,2021-07-07 19:12:00,19,Wednesday,1200 BLK SE 19TH TER,29.640249,-82.29939,Battery,Person
3,221009308,Domestic Aggravated Battery,2021-07-06 07:26:00,7,Tuesday,1000 BLK SW 62ND BLVD,29.641625,-82.398242,Battery,Person
4,221011388,Domestic Aggravated Battery,2021-08-16 17:25:00,17,Monday,100 BLK NW 39TH AVE,29.688534,-82.326069,Battery,Person


In [15]:
# Merge the MoonPhases data into the classified Gainesville crime records.
#
# offenseDate carries a time of day (e.g. 2021-07-04 21:24:00), so joining it directly against
# fullDate only matches when both timestamps are identical to the minute; in practice that left
# every moon-phase column empty (NaN/NaT). The join is therefore done on the calendar day:
# both sides get a temporary key truncated to midnight, and the keys are dropped afterwards so
# the resulting columns are exactly those of the two input frames.
# NOTE(review): if MoonPhases.csv only lists the days on which a phase occurs (rather than one
# row per day), offenses on other days will still have empty moon columns - confirm the intent.
crime_keyed = gainesville_classified_df.assign(
    _offenseDay=gainesville_classified_df['offenseDate'].dt.normalize()
)
moon_keyed = moonPhase_df.assign(
    _moonDay=moonPhase_df['fullDate'].dt.normalize()
)

# Left join keeps every crime record, whether or not a moon-phase row exists for its day
gainesville_classified_moon_df = pd.merge(
    crime_keyed,
    moon_keyed,
    how='left',
    left_on='_offenseDay',
    right_on='_moonDay',
).drop(columns=['_offenseDay', '_moonDay'])

gainesville_classified_moon_df.head()

Unnamed: 0,ID,CFS,offenseDate,offenseHour,offenseDOW,CFSaddress,CFSlatitude,CFSlongitude,CFS_Type,Classification,date,month,day,year,fullDate,DOW,time,moonPhase
0,221009267,Domestic Aggravated Battery,2021-07-04 21:24:00,21,Sunday,100 BLK NW 39TH AVE,29.688534,-82.326069,Battery,Person,,,,,NaT,,,
1,221009608,Domestic Aggravated Battery,2021-07-11 22:54:00,22,Sunday,4200 BLK SW 21ST PL,29.632687,-82.387148,Battery,Person,,,,,NaT,,,
2,221009391,Domestic Aggravated Battery,2021-07-07 19:12:00,19,Wednesday,1200 BLK SE 19TH TER,29.640249,-82.29939,Battery,Person,,,,,NaT,,,
3,221009308,Domestic Aggravated Battery,2021-07-06 07:26:00,7,Tuesday,1000 BLK SW 62ND BLVD,29.641625,-82.398242,Battery,Person,,,,,NaT,,,
4,221011388,Domestic Aggravated Battery,2021-08-16 17:25:00,17,Monday,100 BLK NW 39TH AVE,29.688534,-82.326069,Battery,Person,,,,,NaT,,,


### Export the merged dataset to CSV

In [None]:
#gainesville_classified_moon_df.to_csv('TransformGainesville_CrimesALL-Refactored.csv')