In [1]:
# Import pandas and sqlite3

import pandas as pd
import sqlite3

**https://data.louisvilleky.gov/**

**https://data.louisvilleky.gov/datasets/a220289a40c945298d7f9d5c8dc7b3c0_0/explore**

In [2]:
# Create a dataframe from the .csv obtained from the address above

data = pd.read_csv('Louisville_Metro_KY_-_Crime_Data_2024.csv')

data.head(3)

Unnamed: 0,incident_number,date_reported,date_occurred,badge_id,offense_classification,offense_code_name,nibrs_code,nibrs_group_name,was_offense_completed,lmpd_division,lmpd_beat,location_category,block_address,city,zip_code,ObjectId
0,LMPD24056997,2024/05/17 20:32:00+00,2024/04/14 21:49:00+00,5314.0,11 SIMPLE ASSAULT,ASSAULT - 4TH DEGREE (DOMESTIC VIOLENCE) MINOR...,13B,A,YES,7TH DIVISION,735,RESIDENCE/HOME,4600 BLOCK LOR ANN AVE,LOUISVILLE,40219,1
1,LMPD24054715,2024/05/11 12:37:00+00,2024/05/11 03:00:00+00,8358.0,21 THEFT FR VEH,TBUT OR DISP CONTENTS FROM VEH 514.030 24140 23F,23F,A,YES,7TH DIVISION,723,PARKING/ DROP LOT/ GARAGE,7600 BLOCK MOUNTAIN VIEW CIR,LOUISVILLE,40228,2
2,LMPD24056995,2024/05/17 20:19:00+00,2024/05/05 06:00:00+00,7166.0,9 AGGRAVATED ASSAULT,ASSAULT - 1ST DEGREE 508.010 13150 13A,13A,A,YES,3RD DIVISION,315,BAR/NIGHTCLUB,8200 BLOCK DIXIE HWY,LOUISVILLE,40202,3


In [3]:
# Overwrite was_offense_completed column with 1,0 versions of the given 'Boolean' 'YES':'NO'

data['was_offense_completed'] = data.was_offense_completed.map({'YES':1, 'NO':0})

In [4]:
# Rename ID column to a format more friendly to our SQL conversion

data = data.rename(columns={'ObjectId':'object_id'})

In [5]:
# Isolate dates and times into separate columns from the string 'datetime' values given in the .csv for both date_reported and
# date_occurred

data['date_reported'] = pd.to_datetime(data['date_reported'])

data['time_reported'] = data['date_reported'].dt.time

data['date_reported'] = data['date_reported'].dt.date

data['date_occurred'] = pd.to_datetime(data['date_occurred'])

data['time_occurred'] = data['date_occurred'].dt.time

data['date_occurred'] = data['date_occurred'].dt.date

In [6]:
# Rearrange column names into the desired order

data = data.reindex(columns=['object_id', 'incident_number', 'date_reported', 'time_reported','date_occurred', 'time_occurred', 
                         'badge_id', 'offense_classification', 'offense_code_name', 'nibrs_code', 'nibrs_group_name', 
                         'was_offense_completed', 'lmpd_division', 'lmpd_beat', 'location_category', 'block_address', 
                         'city', 'zip_code'])

In [7]:
# Inspect the DataFrame to confirm that our transformations have executed as expected

data.head(3)

Unnamed: 0,object_id,incident_number,date_reported,time_reported,date_occurred,time_occurred,badge_id,offense_classification,offense_code_name,nibrs_code,nibrs_group_name,was_offense_completed,lmpd_division,lmpd_beat,location_category,block_address,city,zip_code
0,1,LMPD24056997,2024-05-17,20:32:00,2024-04-14,21:49:00,5314.0,11 SIMPLE ASSAULT,ASSAULT - 4TH DEGREE (DOMESTIC VIOLENCE) MINOR...,13B,A,1,7TH DIVISION,735,RESIDENCE/HOME,4600 BLOCK LOR ANN AVE,LOUISVILLE,40219
1,2,LMPD24054715,2024-05-11,12:37:00,2024-05-11,03:00:00,8358.0,21 THEFT FR VEH,TBUT OR DISP CONTENTS FROM VEH 514.030 24140 23F,23F,A,1,7TH DIVISION,723,PARKING/ DROP LOT/ GARAGE,7600 BLOCK MOUNTAIN VIEW CIR,LOUISVILLE,40228
2,3,LMPD24056995,2024-05-17,20:19:00,2024-05-05,06:00:00,7166.0,9 AGGRAVATED ASSAULT,ASSAULT - 1ST DEGREE 508.010 13150 13A,13A,A,1,3RD DIVISION,315,BAR/NIGHTCLUB,8200 BLOCK DIXIE HWY,LOUISVILLE,40202


In [8]:
#Open connection and drop table if it exists (from previous runs)
conn = sqlite3.connect('crime.db')

conn.execute('DROP TABLE IF EXISTS CRIME')

<sqlite3.Cursor at 0x1392df40740>

**https://www.sqlite.org/datatype3.html**

In [9]:
# Use connection to create a table to store the data from our .csv

conn.execute('''
         CREATE TABLE IF NOT EXISTS CRIME 
         (OBJECT_ID INT PRIMARY KEY NOT NULL,
         INCIDENT_NUMBER TEXT,
         DATE_REPORTED TEXT,
         TIME_REPORTED TEXT,
         DATE_OCCURRED TEXT,
         TIME_OCCURRED TEXT,
         BADGE_ID INT,
         OFFENSE_CLASSIFICATION TEXT,
         OFFENSE_CODE_NAME TEXT,
         NIBRS_CODE TEXT,
         NIBRS_GROUP_NAME TEXT,
         WAS_OFFENSE_COMPLETED INT,
         LMPD_DIVISION TEXT,
         LMPD_BEAT INT,
         LOCATION_CATEGORY TEXT,
         BLOCK_ADDRESS TEXT,
         CITY TEXT,
         ZIP_CODE INT);''')

<sqlite3.Cursor at 0x1392df40e40>

In [10]:
# Use our DataFrame to push the data it contains to the SQL table that we just created 

data.to_sql("CRIME", conn, if_exists='append', index=False)

22904

# To browse this database I've downloaded a tool from:

**https://sqlitebrowser.org/**

In [11]:
# Create a query to isolate crime data for the zip code 40202 limited to 100 rows

query_40202 = """SELECT OBJECT_ID, DATE_OCCURRED, OFFENSE_CLASSIFICATION, ZIP_CODE, BLOCK_ADDRESS FROM CRIME WHERE 
                ZIP_CODE = 40202 LIMIT 100"""

In [12]:
# Utilize pd.read_sql_query to execute the query passing our query_40202 variable as a string and the connection object to the
# database file

df_40202 = pd.read_sql_query(query_40202, conn)

In [13]:
# Inspect the DataFrame to ensure this has worked as expected

df_40202

Unnamed: 0,OBJECT_ID,DATE_OCCURRED,OFFENSE_CLASSIFICATION,ZIP_CODE,BLOCK_ADDRESS
0,3,2024-05-05,9 AGGRAVATED ASSAULT,40202,8200 BLOCK DIXIE HWY
1,82,2024-05-11,8 ROBBERY,40202,700 BLOCK MARSHALL ST
2,89,2024-05-11,56 ALL OTHER OFFENSES,40202,400 BLOCK S 3RD ST
3,90,2024-05-11,51 DISORDERLY CONDUCT,40202,300 BLOCK W MUHAMMAD ALI BLVD
4,91,2024-05-11,52 DRUNKENNESS,40202,400 BLOCK S 3RD ST
...,...,...,...,...,...
95,1926,2024-05-03,24 VANDALISM,40202,800 BLOCK W BROADWAY
96,1931,2024-05-03,21 THEFT FR VEH,40202,500 BLOCK W BROADWAY
97,1947,2024-05-03,19 THEFT FR BLDG,40202,400 BLOCK W MARKET ST
98,1951,2024-05-02,12 INTIMIDATION,40202,400 BLOCK S 2ND ST


In [14]:
# Create a query to return the first 5 rows from the database file

query_all = "SELECT * FROM CRIME LIMIT 5"

# Read our query into a DataFrame

data_peek = pd.read_sql_query(query_all, conn)

# Inspect our DataFrame

data_peek

Unnamed: 0,OBJECT_ID,INCIDENT_NUMBER,DATE_REPORTED,TIME_REPORTED,DATE_OCCURRED,TIME_OCCURRED,BADGE_ID,OFFENSE_CLASSIFICATION,OFFENSE_CODE_NAME,NIBRS_CODE,NIBRS_GROUP_NAME,WAS_OFFENSE_COMPLETED,LMPD_DIVISION,LMPD_BEAT,LOCATION_CATEGORY,BLOCK_ADDRESS,CITY,ZIP_CODE
0,1,LMPD24056997,2024-05-17,20:32:00.000000,2024-04-14,21:49:00.000000,5314,11 SIMPLE ASSAULT,ASSAULT - 4TH DEGREE (DOMESTIC VIOLENCE) MINOR...,13B,A,1,7TH DIVISION,735,RESIDENCE/HOME,4600 BLOCK LOR ANN AVE,LOUISVILLE,40219
1,2,LMPD24054715,2024-05-11,12:37:00.000000,2024-05-11,03:00:00.000000,8358,21 THEFT FR VEH,TBUT OR DISP CONTENTS FROM VEH 514.030 24140 23F,23F,A,1,7TH DIVISION,723,PARKING/ DROP LOT/ GARAGE,7600 BLOCK MOUNTAIN VIEW CIR,LOUISVILLE,40228
2,3,LMPD24056995,2024-05-17,20:19:00.000000,2024-05-05,06:00:00.000000,7166,9 AGGRAVATED ASSAULT,ASSAULT - 1ST DEGREE 508.010 13150 13A,13A,A,1,3RD DIVISION,315,BAR/NIGHTCLUB,8200 BLOCK DIXIE HWY,LOUISVILLE,40202
3,4,LMPD24056921,2024-05-17,17:09:00.000000,2024-02-21,05:01:00.000000,8560,28 FRAUD CREDIT CARD/ATM,FRAUDULENT USE OF A CREDIT CARD $500 < $1000 4...,26B,A,1,7TH DIVISION,723,DEPARTMENT/DISCOUNT STORE,7100 BLOCK CEDAR SPRINGS BLVD,LOUISVILLE,40291
4,5,LMPD24054714,2024-05-11,12:17:00.000000,2024-05-11,03:00:00.000000,8560,21 THEFT FR VEH,TBUT OR DISP CONTENTS FROM VEH 514.030 24140 23F,23F,A,0,3RD DIVISION,322,RESIDENCE/HOME,600 BLOCK IROQUOIS AVE,LOUISVILLE,40214


In [15]:
# Close our connection to the database to prevent database locking 

conn.close()