In [2]:
import pandas as pd

df = pd.read_csv('datasets/crimes-in-baltimore.csv')

# Mengubah tipe data kolom "CrimeDateTime" menjadi datetime
df['CrimeDateTime'] = pd.to_datetime(df['CrimeDateTime'], errors='coerce')

# Mengidentifikasi baris yang menghasilkan kesalahan
out_of_bounds_rows = df[df['CrimeDateTime'].isna()]

# Menghapus baris yang menghasilkan kesalahan
df = df.drop(out_of_bounds_rows.index)

# Memisahkan kolom "CrimeDateTime" menjadi "CrimeDate" dan "CrimeTime"
df['CrimeDate'] = df['CrimeDateTime'].dt.strftime('%d/%m/%Y')
df['CrimeTime'] = df['CrimeDateTime'].dt.strftime('%H:%M:%S')

# Menghapus beberapa kolom yang tidak dibutuhkan
df = df.drop(['CrimeDateTime', 'RowID', 'CCNO', 'Gender', 'Age', 'Race', 'Ethnicity'], axis=1)

# Memindahkan kolom "CrimeDate" ke posisi 0
crime_date = df.pop('CrimeDate')
df.insert(0, 'CrimeDate', crime_date)

# Memindahkan kolom "CrimeTime" ke posisi 1
crime_time = df.pop('CrimeTime')
df.insert(1, 'CrimeTime', crime_time)

# Sort the DataFrame by 'CrimeDate' and 'CrimeTime'
df = df.sort_values(['CrimeDate', 'CrimeTime'])

# Reset the index of the sorted DataFrame
df = df.reset_index(drop=True)

print('Total Data: ' + str(len(df)))
print('======================')
df.head()

Total Data: 566643


Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside_Outside,Weapon,Post,District,Neighborhood,Latitude,Longitude,GeoLocation,Premise,Total_Incidents
0,01/01/1966,01:00:00,2A,900 STAMFORD RD,RAPE,I,OTHER,823.0,SOUTHWEST,WEST HILLS,39.296306,-76.709401,"(39.296305974752784,-76.709400769778497)",ROW/TOWNHOUSE-OCC,1
1,01/01/1978,00:00:00,2A,3600 W BELVEDERE AVE,RAPE,,OTHER,633.0,NORTHWEST,ARLINGTON,39.346619,-76.680174,"(39.346618976119643,-76.680173520662322)",,1
2,01/01/1978,00:00:00,2A,4300 PARK HEIGHTS AVE,RAPE,I,OTHER,614.0,NORTHWEST,CENTRAL PARK HEIGHTS,39.338898,-76.665462,"(39.338898480013256,-76.665462300931694)",ROW/TOWNHOUSE-OCC,1
3,01/01/1978,10:30:00,2A,1900 ARGONNE DR,RAPE,I,OTHER,421.0,NORTHEAST,MORGAN STATE UNIVERSITY,39.340748,-76.582734,"(39.340747701096049,-76.582734031623119)",ROW/TOWNHOUSE-OCC,1
4,01/01/1979,00:01:00,2A,800 BENNINGHAUS RD,RAPE,I,OTHER,523.0,NORTHERN,BELVEDERE,39.360133,-76.603973,"(39.360132718158731,-76.603973247731915)",ROW/TOWNHOUSE-OCC,1


In [3]:
# Mengubah format datetime dari kolom CrimeDate 
df['CrimeDate'] = pd.to_datetime(df['CrimeDate'], format='%d/%m/%Y')
# Filter berdasarkan tahunnya (2012-2018)
filtered_df = df[(df['CrimeDate'].dt.year >= 2012) & (df['CrimeDate'].dt.year <= 2018)]

# Simpan sebagai file csv baru
filtered_df.to_csv('datasets/filtered.csv')

print(len(filtered_df))
filtered_df.tail()

339346


Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside_Outside,Weapon,Post,District,Neighborhood,Latitude,Longitude,GeoLocation,Premise,Total_Incidents
566273,2018-12-31,23:01:00,6B,400 N KENWOOD AVE,LARCENY,O,,221.0,SOUTHEAST,MCELDERRY PARK,39.295899,-76.578039,"(39.295898558427403,-76.578038752635862)",STREET,1
566274,2018-12-31,23:30:00,2A,600 FALLSWAY,RAPE,,OTHER,324.0,EASTERN,PENN-FALLSWAY,39.296367,-76.61,"(39.296366658410619,-76.609999519334821)",,1
566275,2018-12-31,23:30:00,7A,2500 W BALTIMORE ST,AUTO THEFT,O,,835.0,SOUTHWEST,SHIPLEY HILL,39.287866,-76.656607,"(39.287865937854249,-76.656606575406443)",STREET,1
566276,2018-12-31,23:30:00,6E,2400 W LANVALE ST,LARCENY,O,,721.0,WESTERN,EVERGREEN LAWN,39.297158,-76.655085,"(39.297157727607015,-76.655085093749847)",STREET,1
566277,2018-12-31,23:32:00,6J,1500 MOSHER ST,LARCENY,O,,724.0,WESTERN,SANDTOWN-WINCHESTER,39.300143,-76.642596,"(39.300143010886039,-76.642596171264387)",STREET,1


In [4]:
# Lakukan filter SHOOTING khusus untuk kolom Description
df_shooting = filtered_df[filtered_df['Description'] == 'SHOOTING']

# Simpan sebagai file csv baru
df_shooting.to_csv('datasets/shooting.csv')

print(len(df_shooting))
df_shooting.tail()

3116


Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside_Outside,Weapon,Post,District,Neighborhood,Latitude,Longitude,GeoLocation,Premise,Total_Incidents
566017,2016-12-31,17:58:00,9S,0 S HILTON ST,SHOOTING,Outside,FIREARM,835.0,SOUTHWEST,SAINT JOSEPHS,39.285422,-76.672448,"(39.285422471524996,-76.672448261620346)",PARKING LOT,1
566257,2018-12-31,22:08:00,9S,2800 SPRINGHILL AV,SHOOTING,Outside,FIREARM,612.0,NORTHWEST,PARK CIRCLE,39.331266,-76.662881,"(39.331266198075639,-76.66288100157351)",STREET,1
566258,2018-12-31,22:08:00,9S,700 WHITMORE AV,SHOOTING,Outside,FIREARM,721.0,WESTERN,EVERGREEN LAWN,39.295557,-76.658166,"(39.2955565493506,-76.658166447821344)",PUBLIC AREA,1
566263,2018-12-31,22:10:00,9S,1200 E PRESTON ST,SHOOTING,Outside,FIREARM,314.0,EASTERN,OLIVER,39.305131,-76.601749,"(39.305130593546622,-76.601748899565564)",PUBLIC AREA,1
566264,2018-12-31,22:10:00,9S,1200 E PRESTON ST,SHOOTING,Outside,FIREARM,314.0,EASTERN,OLIVER,39.305131,-76.601749,"(39.305130593546622,-76.601748899565564)",PUBLIC AREA,1
