## Los Angeles crimes - data analysis

In [6]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# importing data and displaying top 5 records
crimes = pd.read_csv("crimes.csv", parse_dates=["Date Rptd", "DATE OCC"], dtype={"TIME OCC": str})
crimes.head()

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA NAME,Crm Cd Desc,Vict Age,Vict Sex,Vict Descent,Weapon Desc,Status Desc,LOCATION
0,220314085,2022-07-22,2020-05-12,1110,Southwest,THEFT OF IDENTITY,27,F,B,,Invest Cont,2500 S SYCAMORE AV
1,222013040,2022-08-06,2020-06-04,1620,Olympic,THEFT OF IDENTITY,60,M,H,,Invest Cont,3300 SAN MARINO ST
2,220614831,2022-08-18,2020-08-17,1200,Hollywood,THEFT OF IDENTITY,28,M,H,,Invest Cont,1900 TRANSIENT
3,231207725,2023-02-27,2020-01-27,635,77th Street,THEFT OF IDENTITY,37,M,H,,Invest Cont,6200 4TH AV
4,220213256,2022-07-14,2020-07-14,900,Rampart,THEFT OF IDENTITY,79,M,B,,Invest Cont,1200 W 7TH ST


In [11]:
# checking data types
crimes.dtypes

DR_NO                    int64
Date Rptd       datetime64[ns]
DATE OCC        datetime64[ns]
TIME OCC                 int32
AREA NAME               object
Crm Cd Desc             object
Vict Age                 int64
Vict Sex                object
Vict Descent            object
Weapon Desc             object
Status Desc             object
LOCATION                object
dtype: object

In [12]:
# finding the peak crime hour
crime_pivot = crimes.pivot_table(index='TIME OCC', values='DR_NO', aggfunc='count')
peak_crime_hour_1 = str(crime_pivot[crime_pivot['DR_NO'] == crime_pivot['DR_NO'].max()].index[0])
peak_crime_hour = int(peak_crime_hour_1[:2])
peak_crime_hour

12

In [15]:
# finding an area which has the highest number of night crimes
# night crimes - between 2200 and 2359, between 1 and 359
crimes_copy = crimes
crimes_copy['TIME OCC'] = crimes_copy['TIME OCC'].astype(int)
crimes_filtered = crimes_copy[np.logical_or(crimes_copy['TIME OCC'].between(2200, 2359), crimes_copy['TIME OCC'].between(1, 359))]
crimes_loc = crimes_filtered.pivot_table(index='AREA NAME', values='DR_NO', aggfunc='count')
peak_night_crime_location = str(crimes_loc[crimes_loc['DR_NO'] == crimes_loc['DR_NO'].max()].index[0])
peak_night_crime_location

'Central'

In [14]:
# number of victims per indicated age groups
crimes3 = crimes
age_group_conditions = [
    (crimes3['Vict Age'].between(0, 17)),
    (crimes3['Vict Age'].between(18, 25)),
    (crimes3['Vict Age'].between(26, 34)),
    (crimes3['Vict Age'].between(35, 44)),
    (crimes3['Vict Age'].between(45, 54)),
    (crimes3['Vict Age'].between(55, 64)),
    (crimes3['Vict Age'] >= 65)
]
age_groups = ['0-17', '18-25', '26-34', '35-44', '45-54', '55-64', '65+']
crimes3['age_group'] = np.select(age_group_conditions, age_groups, default='')
victim_ages = crimes3[['age_group', 'DR_NO']].groupby('age_group').agg('count')
victim_ages = victim_ages['DR_NO'].rename('frequency_of_crimes')
victim_ages

age_group
0-17      4528
18-25    28291
26-34    47470
35-44    42157
45-54    28353
55-64    20169
65+      14747
Name: frequency_of_crimes, dtype: int64