# LAPD calls for service: Domestic violence cases

In [1]:
%load_ext lab_black

In [2]:
import json
import glob
import io
import os
import pandas as pd
import numpy as np
import altair as alt
import altair_latimes as lat

pd.options.display.max_columns = 50
pd.options.display.max_rows = 50

### URL codes for City of LA open data portal

In [3]:
codes = [
    "r4ka-x5je",
    "nayp-w2tw",
    "ryvm-a59m",
    "xwgr-xw5q",
    "tss8-455b",
    "mgue-vbsx",
    "urhh-yf63",
    "i7pm-cnmm",
    "4tmc-7r6g",
    "iy4q-t9vr",
]

### Download historic csvs from data portal by looping over codes

In [4]:
# for c in codes:
#     !wget 'https://data.lacity.org/api/views/{c}/rows.csv?accessType=DOWNLOAD' \
#     -P /Users/mhustiles/data/LAPD/

### Read the most recent year of calls

In [5]:
dtypes = {"Incident_Number": str, "Area_Occ": str, "Rpt_Dist": str}

la_current = pd.read_csv(
    "https://data.lacity.org/api/views/cibt-wiru/rows.csv?accessType=DOWNLOAD",
    dtype=dtypes,
    parse_dates=True,
)

### Read the files for past years

In [6]:
path = "/Users/mhustiles/data/LAPD/calls-for-service/"
files = glob.glob(os.path.join(path, "*.csv"))

In [7]:
file_df = (
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False).assign(
        year=os.path.basename(f)
    )
    for f in files
)

### Concatenate everything

In [8]:
la_past = pd.concat(file_df, ignore_index=True)

In [9]:
df = pd.concat([la_current, la_past]).drop(["year"], axis=1)

### Clean up column names and dates/times

In [10]:
df.rename(
    columns={
        "Incident_Number": "incident_id",
        "Area_Occ": "area",
        "Rpt_Dist": "district",
        "Dispatch_Date": "datetime",
        "Dispatch_Time": "time",
        "Call_Type_Code": "type_code",
        "Call_Type_Text": "type_description",
    },
    inplace=True,
)

In [11]:
df["date"] = df["datetime"].str.replace(" 12:00:00 AM", "", regex=False)

In [12]:
df["date"] = pd.to_datetime(df["date"], format="%m/%d/%Y")
df["time"] = pd.to_datetime(df["time"], format="%H:%M:%S")

In [13]:
df["year"] = df["date"].dt.year
df["date"] = df["date"].dt.date
df["time"] = df["time"].dt.time

In [14]:
df.drop(["datetime"], axis=1, inplace=True)

### Get rid of calls 'outside' LAPD jurisdiction

In [15]:
df = df[df["area"] != "Outside"]

### How many records? 

In [16]:
len(df)

9871273

In [17]:
df.head()

Unnamed: 0,incident_id,area,district,time,type_code,type_description,date,year
0,PD21051600004112,77th Street,1205,21:00:37,9212,TRESPASS SUSP,2021-05-16,2021
1,PD21021200001741,West LA,829,10:57:16,906B1,CODE 30 RINGER,2021-02-12,2021
2,PD21040200002194,Hollywood,642,13:24:20,459I,INVEST,2021-04-02,2021
3,PD21042300002656,Mission,1956,14:49:35,006,CODE 6,2021-04-23,2021
4,PD21040500001095,Hollenbeck,448,08:11:59,459I,INVEST,2021-04-05,2021


In [21]:
today = pd.to_datetime("today")
start = pd.to_datetime("2021-01-01")
elapsed = (today - start).days

In [22]:
def per_day(row):
    if row["year"] <= 2020:
        return (row["all_calls_count"] / 365.25).round(0)
    else:
        return (row["all_calls_count"] / elapsed).round(0)

In [23]:
df_grp = (df.groupby(["year"]).agg({"incident_id": "count"}).reset_index()).rename(
    columns={"incident_id": "all_calls_count"}
)

In [24]:
df_grp["call_per_day"] = df_grp.apply(per_day, axis=1)

In [25]:
df_grp

Unnamed: 0,year,all_calls_count,call_per_day
0,2012,923246,2528.0
1,2013,901401,2468.0
2,2014,932781,2554.0
3,2015,961870,2633.0
4,2016,1016874,2784.0
5,2017,1048989,2872.0
6,2018,1114791,3052.0
7,2019,1095430,2999.0
8,2020,1084550,2969.0
9,2021,791341,2757.0


---

### Domestic violence cases

In [67]:
domestic = df[df["type_description"].str.contains("DOM VIOL")]
# domestic = df[df["type_code"] == "242D"]

In [68]:
len(domestic)

433911

In [69]:
dom_grp = (
    domestic.groupby(["year"]).agg({"incident_id": "count"}).reset_index()
).rename(columns={"incident_id": "dom_count"})

In [70]:
def per_day(row):
    if row["year"] <= 2020:
        return (row["dom_count"] / 365.25).round(0)
    else:
        return (row["dom_count"] / elapsed).round(0)

In [71]:
dom_grp["dom_calls_per_day"] = dom_grp.apply(per_day, axis=1)

In [72]:
dom_grp

Unnamed: 0,year,dom_count,dom_calls_per_day
0,2012,46709,128.0
1,2013,46113,126.0
2,2014,46016,126.0
3,2015,46478,127.0
4,2016,46647,128.0
5,2017,43866,120.0
6,2018,42654,117.0
7,2019,41121,113.0
8,2020,42528,116.0
9,2021,31779,111.0


In [73]:
merged = pd.merge(df_grp, dom_grp, on="year")

In [74]:
merged["dom_count_rate_all_calls"] = (
    (merged["dom_count"] / merged["all_calls_count"]) * 100
).round(2)

In [75]:
merged["year"] = merged["year"].astype(str)

In [76]:
merged

Unnamed: 0,year,all_calls_count,call_per_day,dom_count,dom_calls_per_day,dom_count_rate_all_calls
0,2012,923246,2528.0,46709,128.0,5.06
1,2013,901401,2468.0,46113,126.0,5.12
2,2014,932781,2554.0,46016,126.0,4.93
3,2015,961870,2633.0,46478,127.0,4.83
4,2016,1016874,2784.0,46647,128.0,4.59
5,2017,1048989,2872.0,43866,120.0,4.18
6,2018,1114791,3052.0,42654,117.0,3.83
7,2019,1095430,2999.0,41121,113.0,3.75
8,2020,1084550,2969.0,42528,116.0,3.92
9,2021,791341,2757.0,31779,111.0,4.02


In [77]:
alt.Chart(merged).mark_bar().encode(x="year", y="dom_calls_per_day")

In [78]:
alt.Chart(merged).mark_bar().encode(x="year", y="dom_count_rate_all_calls")

### Domestic violence calls in 2020

In [79]:
domestic.head()

Unnamed: 0,incident_id,area,district,time,type_code,type_description,date,year
11,PD21031500000011,77th Street,1265,00:08:27,242D,DOM VIOL,2021-03-15,2021
129,PD21030800002343,77th Street,1268,13:22:25,620D,DOM VIOL,2021-03-08,2021
131,PD21012600002393,N Hollywood,1543,13:18:18,242D,DOM VIOL,2021-01-26,2021
146,PD21031600000983,Olympic,2044,08:12:05,620D,DOM VIOL,2021-03-16,2021
156,PD21010100000337,Newton,1371,01:01:15,245DS,DOM VIOL SUSP,2021-01-01,2021


In [80]:
dom2020 = domestic[domestic["year"] == 2020]

In [81]:
dom_grp_2020 = (
    dom2020.groupby(["date"]).agg({"incident_id": "count"}).reset_index()
).rename(columns={"incident_id": "dom_count"})

In [82]:
dom_grp_2020["date"] = pd.to_datetime(dom_grp_2020["date"])

In [83]:
alt.Chart(dom_grp_2020).mark_line().encode(x="date:T", y="dom_count")

In [84]:
dom_grp_2020.sort_values("dom_count", ascending=False).head(20)

Unnamed: 0,date,dom_count
361,2020-12-27,896
365,2020-12-31,707
364,2020-12-30,693
362,2020-12-28,560
363,2020-12-29,553
0,2020-01-01,189
193,2020-07-12,158
234,2020-08-22,153
256,2020-09-13,144
250,2020-09-07,144


### Export

In [85]:
domestic.to_csv(
    "output/domestic_violence_calls_2017-2021.csv",
    index=None,
)

In [86]:
dom_grp.to_csv(
    "output/domestic_violence_calls_2012-2021_grouped.csv",
    index=None,
)

In [87]:
merged.to_csv(
    "output/calls_2012-2021_grouped.csv",
    index=None,
)