# LAPD calls for service

In [1]:
%load_ext lab_black

In [2]:
import json
import glob
import io
import os
import pandas as pd
import numpy as np
import altair as alt
import altair_latimes as lat

# Show up to 50 columns and 50 rows before pandas truncates rendered frames.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 50)

### URL codes for City of LA open data portal

In [3]:
# Dataset identifiers on data.lacity.org. The (commented-out) download cell
# substitutes each one into
# https://data.lacity.org/api/views/<code>/rows.csv?accessType=DOWNLOAD
codes = [
    "r4ka-x5je",
    "nayp-w2tw",
    "ryvm-a59m",
    "xwgr-xw5q",
    "tss8-455b",
    "mgue-vbsx",
    "urhh-yf63",
    "i7pm-cnmm",
    "4tmc-7r6g",
    "iy4q-t9vr",
]

### Download historical CSVs from the data portal by looping over the codes

In [4]:
# Download step, currently disabled: uncomment to fetch one CSV per entry in `codes`.
# NOTE(review): wget saves into /Users/mhustiles/data/LAPD/, while the read step
# globs /Users/mhustiles/data/LAPD/calls-for-service/*.csv — presumably the files
# are moved there by hand; confirm before relying on a fresh download.
# for c in codes:
#     !wget 'https://data.lacity.org/api/views/{c}/rows.csv?accessType=DOWNLOAD' \
#     -P /Users/mhustiles/data/LAPD/

### Read the most recent year of calls

In [5]:
# Read these columns as text so pandas does not infer a numeric type for them.
dtypes = {"Incident_Number": str, "Area_Occ": str, "Rpt_Dist": str}

# Fetch the current-year calls straight from the open data portal.
# `parse_dates=True` was removed: without an `index_col` it only targets the
# index and parsed nothing. Dispatch_Date / Dispatch_Time stay as strings here
# and are converted explicitly in the clean-up cells further down.
la_current = pd.read_csv(
    "https://data.lacity.org/api/views/cibt-wiru/rows.csv?accessType=DOWNLOAD",
    dtype=dtypes,
)

### Read the files for past years

In [6]:
# Folder holding the previously downloaded yearly CSV files.
path = "/Users/mhustiles/data/LAPD/calls-for-service/"

# glob returns matches in arbitrary (filesystem) order; sort them so the files
# are read and concatenated in the same order on every run and machine.
files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [7]:
def _read_calls_csv(csv_path):
    """Load one CSV file and store its file name in a `year` column on every row."""
    frame = pd.read_csv(csv_path, encoding="ISO-8859-1", low_memory=False)
    return frame.assign(year=os.path.basename(csv_path))


# Lazy, single-pass generator: no file is read until something iterates over it.
file_df = (_read_calls_csv(csv_path) for csv_path in files)

### Concatenate everything

In [8]:
# Stack the per-file frames into one frame with a fresh 0..n-1 index.
# `file_df` is a generator, so this consumes it: re-create it before re-running this cell.
la_past = pd.concat(objs=file_df, ignore_index=True)

In [9]:
# Combine current and past calls. ignore_index=True gives the result a unique
# 0..n-1 index; otherwise the row labels of the two inputs would both be kept
# and repeat. The `year` column (source file name, present only on the past
# rows) is dropped because the year is derived from the dispatch date later on.
df = pd.concat([la_current, la_past], ignore_index=True).drop(columns=["year"])

### Clean up column names and dates/times

In [10]:
# Map the portal's column headers to short snake_case names.
df = df.rename(
    columns=dict(
        Incident_Number="incident_id",
        Area_Occ="area",
        Rpt_Dist="district",
        Dispatch_Date="datetime",
        Dispatch_Time="time",
        Call_Type_Code="type_code",
        Call_Type_Text="type_description",
    )
)

In [11]:
# Remove the literal " 12:00:00 AM" suffix from the dispatch date strings;
# the next step parses what is left with the %m/%d/%Y format.
raw_dispatch = df["datetime"]
df["date"] = raw_dispatch.str.replace(" 12:00:00 AM", "", regex=False)

In [12]:
# Parse each text column with its own explicit format string.
for column, fmt in {"date": "%m/%d/%Y", "time": "%H:%M:%S"}.items():
    df[column] = pd.to_datetime(df[column], format=fmt)

In [13]:
# Needs the datetime64 columns produced by the to_datetime step. Afterwards
# `date` and `time` hold plain date/time objects, so the `.dt` accessor no
# longer applies and this cell cannot be re-run on its own.
parsed_date = df["date"]
parsed_time = df["time"]

df["year"] = parsed_date.dt.year
df["date"] = parsed_date.dt.date
df["time"] = parsed_time.dt.time

### How many records? 

In [14]:
# Number of rows in the combined frame.
df.shape[0]

11893788

In [15]:
# The raw dispatch-date string is no longer needed once `date` and `year` exist.
df = df.drop(columns=["datetime"])

In [16]:
# Preview the first five cleaned rows.
df.head(n=5)

Unnamed: 0,incident_id,area,district,time,type_code,type_description,date,year
0,PD21051600004112,77th Street,1205,21:00:37,9212,TRESPASS SUSP,2021-05-16,2021
1,PD21021200001741,West LA,829,10:57:16,906B1,CODE 30 RINGER,2021-02-12,2021
2,PD21040200002194,Hollywood,642,13:24:20,459I,INVEST,2021-04-02,2021
3,PD21042300002656,Mission,1956,14:49:35,006,CODE 6,2021-04-23,2021
4,PD21040500001095,Hollenbeck,448,08:11:59,459I,INVEST,2021-04-05,2021


---

### Domestic violence calls

In [17]:
# Calls whose description contains the literal text "DOM VIOL", from 2017 onward.
# na=False makes rows with a missing description count as non-matches instead of
# putting NaN into the boolean mask; regex=False treats the pattern as plain text.
is_dom_viol = df["type_description"].str.contains("DOM VIOL", regex=False, na=False)
domestic = df[is_dom_viol & (df["year"] > 2016)]

In [18]:
# Number of rows that matched the domestic violence filter.
domestic.shape[0]

201356

In [19]:
# Count non-null incident ids for every (year, area) pair, returned as a flat
# frame with columns year, area, incident_id.
calls_per_year_area = domestic.groupby(["year", "area"])["incident_id"].count()
domestic_grouped = calls_per_year_area.reset_index()

### Export

In [20]:
# to_csv does not create missing folders, so make sure the relative output
# directory exists before writing.
os.makedirs("output", exist_ok=True)

# Write the row-level calls without the index column.
domestic.to_csv(
    "output/domestic_violence_calls_2017-2021.csv",
    index=False,
)

In [21]:
# to_csv does not create missing folders, so make sure the relative output
# directory exists before writing.
os.makedirs("output", exist_ok=True)

# Write the per-year, per-area counts without the index column.
domestic_grouped.to_csv(
    "output/domestic_violence_calls_2017-2021_grouped.csv",
    index=False,
)