# imports

In [1]:
from pathlib import Path

import pandas as pd
import requests

# acquire data

In [2]:
# Download up to 3,000 complaint records as JSON from opendata.maryland.gov.
# The query string is unchanged except that a stray trailing space at the end
# of the URL literal has been removed.
api_url = "https://opendata.maryland.gov/api/id/crti-ybyp.json?$select=*&$order=`:id`+ASC&$limit=3000&$offset=0"
# requests has no default timeout; set one so a stalled connection cannot hang the kernel.
r = requests.get(api_url, timeout=30)
# Stop here on an HTTP error instead of parsing an error payload into the DataFrame.
r.raise_for_status()
data = r.json()
df = pd.DataFrame(data)
df.head()

Unnamed: 0,sno,complaint,complaint_description,complaint_type,recieved_date,incident_date,county,incident_closed_date,incident_status_desc,incident_zip
0,1,232264,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
1,2,232263,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
2,3,232262,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
3,4,232261,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,
4,5,232260,concern of loud noise in the Cheverly area,Other,2024-03-06,2024-03-06,Prince George's,2024-03-08,Incident Closed-Managed,


# clean data

In [3]:
# Keep an independent (deep) snapshot of the raw download before any cleaning
backup_df = df.copy(deep=True)

In [4]:
# Check the dtype pandas inferred for each column before converting any of them
df.dtypes

sno                      object
complaint                object
complaint_description    object
complaint_type           object
recieved_date            object
incident_date            object
county                   object
incident_closed_date     object
incident_status_desc     object
incident_zip             object
dtype: object

In [5]:
# Convert the three date columns to pandas datetimes.
# ('recieved_date' is spelled that way in the source data's column name.)
for date_column in ('recieved_date', 'incident_date', 'incident_closed_date'):
    df[date_column] = pd.to_datetime(df[date_column])

In [8]:
# Show the last five rows to spot-check the converted date columns
df.tail(5)

Unnamed: 0,sno,complaint,complaint_description,complaint_type,recieved_date,incident_date,county,incident_closed_date,incident_status_desc,incident_zip
1514,1515,8058,Boat company sandblasting boats and blowing wh...,Fugitive Dust/Particulate Matter,2021-01-11,2021-01-11,Queen Anne's,2022-03-08,Incident Closed - No further action,
1515,1516,8057,Smoke and ash from neighbor's open burning in ...,Air,2021-01-07,2021-01-07,Anne Arundel,2021-01-07,Incident Closed-No Violation Observed,20751.0
1516,1517,8056,Smoke from neighbor's wood burning stove.,Air,2021-01-07,2021-01-04,Charles,NaT,Under Investigation,
1517,1518,8051,Concern of materials being burned in fireplace...,Air,2021-01-04,2020-12-28,Frederick,2021-01-15,Incident Closed-No Violation Observed,21770.0
1518,1519,8019,Spice odors coming from McCormicks,Odor,2021-01-09,2020-12-10,Baltimore,2020-12-11,Incident Closed - No further action,


# analyze data

In [11]:
# Cutoff for the "last year" window; rows must be received strictly after it
year_ago_date = pd.Timestamp(year=2023, month=3, day=9)
last_year_df = df.loc[df['recieved_date'].gt(year_ago_date)].copy()
# Turn the comma-separated type string into a list of individual types
last_year_df['complaint_type'] = last_year_df['complaint_type'].str.split(pat=', ')

In [19]:
# number of reports for each complaint-type combination
# (complaint_type holds lists here, so multi-type complaints are counted as a group, not per type)
last_year_df.complaint_type.value_counts()

complaint_type
[Air]                                                                                 166
[Odor]                                                                                140
[Other]                                                                               121
[Fugitive Dust/Particulate Matter]                                                     35
[Smoke]                                                                                28
[Air, Fumes, Odor]                                                                     20
[Open Burning]                                                                         13
[Air, Odor]                                                                            11
[Fumes]                                                                                11
[Open Burning, Smoke]                                                                   7
[Air, Odor, Smoke]                                                                   

In [16]:
# Read the full description text of the row at position 398
last_year_df['complaint_description'].iloc[398]

'Concerned with general poor ambient air quality in Maryland and would like to know what actions are being taken to improve air quality.'

In [29]:
# One row per (complaint, individual type), then count how often each type occurs
complaints_by_type = last_year_df.explode(column='complaint_type')
last_year_by_type = complaints_by_type['complaint_type'].value_counts()

In [30]:
# Number of complaints per county value, most frequent first
last_year_df.county.value_counts()

county
Baltimore City        101
Anne Arundel           85
Prince George's        83
Frederick              62
Baltimore              57
Montgomery             35
Cecil                  25
Dorchester             17
Harford                17
Howard                 15
Allegany               15
Washington             14
Charles                12
Carroll                 9
Wicomico                8
Not Yet Determined      7
Worcester               7
St. Mary's              6
Garrett                 5
Caroline                3
Queen Anne's            2
Somerset                2
Talbot                  1
Kent                    1
Statewide               1
Name: count, dtype: int64

In [41]:
# Separate rows whose county is a placeholder ("Not Yet Determined" / "Statewide")
# from rows with a real county, keeping a copy of the unfiltered frame as well.
non_county_mask = last_year_df['county'].isin(['Not Yet Determined', 'Statewide'])
last_year_df_all = last_year_df.copy()
non_county_last_year_df = last_year_df.loc[non_county_mask].copy()
# From here on last_year_df contains only rows with a real county
last_year_df = last_year_df.loc[~non_county_mask].copy()

In [39]:
# Complaint counts per county for the filtered last_year_df
last_year_by_county = last_year_df['county'].value_counts()

# export data

In [43]:
# Write every result table to CSV under exported_data/.
# to_csv does not create missing parent directories, so make sure the folder
# exists first; otherwise the export fails on a fresh checkout.
export_dir = Path('exported_data')
export_dir.mkdir(parents=True, exist_ok=True)

last_year_by_type.to_csv(export_dir / 'type_frequency.csv')
last_year_by_county.to_csv(export_dir / 'county_frequency.csv')
non_county_last_year_df.to_csv(export_dir / 'invalid_counties.csv')
last_year_df.to_csv(export_dir / 'valid_counties.csv')
last_year_df_all.to_csv(export_dir / 'all_complaints.csv')