In [26]:
import io

import pandas as pd
import requests

# Fetch the first 1000 rows of the City of Chicago inspections endpoint as CSV.
url = "https://data.cityofchicago.org/resource/4ijn-s7e5.csv?$limit=1000&$offset=0"
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as CSV.
response.raise_for_status()

# Build the DataFrame from the bytes already downloaded; passing `url` to
# read_csv would issue a second, redundant request for the same data.
data = pd.read_csv(io.BytesIO(response.content))


In [27]:
# peek at one randomly chosen row to see what a record looks like
data.sample(n=1)


Unnamed: 0,inspection_id,dba_name,aka_name,license_,facility_type,risk,address,city,state,zip,inspection_date,inspection_type,results,violations,latitude,longitude,location
55,2615967,LAKE EFFECT BREWING COMPANY,LAKE EFFECT BREWING COMPANY,2933983,Liquor,Risk 3 (Low),3074 N MILWAUKEE AVE,CHICAGO,IL,60618,2025-04-17T00:00:00.000,License,Pass,,41.936917,-87.72051,"\n, \n(41.93691659006879, -87.72050967194036)"


In [28]:
# Summarise the raw frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   inspection_id    1000 non-null   int64  
 1   dba_name         1000 non-null   object 
 2   aka_name         996 non-null    object 
 3   license_         1000 non-null   int64  
 4   facility_type    981 non-null    object 
 5   risk             1000 non-null   object 
 6   address          1000 non-null   object 
 7   city             1000 non-null   object 
 8   state            1000 non-null   object 
 9   zip              1000 non-null   int64  
 10  inspection_date  1000 non-null   object 
 11  inspection_type  1000 non-null   object 
 12  results          1000 non-null   object 
 13  violations       690 non-null    object 
 14  latitude         992 non-null    float64
 15  longitude        992 non-null    float64
 16  location         992 non-null    object 
dtypes: float64(2), 

In [29]:
# Inspect (not yet fix) the distinct spellings present in the city and state columns.
print("Unique cities:", data.loc[:, 'city'].unique())
print("Unique states:", data.loc[:, 'state'].unique())

Unique cities: ['CHICAGO' 'EVANSTON' 'BERWYN' 'CCHICAGO' 'Chicago']
Unique states: ['IL']


In [30]:
# List every distinct facility type (including NaN) found in the raw data.
print("Unique types:", data.loc[:, 'facility_type'].unique())


Unique types: ['Grocery Store' 'Restaurant' "Children's Services Facility" 'School'
 'AFTER SCHOOL PROGRAM' 'Shared Kitchen' 'Mobile Food Preparer' 'Liquor'
 'Catering' 'Daycare Above and Under 2 Years' 'HERBALIFE' 'Bakery' nan
 'Shared Kitchen User (Long Term)' 'Mobile Prepared Food Vendor'
 'Daycare (2 - 6 Years)' 'Long Term Care' 'Mobile Frozen Desserts Vendor'
 'DINING HALL' 'Hospital' 'Wholesale' 'MOVIE THEATER' 'Golden Diner'
 'CHURCH KITCHEN' 'REGULATED BUSINESS' 'SUPPORTIVE LIVING']


In [None]:
# Lookup table that collapses the raw `facility_type` labels into a handful of
# broader categories. Keys must match the raw labels exactly (case-sensitive),
# because the table is applied with an exact-match `Series.map`.
facility_mapping = {
    # child / student facilities
    'AFTER SCHOOL PROGRAM': 'Child or Student Facilities',
    'School': 'Child or Student Facilities',
    "Children's Services Facility": 'Child or Student Facilities',
    'Daycare Above and Under 2 Years': 'Child or Student Facilities',
    'Daycare (2 - 6 Years)': 'Child or Student Facilities',
    'DINING HALL': 'Child or Student Facilities',
    # mobile vendors
    'Mobile Food Preparer': 'Mobile',
    'Mobile Prepared Food Vendor': 'Mobile',
    'Mobile Frozen Desserts Vendor': 'Mobile',
    # elder care
    'Long Term Care': 'Elders Facilities',
    'SUPPORTIVE LIVING': 'Elders Facilities',
    # supplements
    'HERBALIFE': 'Supplemental Food',
    # food service
    'Restaurant': 'Food Service',
    'Catering': 'Food Service',
    'Shared Kitchen': 'Food Service',
    'Shared Kitchen User (Long Term)': 'Food Service',
    'Bakery': 'Food Service',
    'Golden Diner': 'Food Service',
    'CHURCH KITCHEN': 'Food Service',
    # retail
    'Grocery Store': 'Retail',
    'Liquor': 'Retail',
    'Wholesale': 'Retail',
    # institutional businesses
    'Hospital': 'Institutional Businesses',
    # The data spells this label in upper case ('MOVIE THEATER'); the
    # title-case key alone never matched. Both spellings are kept so any
    # existing lookups of 'Movie Theater' keep working.
    'MOVIE THEATER': 'Institutional Businesses',
    'Movie Theater': 'Institutional Businesses',
    'REGULATED BUSINESS': 'Institutional Businesses'
}

In [None]:
# function to clean

def clean(df, mapping=None):
    """Return a cleaned copy of an inspections DataFrame.

    Steps, in order:
      1. Drop the columns ``aka_name``, ``license_``, ``city``, ``state``,
         ``violations`` and ``location`` (any that are already absent are
         skipped, so the function can safely be re-applied to its own output).
      2. Reformat ``inspection_date`` as a ``YYYY-MM-DD`` string.
      3. Replace ``facility_type`` labels found in ``mapping`` with their
         broader category; labels not in ``mapping`` are kept as they are.
      4. Remove rows whose ``results`` value is ``'No Entry'``.
      5. Remove rows that still contain any missing value.

    The input frame is never modified; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain at least ``inspection_date``, ``facility_type`` and
        ``results``.
    mapping : dict, optional
        Raw facility label -> category. Defaults to the notebook-level
        ``facility_mapping``.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows, keeping their original index labels.
    """
    if mapping is None:
        mapping = facility_mapping

    unused_columns = ['aka_name', 'license_', 'city', 'state', 'violations', 'location']

    # `drop` returns a new frame, so everything below works on a copy and the
    # caller's data stays untouched. `city`/`state` are discarded here, which
    # is why their inconsistent spellings are not normalised first.
    cleaned = df.drop(columns=unused_columns, errors='ignore')

    cleaned = cleaned.assign(
        inspection_date=pd.to_datetime(cleaned['inspection_date']).dt.strftime('%Y-%m-%d'),
        # map() yields NaN for unmapped labels; fall back to the raw label.
        facility_type=cleaned['facility_type'].map(mapping).fillna(cleaned['facility_type']),
    )

    cleaned = cleaned[cleaned['results'] != 'No Entry']
    return cleaned.dropna()


In [32]:
# run the cleaning function and rebind `data` to the cleaned frame
data = data.pipe(clean)


In [37]:
# Re-check the schema after cleaning: remaining columns, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 919 entries, 0 to 999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   inspection_id    919 non-null    int64  
 1   dba_name         919 non-null    object 
 2   facility_type    919 non-null    object 
 3   risk             919 non-null    object 
 4   address          919 non-null    object 
 5   zip              919 non-null    int64  
 6   inspection_date  919 non-null    object 
 7   inspection_type  919 non-null    object 
 8   results          919 non-null    object 
 9   latitude         919 non-null    float64
 10  longitude        919 non-null    float64
dtypes: float64(2), int64(2), object(7)
memory usage: 86.2+ KB


In [38]:
# spot-check five random rows of the cleaned frame
data.sample(n=5)

Unnamed: 0,inspection_id,dba_name,facility_type,risk,address,zip,inspection_date,inspection_type,results,latitude,longitude
204,2615618,CHI TOWN GROUP LLC,Retail,Risk 3 (Low),5002 N PULASKI RD,60630,2025-04-11,License,Pass,41.972031,-87.728282
935,2614555,BELLAS FUENTEZ,Food Service,Risk 1 (High),1725 W 47TH ST,60609,2025-03-26,Complaint,Pass,41.80846,-87.668548
228,2615643,RAZA WEST GYRO,Food Service,Risk 1 (High),754 S WESTERN AVE,60612,2025-04-11,Canvass,Pass,41.871363,-87.686317
455,2615257,TACO BELL #2513,Food Service,Risk 1 (High),6944 W ARCHER AVE,60638,2025-04-07,Canvass,Fail,41.792186,-87.795808
539,2615095,BARRACO'S PIZZA,Food Service,Risk 1 (High),3043-3047 W 111TH ST,60655,2025-04-03,Canvass,Pass,41.691495,-87.69778
