In [2]:
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt

## Load Data

In [3]:
# Get datasets using Los Angeles County API
inspections_dataset = pd.read_csv("./Resources/LOS_ANGELES_COUNTY_RESTAURANT_AND_MARKET_INSPECTIONS.csv")
violations_dataset = pd.read_csv("./Resources/LOS_ANGELES_COUNTY_RESTAURANT_AND_MARKET_VIOLATIONS.csv")

In [14]:
# Create Inspections DataFrame
inspections_df = pd.DataFrame(inspections_dataset)
inspections_df.head()

Unnamed: 0,:@computed_region_cbw7_skn5,:@computed_region_gj26_y8x3,:@computed_region_pft9_733t,:@computed_region_x8wy_s94z,activity_date,employee_id,facility_address,facility_city,facility_id,facility_name,...,owner_name,pe_description,program_element_pe,program_name,program_status,record_id,score,serial_number,service_code,service_description
0,5.0,295.0,1629.0,20146.0,2018-09-10T00:00:00.000,EE0000495,203 W OLIVE AVE # C,BURBANK,FA0242046,SERVERY- NICKELODEON,...,"GUCKENHEIMER SERVICES, LLC.",RESTAURANT (31-60) SEATS HIGH RISK,1635,SERVERY- NICKELODEON,ACTIVE,PR0190194,96,DARRFUZBW,1,ROUTINE INSPECTION
1,2.0,439.0,846.0,23668.0,2018-07-19T00:00:00.000,EE0001130,1030 W MARTIN LUTHER KING JR BLVD STE 108,LOS ANGELES,FA0252769,TOMS JR BURGERS,...,ANASTACIOS POLITIS,RESTAURANT (0-30) SEATS HIGH RISK,1632,TOMS JR BURGERS,ACTIVE,PR0202127,98,DA0XQVMTN,1,ROUTINE INSPECTION
2,1.0,105.0,2226.0,10148.0,2018-08-15T00:00:00.000,EE0000500,1515 E AMAR RD,WEST COVINA,FA0011237,DJ BIBINGKAHAN,...,DJ BIBINGKAHAN CORPORATION,RESTAURANT (0-30) SEATS MODERATE RISK,1631,DJ BIBINGKAHAN BAKESHOP,ACTIVE,PR0035416,98,DAMPOJNY8,1,ROUTINE INSPECTION
3,3.0,395.0,1571.0,19730.0,2018-07-16T00:00:00.000,EE0000045,13560 ROSCOE BLVD,PANORAMA CITY,FA0061073,DOROSE LIQUOR,...,KULWINDER KAUR,"FOOD MKT RETAIL (1-1,999 SF) LOW RISK",1610,DOROSE LIQUOR,ACTIVE,PR0027907,91,DAUTU3DPD,1,ROUTINE INSPECTION
4,3.0,1.0,360.0,4276.0,2018-09-07T00:00:00.000,EE0000526,29002 AGOURA RD,AGOURA HILLS,FA0252595,MEJICO GRILL AND TEQUILLA LOUNGE,...,JUAN C OROZCO,RESTAURANT (151 + ) SEATS HIGH RISK,1641,MEJICO GRILL AND TEQUILLA LOUNGE,ACTIVE,PR0201914,90,DAUEU4NGF,1,ROUTINE INSPECTION


In [4]:
# Create Violations DataFrame
violations_df = pd.DataFrame(violations_dataset)
violations_df.head()

Unnamed: 0,points,serial_number,violation_code,violation_description,violation_status
0,2,DA000211Z,F006,# 06. Adequate handwashing facilities supplied...,OUT OF COMPLIANCE
1,1,DA000211Z,F044,"# 44. Floors, walls and ceilings: properly bui...",OUT OF COMPLIANCE
2,2,DA000211Z,F014,# 14. Food contact surfaces: clean and sanitized,OUT OF COMPLIANCE
3,1,DA000211Z,F029,"# 29. Toxic substances properly identified, st...",OUT OF COMPLIANCE
4,1,DA000211Z,F035,# 35. Equipment/Utensils - approved; installed...,OUT OF COMPLIANCE


## Inspect the Data - Perform Exploratory Data Analysis

In [5]:
# Get the column names in each DataFrame
print(inspections_df.columns)
print(violations_df.columns)

Index([':@computed_region_cbw7_skn5', ':@computed_region_gj26_y8x3',
       ':@computed_region_pft9_733t', ':@computed_region_x8wy_s94z',
       'activity_date', 'employee_id', 'facility_address', 'facility_city',
       'facility_id', 'facility_name', 'facility_state', 'facility_zip',
       'geocoded_column', 'grade', 'owner_id', 'owner_name', 'pe_description',
       'program_element_pe', 'program_name', 'program_status', 'record_id',
       'score', 'serial_number', 'service_code', 'service_description'],
      dtype='object')
Index(['points', 'serial_number', 'violation_code', 'violation_description',
       'violation_status'],
      dtype='object')


In [6]:
# Get the data types for each column in Inspections DataFrame
inspections_df.dtypes

:@computed_region_cbw7_skn5    float64
:@computed_region_gj26_y8x3    float64
:@computed_region_pft9_733t    float64
:@computed_region_x8wy_s94z    float64
activity_date                   object
employee_id                     object
facility_address                object
facility_city                   object
facility_id                     object
facility_name                   object
facility_state                  object
facility_zip                    object
geocoded_column                 object
grade                           object
owner_id                        object
owner_name                      object
pe_description                  object
program_element_pe               int64
program_name                    object
program_status                  object
record_id                       object
score                            int64
serial_number                   object
service_code                     int64
service_description             object
dtype: object

In [7]:
# Get the data types for each column in Violations DataFrame
violations_df.dtypes

points                    int64
serial_number            object
violation_code           object
violation_description    object
violation_status         object
dtype: object

In [8]:
# Check for null values in Inspections DataFrame
inspections_df.isnull().sum()

:@computed_region_cbw7_skn5    32
:@computed_region_gj26_y8x3    32
:@computed_region_pft9_733t    32
:@computed_region_x8wy_s94z    32
activity_date                   0
employee_id                     0
facility_address                0
facility_city                   0
facility_id                     0
facility_name                   0
facility_state                  0
facility_zip                    0
geocoded_column                32
grade                           0
owner_id                        0
owner_name                      0
pe_description                  0
program_element_pe              0
program_name                    0
program_status                  0
record_id                       0
score                           0
serial_number                   0
service_code                    0
service_description             0
dtype: int64

In [9]:
# Check for null values in Violations DataFrame
violations_df.isnull().sum()

points                   0
serial_number            0
violation_code           0
violation_description    0
violation_status         0
dtype: int64

In [10]:
# Drop columns from Inspections DataFrame
inspections_df = inspections_df.drop(columns = [":@computed_region_cbw7_skn5", 
                                                ":@computed_region_pft9_733t", 
                                                ":@computed_region_gj26_y8x3", 
                                                ":@computed_region_x8wy_s94z",])
inspections_df

Unnamed: 0,activity_date,employee_id,facility_address,facility_city,facility_id,facility_name,facility_state,facility_zip,geocoded_column,grade,...,owner_name,pe_description,program_element_pe,program_name,program_status,record_id,score,serial_number,service_code,service_description
0,2018-09-10T00:00:00.000,EE0000495,203 W OLIVE AVE # C,BURBANK,FA0242046,SERVERY- NICKELODEON,CA,91502,"{'type': 'Point', 'coordinates': [-118.314661,...",A,...,"GUCKENHEIMER SERVICES, LLC.",RESTAURANT (31-60) SEATS HIGH RISK,1635,SERVERY- NICKELODEON,ACTIVE,PR0190194,96,DARRFUZBW,1,ROUTINE INSPECTION
1,2018-07-19T00:00:00.000,EE0001130,1030 W MARTIN LUTHER KING JR BLVD STE 108,LOS ANGELES,FA0252769,TOMS JR BURGERS,CA,90037-1867,"{'type': 'Point', 'coordinates': [-118.292543,...",A,...,ANASTACIOS POLITIS,RESTAURANT (0-30) SEATS HIGH RISK,1632,TOMS JR BURGERS,ACTIVE,PR0202127,98,DA0XQVMTN,1,ROUTINE INSPECTION
2,2018-08-15T00:00:00.000,EE0000500,1515 E AMAR RD,WEST COVINA,FA0011237,DJ BIBINGKAHAN,CA,91792,"{'type': 'Point', 'coordinates': [-117.913926,...",A,...,DJ BIBINGKAHAN CORPORATION,RESTAURANT (0-30) SEATS MODERATE RISK,1631,DJ BIBINGKAHAN BAKESHOP,ACTIVE,PR0035416,98,DAMPOJNY8,1,ROUTINE INSPECTION
3,2018-07-16T00:00:00.000,EE0000045,13560 ROSCOE BLVD,PANORAMA CITY,FA0061073,DOROSE LIQUOR,CA,91402,"{'type': 'Point', 'coordinates': [-118.428399,...",A,...,KULWINDER KAUR,"FOOD MKT RETAIL (1-1,999 SF) LOW RISK",1610,DOROSE LIQUOR,ACTIVE,PR0027907,91,DAUTU3DPD,1,ROUTINE INSPECTION
4,2018-09-07T00:00:00.000,EE0000526,29002 AGOURA RD,AGOURA HILLS,FA0252595,MEJICO GRILL AND TEQUILLA LOUNGE,CA,91301,"{'type': 'Point', 'coordinates': [-118.756808,...",A,...,JUAN C OROZCO,RESTAURANT (151 + ) SEATS HIGH RISK,1641,MEJICO GRILL AND TEQUILLA LOUNGE,ACTIVE,PR0201914,90,DAUEU4NGF,1,ROUTINE INSPECTION
5,2018-09-18T00:00:00.000,EE0000437,5725 FLORENCE AVE,BELL GARDENS,FA0158101,MCDONALD'S #10681,CA,90201,"{'type': 'Point', 'coordinates': [-118.163665,...",A,...,REDONDO ENTERPRISES LLC,RESTAURANT (61-150) SEATS MODERATE RISK,1637,MCDONALD'S #10681,ACTIVE,PR0146191,91,DARQIUA45,1,ROUTINE INSPECTION
6,2018-09-18T00:00:00.000,EE0001121,11540 HAWTHORNE BLVD STE A,HAWTHORNE,FA0264511,HULI HULI HAWAIIAN GRILL,CA,90250,"{'type': 'Point', 'coordinates': [-118.352484,...",A,...,LH HAWAII BBQ INC.,RESTAURANT (0-30) SEATS HIGH RISK,1632,HULI HULI HAWAIIAN GRILL,ACTIVE,PR0215618,90,DA0JE0XXO,1,ROUTINE INSPECTION
7,2018-07-17T00:00:00.000,EE0000399,6032 RESEDA BLVD,TARZANA,FA0011473,BURRITOS EL CHAVO,CA,91356,"{'type': 'Point', 'coordinates': [-118.536014,...",A,...,"OCHOA,SALVADOR",RESTAURANT (31-60) SEATS HIGH RISK,1635,BURRITOS EL CHAVO,ACTIVE,PR0005803,96,DAGKKNDCV,1,ROUTINE INSPECTION
8,2018-07-12T00:00:00.000,EE0000946,2530 GLENDALE BLVD,LOS ANGELES,FA0017398,CVS/PHARMACY,CA,90039,"{'type': 'Point', 'coordinates': [-118.259145,...",A,...,"GARFIELD BEACH CVS, LLC.","FOOD MKT RETAIL (2,000+ SF) LOW RISK",1613,CVS/PHARMACY,ACTIVE,PR0027636,93,DACOHF2H4,1,ROUTINE INSPECTION
9,2018-08-24T00:00:00.000,EE0001002,9755 CULVER BLVD,CULVER CITY,FA0138817,CAFE VIDA,CA,90232,"{'type': 'Point', 'coordinates': [-118.396347,...",A,...,"CCM CREATIONS, INC.",RESTAURANT (31-60) SEATS HIGH RISK,1635,CAFE VIDA,ACTIVE,PR0123185,91,DACIDHL0U,401,OWNER INITIATED ROUTINE INSPECT.


In [12]:
# Drop geocoded_column from Inspections DataFrame becaus it is hashed.
inspections_df = inspections_df.drop(columns = ["geocoded_column"])
inspections_df

Unnamed: 0,activity_date,employee_id,facility_address,facility_city,facility_id,facility_name,facility_state,facility_zip,grade,owner_id,owner_name,pe_description,program_element_pe,program_name,program_status,record_id,score,serial_number,service_code,service_description
0,2018-09-10T00:00:00.000,EE0000495,203 W OLIVE AVE # C,BURBANK,FA0242046,SERVERY- NICKELODEON,CA,91502,A,OW0105348,"GUCKENHEIMER SERVICES, LLC.",RESTAURANT (31-60) SEATS HIGH RISK,1635,SERVERY- NICKELODEON,ACTIVE,PR0190194,96,DARRFUZBW,1,ROUTINE INSPECTION
1,2018-07-19T00:00:00.000,EE0001130,1030 W MARTIN LUTHER KING JR BLVD STE 108,LOS ANGELES,FA0252769,TOMS JR BURGERS,CA,90037-1867,A,OW0246461,ANASTACIOS POLITIS,RESTAURANT (0-30) SEATS HIGH RISK,1632,TOMS JR BURGERS,ACTIVE,PR0202127,98,DA0XQVMTN,1,ROUTINE INSPECTION
2,2018-08-15T00:00:00.000,EE0000500,1515 E AMAR RD,WEST COVINA,FA0011237,DJ BIBINGKAHAN,CA,91792,A,OW0010130,DJ BIBINGKAHAN CORPORATION,RESTAURANT (0-30) SEATS MODERATE RISK,1631,DJ BIBINGKAHAN BAKESHOP,ACTIVE,PR0035416,98,DAMPOJNY8,1,ROUTINE INSPECTION
3,2018-07-16T00:00:00.000,EE0000045,13560 ROSCOE BLVD,PANORAMA CITY,FA0061073,DOROSE LIQUOR,CA,91402,A,OW0020051,KULWINDER KAUR,"FOOD MKT RETAIL (1-1,999 SF) LOW RISK",1610,DOROSE LIQUOR,ACTIVE,PR0027907,91,DAUTU3DPD,1,ROUTINE INSPECTION
4,2018-09-07T00:00:00.000,EE0000526,29002 AGOURA RD,AGOURA HILLS,FA0252595,MEJICO GRILL AND TEQUILLA LOUNGE,CA,91301,A,OW0246329,JUAN C OROZCO,RESTAURANT (151 + ) SEATS HIGH RISK,1641,MEJICO GRILL AND TEQUILLA LOUNGE,ACTIVE,PR0201914,90,DAUEU4NGF,1,ROUTINE INSPECTION
5,2018-09-18T00:00:00.000,EE0000437,5725 FLORENCE AVE,BELL GARDENS,FA0158101,MCDONALD'S #10681,CA,90201,A,OW0123199,REDONDO ENTERPRISES LLC,RESTAURANT (61-150) SEATS MODERATE RISK,1637,MCDONALD'S #10681,ACTIVE,PR0146191,91,DARQIUA45,1,ROUTINE INSPECTION
6,2018-09-18T00:00:00.000,EE0001121,11540 HAWTHORNE BLVD STE A,HAWTHORNE,FA0264511,HULI HULI HAWAIIAN GRILL,CA,90250,A,OW0255633,LH HAWAII BBQ INC.,RESTAURANT (0-30) SEATS HIGH RISK,1632,HULI HULI HAWAIIAN GRILL,ACTIVE,PR0215618,90,DA0JE0XXO,1,ROUTINE INSPECTION
7,2018-07-17T00:00:00.000,EE0000399,6032 RESEDA BLVD,TARZANA,FA0011473,BURRITOS EL CHAVO,CA,91356,A,OW0026553,"OCHOA,SALVADOR",RESTAURANT (31-60) SEATS HIGH RISK,1635,BURRITOS EL CHAVO,ACTIVE,PR0005803,96,DAGKKNDCV,1,ROUTINE INSPECTION
8,2018-07-12T00:00:00.000,EE0000946,2530 GLENDALE BLVD,LOS ANGELES,FA0017398,CVS/PHARMACY,CA,90039,A,OW0012926,"GARFIELD BEACH CVS, LLC.","FOOD MKT RETAIL (2,000+ SF) LOW RISK",1613,CVS/PHARMACY,ACTIVE,PR0027636,93,DACOHF2H4,1,ROUTINE INSPECTION
9,2018-08-24T00:00:00.000,EE0001002,9755 CULVER BLVD,CULVER CITY,FA0138817,CAFE VIDA,CA,90232,A,OW0102114,"CCM CREATIONS, INC.",RESTAURANT (31-60) SEATS HIGH RISK,1635,CAFE VIDA,ACTIVE,PR0123185,91,DACIDHL0U,401,OWNER INITIATED ROUTINE INSPECT.


In [13]:
# Check the number of unique values in each column
inspections_df.nunique()

activity_date            56
employee_id             227
facility_address        989
facility_city           114
facility_id             992
facility_name           959
facility_state            1
facility_zip            303
grade                     3
owner_id                902
owner_name              900
pe_description           19
program_element_pe       18
program_name            963
program_status            2
record_id               999
score                    23
serial_number          1000
service_code              2
service_description       2
dtype: int64

In [None]:
# Cast XXXXX column from object to integer
#df.<column_name> = df.<column_name>.astype('int64')

In [None]:
# Get dataset of health dataset using Los Angeles County API
community_df = pd.read_csv("./Resources/LOS_ANGELES_COUNTY_RESTAURANT_AND_MARKET_INSPECTIONS.csv")

In [None]:
# Create Inspections DataFrame
inspections_df = pd.DataFrame(inspections_dataset)
inspections_df.head()