## Import Dependencies

In [1]:
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt

import datetime as dt

import sqlalchemy
from sqlalchemy import create_engine
import psycopg2
from config import db_password

## Load data to create two DataFrames: Inspections and Violations

In [2]:
# Read the local CSV copies of the Los Angeles County restaurant/market
# inspection and violation records from ./Resources (no API request is made here).
inspections_dataset = pd.read_csv(
    filepath_or_buffer="./Resources/LOS_ANGELES_COUNTY_RESTAURANT_AND_MARKET_INSPECTIONS.csv"
)
violations_dataset = pd.read_csv(
    filepath_or_buffer="./Resources/LOS_ANGELES_COUNTY_RESTAURANT_AND_MARKET_VIOLATIONS.csv"
)

In [3]:
# Build inspections_df from the loaded inspections data, then preview its
# first five rows as the cell output.
inspections_df = pd.DataFrame(data=inspections_dataset)
inspections_df.head(n=5)

Unnamed: 0,ACTIVITY DATE,OWNER ID,OWNER NAME,FACILITY ID,FACILITY NAME,RECORD ID,PROGRAM NAME,PROGRAM STATUS,PROGRAM ELEMENT (PE),PE DESCRIPTION,...,FACILITY CITY,FACILITY STATE,FACILITY ZIP,SERVICE CODE,SERVICE DESCRIPTION,SCORE,GRADE,SERIAL NUMBER,EMPLOYEE ID,Location
0,09/10/2018,OW0105348,"GUCKENHEIMER SERVICES, LLC.",FA0242046,SERVERY- NICKELODEON,PR0190194,SERVERY- NICKELODEON,ACTIVE,1635,RESTAURANT (31-60) SEATS HIGH RISK,...,BURBANK,CA,91502,1,ROUTINE INSPECTION,96,A,DARRFUZBW,EE0000495,POINT (-118.314661 34.175253)
1,07/19/2018,OW0246461,ANASTACIOS POLITIS,FA0252769,TOMS JR BURGERS,PR0202127,TOMS JR BURGERS,ACTIVE,1632,RESTAURANT (0-30) SEATS HIGH RISK,...,LOS ANGELES,CA,90037-1867,1,ROUTINE INSPECTION,98,A,DA0XQVMTN,EE0001130,POINT (-118.292543 34.010859)
2,08/15/2018,OW0010130,DJ BIBINGKAHAN CORPORATION,FA0011237,DJ BIBINGKAHAN,PR0035416,DJ BIBINGKAHAN BAKESHOP,ACTIVE,1631,RESTAURANT (0-30) SEATS MODERATE RISK,...,WEST COVINA,CA,91792,1,ROUTINE INSPECTION,98,A,DAMPOJNY8,EE0000500,POINT (-117.913926 34.030964)
3,07/16/2018,OW0020051,KULWINDER KAUR,FA0061073,DOROSE LIQUOR,PR0027907,DOROSE LIQUOR,ACTIVE,1610,"FOOD MKT RETAIL (1-1,999 SF) LOW RISK",...,PANORAMA CITY,CA,91402,1,ROUTINE INSPECTION,91,A,DAUTU3DPD,EE0000045,POINT (-118.428399 34.221664)
4,09/07/2018,OW0246329,JUAN C OROZCO,FA0252595,MEJICO GRILL AND TEQUILLA LOUNGE,PR0201914,MEJICO GRILL AND TEQUILLA LOUNGE,ACTIVE,1641,RESTAURANT (151 + ) SEATS HIGH RISK,...,AGOURA HILLS,CA,91301,1,ROUTINE INSPECTION,90,A,DAUEU4NGF,EE0000526,POINT (-118.756808 34.143452)


In [4]:
# Build violations_df from the loaded violations data, then preview its
# first five rows as the cell output.
violations_df = pd.DataFrame(data=violations_dataset)
violations_df.head(n=5)

Unnamed: 0,SERIAL NUMBER,VIOLATION STATUS,VIOLATION CODE,VIOLATION DESCRIPTION,POINTS
0,DA000211Z,OUT OF COMPLIANCE,F006,# 06. Adequate handwashing facilities supplied...,2.0
1,DA000211Z,OUT OF COMPLIANCE,F044,"# 44. Floors, walls and ceilings: properly bui...",1.0
2,DA000211Z,OUT OF COMPLIANCE,F014,# 14. Food contact surfaces: clean and sanitized,2.0
3,DA000211Z,OUT OF COMPLIANCE,F029,"# 29. Toxic substances properly identified, st...",1.0
4,DA000211Z,OUT OF COMPLIANCE,F035,# 35. Equipment/Utensils - approved; installed...,1.0


## Inspect the Data - Perform Exploratory Data Analysis

In [5]:
# List the column labels of both DataFrames (inspections first, then
# violations), one Index per printed block.
print(inspections_df.columns, violations_df.columns, sep="\n")

Index(['ACTIVITY DATE', 'OWNER ID', 'OWNER NAME', 'FACILITY ID',
       'FACILITY NAME', 'RECORD ID', 'PROGRAM NAME', 'PROGRAM STATUS',
       'PROGRAM ELEMENT (PE)', 'PE DESCRIPTION', 'FACILITY ADDRESS',
       'FACILITY CITY', 'FACILITY STATE', 'FACILITY ZIP', 'SERVICE CODE',
       'SERVICE DESCRIPTION', 'SCORE', 'GRADE', 'SERIAL NUMBER', 'EMPLOYEE ID',
       'Location'],
      dtype='object')
Index(['SERIAL NUMBER', 'VIOLATION  STATUS', 'VIOLATION CODE',
       'VIOLATION DESCRIPTION', 'POINTS'],
      dtype='object')


In [6]:
# Show the dtype pandas inferred for every column of the Inspections DataFrame.
# As the last expression in the cell, the resulting Series is rendered as output.
inspections_df.dtypes

ACTIVITY DATE           object
OWNER ID                object
OWNER NAME              object
FACILITY ID             object
FACILITY NAME           object
RECORD ID               object
PROGRAM NAME            object
PROGRAM STATUS          object
PROGRAM ELEMENT (PE)     int64
PE DESCRIPTION          object
FACILITY ADDRESS        object
FACILITY CITY           object
FACILITY STATE          object
FACILITY ZIP            object
SERVICE CODE             int64
SERVICE DESCRIPTION     object
SCORE                    int64
GRADE                   object
SERIAL NUMBER           object
EMPLOYEE ID             object
Location                object
dtype: object

In [7]:
# Show the dtype pandas inferred for every column of the Violations DataFrame.
# As the last expression in the cell, the resulting Series is rendered as output.
violations_df.dtypes

SERIAL NUMBER             object
VIOLATION  STATUS         object
VIOLATION CODE            object
VIOLATION DESCRIPTION     object
POINTS                   float64
dtype: object

In [8]:
# Count the missing (null/NaN) entries in each column of the Inspections DataFrame.
inspections_df.isna().sum(axis=0)

ACTIVITY DATE              0
OWNER ID                   0
OWNER NAME                 0
FACILITY ID                0
FACILITY NAME              0
RECORD ID                  0
PROGRAM NAME               0
PROGRAM STATUS             0
PROGRAM ELEMENT (PE)       0
PE DESCRIPTION             0
FACILITY ADDRESS           0
FACILITY CITY              0
FACILITY STATE             0
FACILITY ZIP               0
SERVICE CODE               0
SERVICE DESCRIPTION        0
SCORE                      0
GRADE                     77
SERIAL NUMBER              0
EMPLOYEE ID                0
Location                7106
dtype: int64

In [9]:
# Count the missing (null/NaN) entries in each column of the Violations DataFrame.
violations_df.isna().sum(axis=0)

SERIAL NUMBER            0
VIOLATION  STATUS        0
VIOLATION CODE           0
VIOLATION DESCRIPTION    0
POINTS                   0
dtype: int64

In [12]:
# Remove the Location column from the Inspections DataFrame before counting
# the unique values in each column.
# errors="ignore" keeps this cell safe to re-run: once Location is gone, a
# second drop would otherwise raise KeyError.
# NOTE(review): the earlier note called this column "geocoded_column" and
# described it as "hashed"; the preview above shows plain "POINT (...)" text in
# Location — confirm the drop is still required for nunique().
inspections_df = inspections_df.drop(columns=["Location"], errors="ignore")
# Preview only the first rows rather than rendering the whole frame.
inspections_df.head(10)

Unnamed: 0,ACTIVITY DATE,OWNER ID,OWNER NAME,FACILITY ID,FACILITY NAME,RECORD ID,PROGRAM NAME,PROGRAM STATUS,PROGRAM ELEMENT (PE),PE DESCRIPTION,FACILITY ADDRESS,FACILITY CITY,FACILITY STATE,FACILITY ZIP,SERVICE CODE,SERVICE DESCRIPTION,SCORE,GRADE,SERIAL NUMBER,EMPLOYEE ID
0,09/10/2018,OW0105348,"GUCKENHEIMER SERVICES, LLC.",FA0242046,SERVERY- NICKELODEON,PR0190194,SERVERY- NICKELODEON,ACTIVE,1635,RESTAURANT (31-60) SEATS HIGH RISK,203 W OLIVE AVE # C,BURBANK,CA,91502,1,ROUTINE INSPECTION,96,A,DARRFUZBW,EE0000495
1,07/19/2018,OW0246461,ANASTACIOS POLITIS,FA0252769,TOMS JR BURGERS,PR0202127,TOMS JR BURGERS,ACTIVE,1632,RESTAURANT (0-30) SEATS HIGH RISK,1030 W MARTIN LUTHER KING JR BLVD STE 108,LOS ANGELES,CA,90037-1867,1,ROUTINE INSPECTION,98,A,DA0XQVMTN,EE0001130
2,08/15/2018,OW0010130,DJ BIBINGKAHAN CORPORATION,FA0011237,DJ BIBINGKAHAN,PR0035416,DJ BIBINGKAHAN BAKESHOP,ACTIVE,1631,RESTAURANT (0-30) SEATS MODERATE RISK,1515 E AMAR RD,WEST COVINA,CA,91792,1,ROUTINE INSPECTION,98,A,DAMPOJNY8,EE0000500
3,07/16/2018,OW0020051,KULWINDER KAUR,FA0061073,DOROSE LIQUOR,PR0027907,DOROSE LIQUOR,ACTIVE,1610,"FOOD MKT RETAIL (1-1,999 SF) LOW RISK",13560 ROSCOE BLVD,PANORAMA CITY,CA,91402,1,ROUTINE INSPECTION,91,A,DAUTU3DPD,EE0000045
4,09/07/2018,OW0246329,JUAN C OROZCO,FA0252595,MEJICO GRILL AND TEQUILLA LOUNGE,PR0201914,MEJICO GRILL AND TEQUILLA LOUNGE,ACTIVE,1641,RESTAURANT (151 + ) SEATS HIGH RISK,29002 AGOURA RD,AGOURA HILLS,CA,91301,1,ROUTINE INSPECTION,90,A,DAUEU4NGF,EE0000526
5,09/18/2018,OW0123199,REDONDO ENTERPRISES LLC,FA0158101,MCDONALD'S #10681,PR0146191,MCDONALD'S #10681,ACTIVE,1637,RESTAURANT (61-150) SEATS MODERATE RISK,5725 FLORENCE AVE,BELL GARDENS,CA,90201,1,ROUTINE INSPECTION,91,A,DARQIUA45,EE0000437
6,09/18/2018,OW0255633,LH HAWAII BBQ INC.,FA0264511,HULI HULI HAWAIIAN GRILL,PR0215618,HULI HULI HAWAIIAN GRILL,ACTIVE,1632,RESTAURANT (0-30) SEATS HIGH RISK,11540 HAWTHORNE BLVD STE A,HAWTHORNE,CA,90250,1,ROUTINE INSPECTION,90,A,DA0JE0XXO,EE0001121
7,07/17/2018,OW0026553,"OCHOA,SALVADOR",FA0011473,BURRITOS EL CHAVO,PR0005803,BURRITOS EL CHAVO,ACTIVE,1635,RESTAURANT (31-60) SEATS HIGH RISK,6032 RESEDA BLVD,TARZANA,CA,91356,1,ROUTINE INSPECTION,96,A,DAGKKNDCV,EE0000399
8,07/12/2018,OW0012926,"GARFIELD BEACH CVS, LLC.",FA0017398,CVS/PHARMACY,PR0027636,CVS/PHARMACY,ACTIVE,1613,"FOOD MKT RETAIL (2,000+ SF) LOW RISK",2530 GLENDALE BLVD,LOS ANGELES,CA,90039,1,ROUTINE INSPECTION,93,A,DACOHF2H4,EE0000946
9,08/24/2018,OW0102114,"CCM CREATIONS, INC.",FA0138817,CAFE VIDA,PR0123185,CAFE VIDA,ACTIVE,1635,RESTAURANT (31-60) SEATS HIGH RISK,9755 CULVER BLVD,CULVER CITY,CA,90232,401,OWNER INITIATED ROUTINE INSPECT.,91,A,DACIDHL0U,EE0001002


In [13]:
# Count the distinct non-null values found in each column of inspections_df.
inspections_df.nunique(axis=0, dropna=True)

ACTIVITY DATE              765
OWNER ID                 36246
OWNER NAME               35310
FACILITY ID              44848
FACILITY NAME            37110
RECORD ID                48581
PROGRAM NAME             40025
PROGRAM STATUS               2
PROGRAM ELEMENT (PE)        18
PE DESCRIPTION              34
FACILITY ADDRESS         39887
FACILITY CITY              177
FACILITY STATE               3
FACILITY ZIP              3683
SERVICE CODE                 2
SERVICE DESCRIPTION          2
SCORE                       47
GRADE                        3
SERIAL NUMBER           210427
EMPLOYEE ID                266
dtype: int64

In [None]:
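# TODO: Cast <column_name> from object to integer once the target column is chosen.
# Use bracket indexing, because the column names in these DataFrames contain spaces:
#df["<column_name>"] = df["<column_name>"].astype('int64')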

## Load data and create another DataFrame, Community Health

In [16]:
# Read the 2018 Los Angeles County City and Community Health Profiles from the
# local CSV under ./Resources into community_df (no API request is made here).
# NOTE(review): the recorded output shows "**" in some rate columns
# (e.g. Rte_mein) — presumably a suppressed-value marker; confirm before
# treating those columns as numeric.
community_df = pd.read_csv("./Resources/Los_Angeles_County_City_and_Community_Health_Profiles_2018.csv")
# Preview the first rows only, consistent with the .head() previews used for
# the Inspections and Violations DataFrames.
community_df.head(10)

Unnamed: 0,GEONAME,Pop_Tot,Prop_18y,Prop_64y,Prop_65y+,Prop_Blk,Prop_Lat,Prop_Whi,Prop_Asi,Prop_Ami,...,Rte_mein,Rte_cein,Rte_luin,Prop_fru,Prop_bev,Prop_hyp,Prop_marj,Prop_HI,Prop_grad,Prop_trua
0,Alhambra,86705,0.1831,0.6504,0.1665,0.0133,0.3435,0.0913,0.5498,0.0014,...,**,**,34.6,0.2,0.2,0.2,0.1,0.0,1.0,0.1
1,Altadena,42525,0.2072,0.6221,0.1707,0.2374,0.2905,0.4129,0.0553,0.0019,...,33.8,**,35.3,0.1,0.3,0.3,0.2,0.1,0.9,0.4
2,Arcadia,56992,0.1794,0.6389,0.1817,0.0115,0.1250,0.2304,0.6315,0.0014,...,22.0,**,29.5,0.1,0.2,0.2,0.0,0.0,1.0,0.1
3,Azusa,49479,0.2507,0.6538,0.0956,0.0292,0.6838,0.1938,0.0895,0.0024,...,**,**,29.7,0.1,0.4,0.2,0.1,0.0,0.9,0.0
4,Baldwin Park,74438,0.2623,0.6330,0.1047,0.0099,0.7934,0.0432,0.1514,0.0012,...,**,**,26.5,0.1,0.4,0.3,0.1,0.0,1.0,0.4
5,Bell,36595,0.2893,0.6249,0.0857,0.0061,0.9286,0.0569,0.0063,0.0020,...,**,**,**,0.1,0.4,0.2,0.1,0.0,0.9,0.4
6,Bell Gardens,42817,0.3026,0.6274,0.0700,0.0050,0.9584,0.0280,0.0055,0.0022,...,**,**,**,0.1,0.4,0.2,0.1,0.0,0.9,0.2
7,Bellflower,76057,0.2529,0.6383,0.1088,0.1507,0.5185,0.1939,0.1258,0.0031,...,14.7,**,39.3,0.2,0.5,0.4,0.1,0.0,1.0,0.1
8,Beverly Hills,34652,0.1574,0.6406,0.2021,0.0209,0.0591,0.8212,0.0976,0.0009,...,51.2,**,34.5,0.2,0.1,0.4,0.2,0.0,1.0,0.2
9,Burbank,104692,0.1820,0.6683,0.1497,0.0249,0.2551,0.5918,0.1256,0.0018,...,20.3,**,42.7,0.2,0.3,0.2,0.2,0.0,1.0,0.2


## Connect to SQL database


In [None]:
# TODO: Create the SQLAlchemy engine used to connect to the database
# (replace "<name>" with the database URL).
#engine = create_engine("<name>")

In [None]:

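# NOTE: db_password is already imported from config in the first cell, so this duplicate stays disabled.
#from config import db_password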