### Data cleaning accomplished in this notebook:
* Creating a new column from 'SEATS' into 'new_seats' with bins:
    * 0 = 0-30
    * 31 = 31-60
    * 61 = 61-150
    * 151 = 151+
    
* Modified "Activity Date" to include MONTH-YEAR and MONTH columns

In [23]:
# Import dependencies
import pandas as pd
import datetime

In [2]:
# Expand output display to view all
pd.set_option('display.max_columns', 999)

In [3]:
# Import datasets
dataset = pd.read_csv("./Resources/Clean/clean_new_rats.csv")

In [4]:
# Set up DataFrames
rats_df = pd.DataFrame(dataset)

In [5]:
# Delete 'unnamed' columns in DataFrames
del rats_df['Unnamed: 0']
del rats_df['Unnamed: 0.1']

In [6]:
# Display DF
rats_df.head()

Unnamed: 0,ACTIVITY_DATE,FACILITY_ID,FACILITY_NAME,PROGRAM_NAME,PROGRAM_STATUS,FACILITY_ADDRESS,FACILITY_CITY,FACILITY_STATE,FACILITY_ZIP,SCORE,GRADE,serial_number,SEATS,LAT,LNG,violation_status,violation_code,violation_description,points,GEONAME,Pop_Tot,Prop_18y,Prop_64y,Prop_65y+,Prop_Blk,Prop_Lat,Prop_Whi,Prop_Asi,Prop_Ami,Prop_NHO,Prop_FPL1,Prop_FPL2,Prop_forb,Prop_Eng,Prop_edLH,Prop_edHG,Prop_edSC,Prop_edCG,MHI,Prop_groc,Prop_60mi,No_farm,Prop_obse,Prop_DM,Rte_CVD,Prop_ownr,Prop_rentr,No_hless,Rte_crim,Rte_alco,Propt_envi,Prop_depr,Propt_HPI,Prop_fru,Prop_bev,Prop_hyp,Prop_marj
0,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F007,# 07. Proper hot and cold holding temperatures,4.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1
1,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F034,"# 34. Warewashing facilities: Adequate, mainta...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1
2,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F040,"# 40. Plumbing: Plumbing in good repair, prope...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1
3,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F037,# 37. Adequate ventilation and lighting; desig...,1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1
4,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F006,# 06. Adequate handwashing facilities supplied...,2.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1


In [7]:
# Display data types
rats_df.dtypes

ACTIVITY_DATE             object
FACILITY_ID               object
FACILITY_NAME             object
PROGRAM_NAME              object
PROGRAM_STATUS            object
FACILITY_ADDRESS          object
FACILITY_CITY             object
FACILITY_STATE            object
FACILITY_ZIP               int64
SCORE                      int64
GRADE                     object
serial_number             object
SEATS                     object
LAT                      float64
LNG                      float64
violation_status          object
violation_code            object
violation_description     object
points                   float64
GEONAME                   object
Pop_Tot                  float64
Prop_18y                 float64
Prop_64y                 float64
Prop_65y+                float64
Prop_Blk                 float64
Prop_Lat                 float64
Prop_Whi                 float64
Prop_Asi                 float64
Prop_Ami                 float64
Prop_NHO                 float64
Prop_FPL1 

### Creating a new column from 'SEATS': 'new_seats' 

In [8]:
# Get unique values in 'seats'
rats_df[["SEATS"]].nunique()

SEATS    4
dtype: int64

In [9]:
# What are the 4 unique values in 'seats'?
rats_df["SEATS"].value_counts()

0-30      350775
31-60     198613
61-150    153215
151 +      56227
Name: SEATS, dtype: int64

In [10]:
# Replace ' +' with '-'
seats = rats_df["SEATS"].str.replace(" +", "-")
seats

0         61-150
1         61-150
2         61-150
3         61-150
4         61-150
           ...  
758825      0-30
758826    151-+-
758827    151-+-
758828    151-+-
758829    151-+-
Name: SEATS, Length: 758830, dtype: object

In [11]:
# Apply .map() and lambda function to edit 'seats'
seats.map(lambda value: value.split("-"))

0          [61, 150]
1          [61, 150]
2          [61, 150]
3          [61, 150]
4          [61, 150]
             ...    
758825       [0, 30]
758826    [151, +, ]
758827    [151, +, ]
758828    [151, +, ]
758829    [151, +, ]
Name: SEATS, Length: 758830, dtype: object

In [16]:
# Lower end of the range, which can be used for the encoding
rev_seats = seats.map(lambda value: value.split("-")[0])
rev_seats

0          61
1          61
2          61
3          61
4          61
         ... 
758825      0
758826    151
758827    151
758828    151
758829    151
Name: SEATS, Length: 758830, dtype: object

In [17]:
# Change the data type of 'new_seats'
new_seats = rev_seats.astype(int)

In [18]:
# Add 'new_seats' to rats_df
rats_df["new_seats"] = new_seats
rats_df.head()

Unnamed: 0,ACTIVITY_DATE,FACILITY_ID,FACILITY_NAME,PROGRAM_NAME,PROGRAM_STATUS,FACILITY_ADDRESS,FACILITY_CITY,FACILITY_STATE,FACILITY_ZIP,SCORE,GRADE,serial_number,SEATS,LAT,LNG,violation_status,violation_code,violation_description,points,GEONAME,Pop_Tot,Prop_18y,Prop_64y,Prop_65y+,Prop_Blk,Prop_Lat,Prop_Whi,Prop_Asi,Prop_Ami,Prop_NHO,Prop_FPL1,Prop_FPL2,Prop_forb,Prop_Eng,Prop_edLH,Prop_edHG,Prop_edSC,Prop_edCG,MHI,Prop_groc,Prop_60mi,No_farm,Prop_obse,Prop_DM,Rte_CVD,Prop_ownr,Prop_rentr,No_hless,Rte_crim,Rte_alco,Propt_envi,Prop_depr,Propt_HPI,Prop_fru,Prop_bev,Prop_hyp,Prop_marj,new_seats
0,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F007,# 07. Proper hot and cold holding temperatures,4.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
1,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F034,"# 34. Warewashing facilities: Adequate, mainta...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
2,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F040,"# 40. Plumbing: Plumbing in good repair, prope...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
3,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F037,# 37. Adequate ventilation and lighting; desig...,1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
4,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,61-150,-118.358308,33.872838,OUT OF COMPLIANCE,F006,# 06. Adequate handwashing facilities supplied...,2.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61


NOTE: Keep in mind the bins of 'new_seats
1. 0 = 0-30
2. 31 = 31-60
3. 61 = 61-150
4. 151 = 151+


In [19]:
# Display data types
rats_df.dtypes

ACTIVITY_DATE             object
FACILITY_ID               object
FACILITY_NAME             object
PROGRAM_NAME              object
PROGRAM_STATUS            object
FACILITY_ADDRESS          object
FACILITY_CITY             object
FACILITY_STATE            object
FACILITY_ZIP               int64
SCORE                      int64
GRADE                     object
serial_number             object
SEATS                     object
LAT                      float64
LNG                      float64
violation_status          object
violation_code            object
violation_description     object
points                   float64
GEONAME                   object
Pop_Tot                  float64
Prop_18y                 float64
Prop_64y                 float64
Prop_65y+                float64
Prop_Blk                 float64
Prop_Lat                 float64
Prop_Whi                 float64
Prop_Asi                 float64
Prop_Ami                 float64
Prop_NHO                 float64
Prop_FPL1 

In [22]:
# Drop 'SEATS', 'FACILITY_STATE', 'FACILITY_ADDRESS'
#rats_df = rats_df.drop(columns = "SEATS", 
#                                 "FACILITY_ADDRESS",
#                                 "FACILITY_STATE", 
#                                 "GEONAME")
#rats_df.head()

Unnamed: 0,ACTIVITY_DATE,FACILITY_ID,FACILITY_NAME,PROGRAM_NAME,PROGRAM_STATUS,FACILITY_ADDRESS,FACILITY_CITY,FACILITY_STATE,FACILITY_ZIP,SCORE,GRADE,serial_number,LAT,LNG,violation_status,violation_code,violation_description,points,GEONAME,Pop_Tot,Prop_18y,Prop_64y,Prop_65y+,Prop_Blk,Prop_Lat,Prop_Whi,Prop_Asi,Prop_Ami,Prop_NHO,Prop_FPL1,Prop_FPL2,Prop_forb,Prop_Eng,Prop_edLH,Prop_edHG,Prop_edSC,Prop_edCG,MHI,Prop_groc,Prop_60mi,No_farm,Prop_obse,Prop_DM,Rte_CVD,Prop_ownr,Prop_rentr,No_hless,Rte_crim,Rte_alco,Propt_envi,Prop_depr,Propt_HPI,Prop_fru,Prop_bev,Prop_hyp,Prop_marj,new_seats
0,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F007,# 07. Proper hot and cold holding temperatures,4.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
1,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F034,"# 34. Warewashing facilities: Adequate, mainta...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
2,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F040,"# 40. Plumbing: Plumbing in good repair, prope...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
3,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F037,# 37. Adequate ventilation and lighting; desig...,1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61
4,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F006,# 06. Adequate handwashing facilities supplied...,2.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.133,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.111,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61


### Creating two new columns from 'ACTIVITY_DATE':  'ACTIVITY_DATE_MONTHYEAR' and 'ACTIVITY_DATE_MONTH'

In [24]:
sSourceColumn = 'ACTIVITY_DATE' 
sListNewColumns = ['ACTIVITY_DATE_MONTHYEAR', 'ACTIVITY_DATE_MONTH']
sListMissing1 = []
sListMissing2 = []

for i, row in rats_df.iterrows():
    sInitialValue = row[sSourceColumn]
    
    # First format - month + year
    sNewValue = datetime.datetime.strptime(sInitialValue, '%Y-%m-%d').strftime('%Y-%m')
    rats_df.at[i, sListNewColumns[0]] = sNewValue
    sListMissing1.append(sNewValue) if sNewValue not in sListMissing1 else sListMissing1
    
    # Second format - month only
    sNewValue = datetime.datetime.strptime(sInitialValue, '%Y-%m-%d').strftime('%m')
    rats_df.at[i, sListNewColumns[1]] = sNewValue
    sListMissing2.append(sNewValue) if sNewValue not in sListMissing2 else sListMissing2
  
#del df['Unnamed: 0']
rats_df

Unnamed: 0,ACTIVITY_DATE,FACILITY_ID,FACILITY_NAME,PROGRAM_NAME,PROGRAM_STATUS,FACILITY_ADDRESS,FACILITY_CITY,FACILITY_STATE,FACILITY_ZIP,SCORE,GRADE,serial_number,LAT,LNG,violation_status,violation_code,violation_description,points,GEONAME,Pop_Tot,Prop_18y,Prop_64y,Prop_65y+,Prop_Blk,Prop_Lat,Prop_Whi,Prop_Asi,Prop_Ami,Prop_NHO,Prop_FPL1,Prop_FPL2,Prop_forb,Prop_Eng,Prop_edLH,Prop_edHG,Prop_edSC,Prop_edCG,MHI,Prop_groc,Prop_60mi,No_farm,Prop_obse,Prop_DM,Rte_CVD,Prop_ownr,Prop_rentr,No_hless,Rte_crim,Rte_alco,Propt_envi,Prop_depr,Propt_HPI,Prop_fru,Prop_bev,Prop_hyp,Prop_marj,new_seats,ACTIVITY_DATE_MONTHYEAR,ACTIVITY_DATE_MONTH
0,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F007,# 07. Proper hot and cold holding temperatures,4.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.1330,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.1110,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61,2020-02,02
1,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F034,"# 34. Warewashing facilities: Adequate, mainta...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.1330,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.1110,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61,2020-02,02
2,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F040,"# 40. Plumbing: Plumbing in good repair, prope...",1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.1330,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.1110,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61,2020-02,02
3,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F037,# 37. Adequate ventilation and lighting; desig...,1.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.1330,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.1110,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61,2020-02,02
4,2020-02-04,FA0240932,THE GREAT ROOM CAFE,THE GREAT ROOM CAFE,ACTIVE,2810 ARTESIA BLVD,REDONDO BEACH,CA,90278,91,A,DA000B8AV,-118.358308,33.872838,OUT OF COMPLIANCE,F006,# 06. Adequate handwashing facilities supplied...,2.0,REDONDO BEACH,69316.0,0.2056,0.6614,0.1330,0.0272,0.1567,0.6806,0.1302,0.0025,0.0028,0.0462,0.0957,0.1877,0.0393,0.0415,0.1110,0.2785,0.5689,105145.0,0.7645,0.116,2,0.1971,0.0599,177.9619,0.499,0.501,216.0,231.0,26.0,84,0.1129,93.0,0.2,0.2,0.2,0.1,61,2020-02,02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
758825,2018-08-27,FA0222602,CHOP STOP #5,CHOP STOP #5,ACTIVE,601 GLENDALE AVE,GLENDALE,CA,91206,98,A,DAZZXWPZN,-118.243261,34.154154,OUT OF COMPLIANCE,F034,"# 34. Warewashing facilities: Adequate, mainta...",1.0,GLENDALE,201604.0,0.1690,0.6548,0.1762,0.0120,0.1755,0.6382,0.1727,0.0010,0.0005,0.1318,0.2943,0.5444,0.2522,0.1564,0.1908,0.2738,0.3790,52574.0,0.7505,0.098,1,0.1516,0.0716,181.4710,0.353,0.647,240.0,113.0,17.0,1,0.0980,46.1,0.1,0.3,0.2,0.2,0,2018-08,08
758826,2018-02-15,FA0068935,MAYAN,MAYAN,ACTIVE,1038 S HILL ST,"LOS ANGELES, CITY OF",CA,90015,96,A,DAZZY0TED,-118.259326,34.040671,OUT OF COMPLIANCE,F037,# 37. Adequate ventilation and lighting; desig...,1.0,"LOS ANGELES, CITY OF",4032993.0,0.2221,0.6574,0.1205,0.0912,0.5004,0.2888,0.1168,0.0017,0.0011,0.2031,0.4241,0.3820,0.1575,0.2453,0.1963,0.2387,0.3197,50205.0,0.6835,0.121,58,0.2193,0.1006,205.0990,0.368,0.632,28464.0,620.0,15.0,21,0.0925,28.6,0.2,0.4,0.2,0.1,151,2018-02,02
758827,2018-02-15,FA0068935,MAYAN,MAYAN,ACTIVE,1038 S HILL ST,"LOS ANGELES, CITY OF",CA,90015,96,A,DAZZY0TED,-118.259326,34.040671,OUT OF COMPLIANCE,F040,"# 40. Plumbing: Plumbing in good repair, prope...",1.0,"LOS ANGELES, CITY OF",4032993.0,0.2221,0.6574,0.1205,0.0912,0.5004,0.2888,0.1168,0.0017,0.0011,0.2031,0.4241,0.3820,0.1575,0.2453,0.1963,0.2387,0.3197,50205.0,0.6835,0.121,58,0.2193,0.1006,205.0990,0.368,0.632,28464.0,620.0,15.0,21,0.0925,28.6,0.2,0.4,0.2,0.1,151,2018-02,02
758828,2018-02-15,FA0068935,MAYAN,MAYAN,ACTIVE,1038 S HILL ST,"LOS ANGELES, CITY OF",CA,90015,96,A,DAZZY0TED,-118.259326,34.040671,OUT OF COMPLIANCE,F044,"# 44. Floors, walls and ceilings: properly bui...",1.0,"LOS ANGELES, CITY OF",4032993.0,0.2221,0.6574,0.1205,0.0912,0.5004,0.2888,0.1168,0.0017,0.0011,0.2031,0.4241,0.3820,0.1575,0.2453,0.1963,0.2387,0.3197,50205.0,0.6835,0.121,58,0.2193,0.1006,205.0990,0.368,0.632,28464.0,620.0,15.0,21,0.0925,28.6,0.2,0.4,0.2,0.1,151,2018-02,02


In [25]:
print(sListMissing2)
len(sListMissing2)

['02', '08', '05', '10', '09', '07', '01', '04', '03', '12', '06', '11']


12

In [26]:
print(sListMissing1)
len(sListMissing1)

['2020-02', '2019-08', '2019-05', '2018-10', '2018-09', '2017-09', '2019-07', '2019-10', '2018-01', '2018-04', '2019-09', '2020-03', '2018-12', '2019-12', '2019-04', '2017-10', '2019-06', '2018-08', '2017-12', '2020-01', '2018-05', '2019-03', '2018-06', '2019-01', '2017-11', '2019-11', '2018-03', '2018-07', '2019-02', '2018-11', '2018-02']


31

In [None]:
# Reorder the columns such that the new dates are next to the original
#sListNewOrder = []
#sOriginalOrder = df.columns

#for sColumn in sOriginalOrder:   
#    sListNewOrder.append(sColumn) if sColumn not in sListNewOrder else sListNewOrder
#    if sColumn == 'ACTIVITY_DATE':
#        sListNewOrder.append(sListNewColumns[0])
#        sListNewOrder.append(sListNewColumns[1])
    #print(sListNewOrder)

#dfSorted = df.reindex(columns=sListNewOrder)
#dfSorted

In [None]:
#dfSorted.to_csv(r'clean_inspections_datechanged.csv')

In [27]:
# Export to csv
rats_df.to_csv("./Resources/Clean/clean_new_rats_JUNE09.csv")