# Predicting binary Google Rating from Health Inspection Scores by Restaurant - adding violation counts

Preparing the datasets for ML model comparison:

1. Import averaged inspection scores which are binned into Health Scores.
2. Import Google ratings and violation counts. Violation counts are the number of times each violation occurred by facility.
3. Combine the datasets into a single dataframe.
4. Categorize the Google ratings into High and Low based on the distribution of all ratings: "Low Rating" = 4.52 or below, and "High Rating" = above 4.52, up to 5.
5. Use OneHotEncoder to change typeOfFacility and categoryOfFacility into binaries.
6. Final prep of dataframe for exporting to be used in ML Model Comparison.

In [1]:
# Import our dependencies
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np

# Load the bucketed health scores (the first CSV column becomes the index)
# and discard the raw averaged inspection score column.
ave_score_df = (
    pd.read_csv('db_average_health_scores_bucketed.csv', index_col=0)
    .drop(columns="ave_insp_score")
)
ave_score_df

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5
...,...,...,...,...
890,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5
891,FA0005508,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5
892,FA0005510,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5
893,FA0005539,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5


In [2]:
# Load the Google ratings; the first CSV column is used as the index
google_df = pd.read_csv('facility_ratings.csv', index_col=0)
google_df

Unnamed: 0_level_0,google_rating,total_ratings
facilityId,Unnamed: 1_level_1,Unnamed: 2_level_1
FA0001543,4.5,323
FA0002027,4.5,1160
FA0002162,4.1,174
FA0001624,4.3,628
FA0003535,4.3,578
...,...,...
FA0001776,4.3,541
FA0005279,4.4,1110
FA0005162,4.4,1106
FA0001660,4.9,59


In [3]:
# Load the pivoted violation-category counts (first CSV column as index)
vio_df = pd.read_csv('violation_cat_counts_pivot.csv', index_col=0)
# Any missing cell is replaced with 0
vio_df = vio_df.fillna(0)
vio_df

Unnamed: 0_level_0,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,cat_12,cat_13,cat_14
facilityId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
FA0000009,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
FA0000010,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0
FA0000011,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
FA0000015,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0
FA0000017,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
FA0005494,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FA0005506,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
FA0005510,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FA0005534,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Attach the Google rating columns to the health-score rows, matching on
# facilityId. The outer join keeps facilities found in either input.
ave_score_ratings = ave_score_df.merge(google_df, how='outer', on='facilityId')
ave_score_ratings

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore,google_rating,total_ratings
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,4.9,15
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,937
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.3,119
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,4.7,232
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.3,109
...,...,...,...,...,...,...
890,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,422
891,FA0005508,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,3.0,6
892,FA0005510,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,4.9,63
893,FA0005539,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.8,669


In [5]:
# Append the violation-count columns, matching on facilityId. The inner join
# keeps only facilities present in both frames.
categories = ave_score_ratings.merge(vio_df, how='inner', on='facilityId')
categories

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore,google_rating,total_ratings,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,cat_12,cat_13,cat_14
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,4.9,15,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,937,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.3,119,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,4.7,232,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.3,109,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,FA0005449,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,4,4.1,242,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
862,FA0005472,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.5,177,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
863,FA0005494,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.5,924,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
864,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,422,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [6]:
# New frame without the review-count column; `categories` itself is unchanged
cat_counts = categories.drop("total_ratings", axis=1)
# Optional export, left disabled:
#cat_counts.to_csv('cat_counts.csv')

In [7]:
# Inspect the dtype of every column in the merged frame
categories.dtypes

facilityId             object
typeOfFacility         object
categoryOfFacility     object
healthScore             int64
google_rating         float64
total_ratings           int64
cat_1                 float64
cat_2                 float64
cat_3                 float64
cat_5                 float64
cat_6                 float64
cat_7                 float64
cat_8                 float64
cat_9                 float64
cat_10                float64
cat_12                float64
cat_13                float64
cat_14                float64
dtype: object

In [8]:
# Derive the binary target "googleRating" from the numeric google_rating.
# pd.cut builds right-closed intervals by default, so with these edges:
#   "Low"  -> 0 <= rating <= 4.52   (include_lowest=True also admits exactly 0)
#   "High" -> 4.52 < rating <= 5
bins = [0, 4.52, 5]
google_rated = ["Low", "High"]
cat_counts["googleRating"] = pd.cut(
    x=cat_counts["google_rating"],
    bins=bins,
    labels=google_rated,
    include_lowest=True,
)
cat_counts

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore,google_rating,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,cat_12,cat_13,cat_14,googleRating
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,4.9,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,High
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0,High
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.3,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Low
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,4.7,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,High
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.3,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,FA0005449,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,4,4.1,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Low
862,FA0005472,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Low
863,FA0005494,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.5,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Low
864,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,High


In [9]:
# Remove the facility identifier and the raw numeric rating; the binned
# googleRating column stays in the frame.
feature_counts_df = cat_counts.drop(["facilityId", "google_rating"], axis=1)
feature_counts_df

Unnamed: 0,typeOfFacility,categoryOfFacility,healthScore,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,cat_12,cat_13,cat_14,googleRating
0,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,High
1,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,1.0,High
2,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Low
3,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,High
4,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Low
862,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Low
863,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Low
864,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,High


In [10]:
# Inspect column dtypes; the object-dtype columns are the ones selected
# for encoding further down.
feature_counts_df.dtypes

typeOfFacility          object
categoryOfFacility      object
healthScore              int64
cat_1                  float64
cat_2                  float64
cat_3                  float64
cat_5                  float64
cat_6                  float64
cat_7                  float64
cat_8                  float64
cat_9                  float64
cat_10                 float64
cat_12                 float64
cat_13                 float64
cat_14                 float64
googleRating          category
dtype: object

In [14]:
#feature_df.to_csv('features.csv')

# Preparing Data for Machine Learning Model Comparisons

In [11]:
# Preparing our data for ML
# Collect the names of the object-dtype columns, in frame order
fac_cat = [
    column
    for column, dtype in feature_counts_df.dtypes.items()
    if dtype == "object"
]

# Count the distinct values found in each of those columns
feature_counts_df[fac_cat].nunique()

typeOfFacility        9
categoryOfFacility    6
dtype: int64

In [12]:
# One-hot encode the categorical columns listed in fac_cat.
#
# scikit-learn renamed the dense-output flag (`sparse` -> `sparse_output`) and
# the feature-name accessor (`get_feature_names` -> `get_feature_names_out`),
# and later removed the old spellings. Try the current API first and fall back
# to the legacy one so the cell runs on both old and new installs.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older scikit-learn: the keyword is still called `sparse`
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list.
# Reuse the source frame's index so an index-based merge lines up row-for-row.
encode_df = pd.DataFrame(
    enc.fit_transform(feature_counts_df[fac_cat]),
    index=feature_counts_df.index,
)

# Add the encoded variable names ("<column>_<category value>") to the DataFrame
if hasattr(enc, "get_feature_names_out"):
    encode_df.columns = enc.get_feature_names_out(fac_cat)
else:
    encode_df.columns = enc.get_feature_names(fac_cat)
encode_df

Unnamed: 0,typeOfFacility_GROCERY STORE 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI MORE THAN 15000 SQ FT,typeOfFacility_LIMITED FOOD SERVICE CONVENIENCE OTHER,typeOfFacility_MOBILE UNIT FULL FOOD SERVICE,typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT,typeOfFacility_RESTAURANT 0 TO 100 SEATS,typeOfFacility_RESTAURANT 101 TO 200 SEATS,typeOfFacility_RESTAURANT MORE THAN 200 SEATS,categoryOfFacility_BARS FRATERNAL ORGANIZATIONS,categoryOfFacility_FAST FOOD LIMITED MENU,categoryOfFacility_FULL MENU LIMITED SERVICE,categoryOfFacility_FULL SERVICE FULL MENU,categoryOfFacility_MOBILE UNITS,categoryOfFacility_RETAIL COMMISSARY
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
862,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
863,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
864,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [13]:
# Inspect the dtypes of the one-hot encoded columns
encode_df.dtypes

typeOfFacility_GROCERY STORE 0 TO 15000 SQ FT                float64
typeOfFacility_GROCERY STORE W DELI 0 TO 15000 SQ FT         float64
typeOfFacility_GROCERY STORE W DELI MORE THAN 15000 SQ FT    float64
typeOfFacility_LIMITED FOOD SERVICE CONVENIENCE OTHER        float64
typeOfFacility_MOBILE UNIT FULL FOOD SERVICE                 float64
typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT         float64
typeOfFacility_RESTAURANT 0 TO 100 SEATS                     float64
typeOfFacility_RESTAURANT 101 TO 200 SEATS                   float64
typeOfFacility_RESTAURANT MORE THAN 200 SEATS                float64
categoryOfFacility_BARS FRATERNAL ORGANIZATIONS              float64
categoryOfFacility_FAST FOOD LIMITED MENU                    float64
categoryOfFacility_FULL MENU LIMITED SERVICE                 float64
categoryOfFacility_FULL SERVICE FULL MENU                    float64
categoryOfFacility_MOBILE UNITS                              float64
categoryOfFacility_RETAIL COMMISSA

In [14]:
# Merge one-hot encoded features (aligned on the row index) and drop the originals
feature_cat_counts_df = feature_counts_df.merge(encode_df, left_index=True, right_index=True)
# Name the axis via `columns=`: the positional form drop(labels, 1) is
# deprecated (FutureWarning) and is rejected by pandas 2.x.
feature_cat_counts_df = feature_cat_counts_df.drop(columns=fac_cat)
feature_cat_counts_df

  feature_cat_counts_df = feature_cat_counts_df.drop(fac_cat,1)


Unnamed: 0,healthScore,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,...,typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT,typeOfFacility_RESTAURANT 0 TO 100 SEATS,typeOfFacility_RESTAURANT 101 TO 200 SEATS,typeOfFacility_RESTAURANT MORE THAN 200 SEATS,categoryOfFacility_BARS FRATERNAL ORGANIZATIONS,categoryOfFacility_FAST FOOD LIMITED MENU,categoryOfFacility_FULL MENU LIMITED SERVICE,categoryOfFacility_FULL SERVICE FULL MENU,categoryOfFacility_MOBILE UNITS,categoryOfFacility_RETAIL COMMISSARY
0,4,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,5,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,5,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,3,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,5,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
862,5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
863,5,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
864,5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [17]:
# Put the columns in their export order: violation counts, facility-type
# indicators, facility-category indicators, healthScore, then googleRating last.
# NOTE: reindex fills any label listed here but absent from the frame with NaN
# instead of raising, so this list must match the encoder's column names exactly.
column_order = [
    'cat_1', 'cat_2', 'cat_3', 'cat_5',
    'cat_6', 'cat_7', 'cat_8', 'cat_9',
    'cat_10', 'cat_12', 'cat_13', 'cat_14',
    'typeOfFacility_GROCERY STORE 0 TO 15000 SQ FT',
    'typeOfFacility_GROCERY STORE W DELI 0 TO 15000 SQ FT',
    'typeOfFacility_GROCERY STORE W DELI MORE THAN 15000 SQ FT',
    'typeOfFacility_LIMITED FOOD SERVICE CONVENIENCE OTHER',
    'typeOfFacility_MOBILE UNIT FULL FOOD SERVICE',
    'typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT',
    'typeOfFacility_RESTAURANT 0 TO 100 SEATS',
    'typeOfFacility_RESTAURANT 101 TO 200 SEATS',
    'typeOfFacility_RESTAURANT MORE THAN 200 SEATS',
    'categoryOfFacility_BARS FRATERNAL ORGANIZATIONS',
    'categoryOfFacility_FAST FOOD LIMITED MENU',
    'categoryOfFacility_FULL MENU LIMITED SERVICE',
    'categoryOfFacility_FULL SERVICE FULL MENU',
    'categoryOfFacility_MOBILE UNITS',
    'categoryOfFacility_RETAIL COMMISSARY',
    'healthScore',
    'googleRating',
]
feature_cat_counts_df = feature_cat_counts_df.reindex(columns=column_order)
feature_cat_counts_df

Unnamed: 0,cat_1,cat_2,cat_3,cat_5,cat_6,cat_7,cat_8,cat_9,cat_10,cat_12,...,typeOfFacility_RESTAURANT 101 TO 200 SEATS,typeOfFacility_RESTAURANT MORE THAN 200 SEATS,categoryOfFacility_BARS FRATERNAL ORGANIZATIONS,categoryOfFacility_FAST FOOD LIMITED MENU,categoryOfFacility_FULL MENU LIMITED SERVICE,categoryOfFacility_FULL SERVICE FULL MENU,categoryOfFacility_MOBILE UNITS,categoryOfFacility_RETAIL COMMISSARY,healthScore,googleRating
0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4,High
1,1.0,2.0,1.0,2.0,1.0,0.0,0.0,3.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5,High
2,0.0,2.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5,Low
3,1.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3,High
4,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,4,Low
862,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5,Low
863,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5,Low
864,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5,High


In [18]:
# Export the final feature table as bare values: no header row and no index
# column are written to the file.
feature_cat_counts_df.to_csv(
    'features_counts.csv',
    index=False,
    header=False,
)