# Predicting binary Google Rating from Health Inspection Scores by Restaurant

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import tensorflow as tf

# Import our input datasets
ave_score_df=pd.read_csv('db_average_health_scores_bucketed.csv', index_col=0)
ave_score_df.drop("ave_insp_score", axis=1, inplace=True)
ave_score_df

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5
...,...,...,...,...
890,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5
891,FA0005508,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5
892,FA0005510,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5
893,FA0005539,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5


In [2]:
ave_score_df.dtypes

facilityId            object
typeOfFacility        object
categoryOfFacility    object
healthScore            int64
dtype: object

In [3]:
# Import the Google ratings.csv
google_df=pd.read_csv('facility_ratings.csv', index_col=0)
google_df

Unnamed: 0_level_0,google_rating,total_ratings
facilityId,Unnamed: 1_level_1,Unnamed: 2_level_1
FA0001543,4.5,323
FA0002027,4.5,1160
FA0002162,4.1,174
FA0001624,4.3,628
FA0003535,4.3,578
...,...,...
FA0001776,4.3,541
FA0005279,4.4,1110
FA0005162,4.4,1106
FA0001660,4.9,59


In [4]:
# Add Google ratings - going to need to .join
ave_score_ratings=pd.merge(ave_score_df, google_df, on='facilityId', how='outer')
ave_score_ratings

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore,google_rating,total_ratings
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,4.9,15
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,937
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.3,119
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,4.7,232
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.3,109
...,...,...,...,...,...,...
890,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,422
891,FA0005508,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,3.0,6
892,FA0005510,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,4.9,63
893,FA0005539,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.8,669


In [5]:
ave_score_ratings.dtypes

facilityId             object
typeOfFacility         object
categoryOfFacility     object
healthScore             int64
google_rating         float64
total_ratings           int64
dtype: object

In [6]:
# Change googleRating into two categories "High Rating"= 4.52 - 5 and "Low Rating" = < 4.51
bins=[0, 4.52, 5]
google_rated = ["Low", "High"]
ave_score_ratings["googleRating"]=pd.cut(ave_score_ratings["google_rating"], bins, include_lowest=True, labels=google_rated)
ave_score_ratings

Unnamed: 0,facilityId,typeOfFacility,categoryOfFacility,healthScore,google_rating,total_ratings,googleRating
0,FA0000009,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,4.9,15,High
1,FA0000010,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,937,High
2,FA0000011,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.3,119,Low
3,FA0000015,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,4.7,232,High
4,FA0000017,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,4.3,109,Low
...,...,...,...,...,...,...,...
890,FA0005506,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,4.7,422,High
891,FA0005508,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,3.0,6,Low
892,FA0005510,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,4.9,63,High
893,FA0005539,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,4.8,669,High


In [7]:
feature_df= ave_score_ratings.drop(columns=["facilityId", "total_ratings", "google_rating"])
feature_df

Unnamed: 0,typeOfFacility,categoryOfFacility,healthScore,googleRating
0,RESTAURANT 0 TO 100 SEATS,BARS FRATERNAL ORGANIZATIONS,4,High
1,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,High
2,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,Low
3,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,3,High
4,RESTAURANT 0 TO 100 SEATS,FULL SERVICE FULL MENU,5,Low
...,...,...,...,...
890,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,5,High
891,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,Low
892,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,5,High
893,RESTAURANT 0 TO 100 SEATS,FAST FOOD LIMITED MENU,5,High


In [8]:
feature_df.dtypes

typeOfFacility          object
categoryOfFacility      object
healthScore              int64
googleRating          category
dtype: object

In [9]:
#ave_score_df.to_csv('average_scores.csv')

# Machine Learning Model Comparisons

In [10]:
# Preparing our data for ML
# Generate our categorical variable list
fac_cat = feature_df.dtypes[feature_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
feature_df[fac_cat].nunique()

typeOfFacility        9
categoryOfFacility    6
dtype: int64

In [11]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(feature_df[fac_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(fac_cat)
encode_df

Unnamed: 0,typeOfFacility_GROCERY STORE 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI MORE THAN 15000 SQ FT,typeOfFacility_LIMITED FOOD SERVICE CONVENIENCE OTHER,typeOfFacility_MOBILE UNIT FULL FOOD SERVICE,typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT,typeOfFacility_RESTAURANT 0 TO 100 SEATS,typeOfFacility_RESTAURANT 101 TO 200 SEATS,typeOfFacility_RESTAURANT MORE THAN 200 SEATS,categoryOfFacility_BARS FRATERNAL ORGANIZATIONS,categoryOfFacility_FAST FOOD LIMITED MENU,categoryOfFacility_FULL MENU LIMITED SERVICE,categoryOfFacility_FULL SERVICE FULL MENU,categoryOfFacility_MOBILE UNITS,categoryOfFacility_RETAIL COMMISSARY
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
890,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
891,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
892,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
893,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [13]:
encode_df.to_csv('encoded_fac_cat.csv')

In [31]:
# Merge one-hot encoded features and drop the originals
feature_df = feature_df.merge(encode_df, left_index=True, right_index=True)
feature_df = feature_df.drop(fac_cat,1)
feature_df.head()

  feature_df = feature_df.drop(fac_cat,1)


Unnamed: 0,healthScore,googleRating,typeOfFacility_GROCERY STORE 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI 0 TO 15000 SQ FT,typeOfFacility_GROCERY STORE W DELI MORE THAN 15000 SQ FT,typeOfFacility_LIMITED FOOD SERVICE CONVENIENCE OTHER,typeOfFacility_MOBILE UNIT FULL FOOD SERVICE,typeOfFacility_NO FEE LICENSE K12 SCHOOLS NON PROFIT,typeOfFacility_RESTAURANT 0 TO 100 SEATS,typeOfFacility_RESTAURANT 101 TO 200 SEATS,typeOfFacility_RESTAURANT MORE THAN 200 SEATS,categoryOfFacility_BARS FRATERNAL ORGANIZATIONS,categoryOfFacility_FAST FOOD LIMITED MENU,categoryOfFacility_FULL MENU LIMITED SERVICE,categoryOfFacility_FULL SERVICE FULL MENU,categoryOfFacility_MOBILE UNITS,categoryOfFacility_RETAIL COMMISSARY
0,4,High,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,5,High,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,5,Low,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,3,High,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,5,Low,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [32]:
# Remove Google outcome target from features data
y = feature_df.googleRating
X = feature_df.drop(columns="googleRating")
target_names = ["negative", "positive"]

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [33]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [34]:
# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.737


In [35]:
# Create the SVM model
svm = SVC(kernel='linear')

# Train the model
svm.fit(X_train, y_train)

# Evaluate the model
y_pred = svm.predict(X_test_scaled)
print(f" SVM model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 SVM model accuracy: 0.737


In [36]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.728


In [37]:
# Create an AdaBoost classifier model.
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier(random_state=1).fit(X_train_scaled, y_train)
print(classification_report(y_test, y_pred, target_names=target_names))
print(f'Training Score: {clf.score(X_train_scaled, y_train)}')
print(f'Testing Score: {clf.score(X_test_scaled, y_test)}')

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        59
    positive       0.73      0.99      0.84       165

    accuracy                           0.73       224
   macro avg       0.37      0.49      0.42       224
weighted avg       0.54      0.73      0.62       224

Training Score: 0.7391952309985097
Testing Score: 0.7321428571428571


In [38]:
# Fit a model, and then print a classification report
clf = RandomForestClassifier(random_state=1).fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)
print(classification_report(y_test, y_pred, target_names=target_names))
print(f'Training Score: {clf.score(X_train_scaled, y_train)}')
print(f'Testing Score: {clf.score(X_test_scaled, y_test)}')

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        59
    positive       0.73      0.98      0.84       165

    accuracy                           0.72       224
   macro avg       0.37      0.49      0.42       224
weighted avg       0.54      0.72      0.62       224

Training Score: 0.7436661698956781
Testing Score: 0.7232142857142857


In [39]:
def model_tester(model, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf = model.fit(X_train_scaled, y_train)
    print(classification_report(y_test, y_pred, target_names=target_names))
    print(f'Training Score: {clf.score(X_train_scaled, y_train)}')
    print(f'Testing Score: {clf.score(X_test_scaled, y_test)}')
    
model_tester(AdaBoostClassifier(random_state=1, n_estimators=100), X, y)
model_tester(AdaBoostClassifier(random_state=1, n_estimators=200), X, y)
model_tester(AdaBoostClassifier(random_state=1, n_estimators=200, learning_rate=0.1), X, y)
model_tester(AdaBoostClassifier(random_state=1, n_estimators=500, learning_rate=0.1), X, y)
model_tester(AdaBoostClassifier(random_state=1, n_estimators=1000, learning_rate=0.1), X, y)
model_tester(AdaBoostClassifier(random_state=1, n_estimators=2000, learning_rate=0.1), X, y)

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        70
    positive       0.68      0.98      0.81       154

    accuracy                           0.67       224
   macro avg       0.34      0.49      0.40       224
weighted avg       0.47      0.67      0.55       224

Training Score: 0.7555886736214605
Testing Score: 0.6785714285714286
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        70
    positive       0.68      0.98      0.81       154

    accuracy                           0.67       224
   macro avg       0.34      0.49      0.40       224
weighted avg       0.47      0.67      0.55       224

Training Score: 0.7555886736214605
Testing Score: 0.6785714285714286
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00        70
    positive       0.68      0.98      0.81       154

    accuracy                           0.67 

In [40]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50


UnimplementedError: Graph execution error:

Detected at node 'binary_crossentropy/Cast' defined at (most recent call last):
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 596, in run_forever
      self._run_once()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 1890, in _run_once
      handle._run()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
      result = self._run_cell(
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
      return runner(coro)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\scheir3\AppData\Local\Temp/ipykernel_33776/2011197294.py", line 24, in <module>
      fit_model = nn.fit(X_train_scaled, y_train, epochs=50)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 860, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 918, in compute_loss
      return self.compiled_loss(
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\scheir3\Anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 1922, in binary_crossentropy
      y_true = tf.cast(y_true, y_pred.dtype)
Node: 'binary_crossentropy/Cast'
Cast string to float is not supported
	 [[{{node binary_crossentropy/Cast}}]] [Op:__inference_train_function_767]

In [None]:
# Add the prediction to the dataframe
# ave_score_df["googlePrediction"]= y_pred
# ave_score_df.head()