<h1>Prepare Data</h1>
<p>Best Models So Far: Random Forest and RBF SVMs</p>

In [19]:
import pandas as pd
import numpy as np
import datetime as dt
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score

SECONDS_IN_YEAR = (dt.datetime(2021, 1, 1) - dt.datetime(2020, 1, 1)).total_seconds()

def get_population_density(location):
    if location == "Kenosha;WI":
        return 1
    elif location == "Hackensack;NJ":
        return 2

def get_SINE(date):
    seconds = (date - dt.datetime(2020, 1, 1, 0)).total_seconds()
    return np.sin(2 * np.pi * seconds / SECONDS_IN_YEAR)

def get_COSINE(date):
    seconds = (date - dt.datetime(2020, 1, 1, 0)).total_seconds()
    return np.cos(2 * np.pi * seconds / SECONDS_IN_YEAR)

def get_hour_difference(start_datetime, end_datetime):
    return (end_datetime - start_datetime).total_seconds() / 3600

# Import file 1
import_file = "DoorDash_Kenosha_WI_V1.csv"
columns = ["Location", "Start_Datetime", "End_Datetime", "Base", "Peak", "Tip", "Total"]
df1 = pd.read_csv(import_file, names=columns)

# Import file 2
import_file = "DoorDash_Kenosha_WI_V2.csv"
columns = ["Location", "Start_Datetime", "End_Datetime", "DPH", "TEPH"]
df2 = pd.read_csv(import_file, names=columns)

# Change to numerical data for df1
df1["Location"] = df1["Location"].map(get_population_density)
df1["Start_Datetime"] = pd.to_datetime(df1["Start_Datetime"], format="%Y-%m-%dT%H:%MZ", errors="coerce")
df1["DayOfWeek"] = df1["Start_Datetime"].dt.dayofweek
df1["Start_Datetime"] = (df1["Start_Datetime"] - dt.datetime(2020, 1, 1)).dt.total_seconds()
df1["Start_Datetime_SINE"] = np.sin(2 * np.pi * df1["Start_Datetime"] / SECONDS_IN_YEAR)
df1["Start_Datetime_COSINE"] = np.cos(2 * np.pi * df1["Start_Datetime"] / SECONDS_IN_YEAR)
df1["End_Datetime"] = pd.to_datetime(df1["End_Datetime"], format="%Y-%m-%dT%H:%MZ", errors="coerce")
df1["End_Datetime"] = (df1["End_Datetime"] - dt.datetime(2020, 1, 1)).dt.total_seconds()
df1["End_Datetime_SINE"] = np.sin(2 * np.pi * df1["End_Datetime"] / SECONDS_IN_YEAR)
df1["End_Datetime_COSINE"] = np.cos(2 * np.pi * df1["End_Datetime"] / SECONDS_IN_YEAR)
df1 = df1[["Location", "Start_Datetime_SINE", "Start_Datetime_COSINE", "End_Datetime_SINE", "End_Datetime_COSINE", "DayOfWeek", "Base", "Peak", "Tip", "Total"]]

# Change to numerical data for df2
df2["Location"] = df2["Location"].map(get_population_density)
df2["Start_Datetime"] = pd.to_datetime(df2["Start_Datetime"], format="%Y-%m-%dT%H:%MZ", errors="coerce")
df2["DayOfWeek"] = df2["Start_Datetime"].dt.dayofweek
df2["Start_Datetime"] = (df2["Start_Datetime"] - dt.datetime(2020, 1, 1)).dt.total_seconds()
df2["Start_Datetime_SINE"] = np.sin(2 * np.pi * df2["Start_Datetime"] / SECONDS_IN_YEAR)
df2["Start_Datetime_COSINE"] = np.cos(2 * np.pi * df2["Start_Datetime"] / SECONDS_IN_YEAR)
df2["End_Datetime"] = pd.to_datetime(df2["End_Datetime"], format="%Y-%m-%dT%H:%MZ", errors="coerce")
df2["End_Datetime"] = (df2["End_Datetime"] - dt.datetime(2020, 1, 1)).dt.total_seconds()
df2["End_Datetime_SINE"] = np.sin(2 * np.pi * df2["End_Datetime"] / SECONDS_IN_YEAR)
df2["End_Datetime_COSINE"] = np.cos(2 * np.pi * df2["End_Datetime"] / SECONDS_IN_YEAR)
df2 = df2[["Location", "Start_Datetime_SINE", "Start_Datetime_COSINE", "End_Datetime_SINE", "End_Datetime_COSINE", "DayOfWeek", "DPH", "TEPH"]]

# Separate features and classes for df1
feature_names = ["Location", "Start_Datetime_SINE", "Start_Datetime_COSINE", "End_Datetime_SINE", "End_Datetime_COSINE", "DayOfWeek"]
all_features_df1 = df1[feature_names].values
all_classes_base = df1["Base"].values
all_classes_peak = df1["Peak"].values
all_classes_tip = df1["Tip"].values
all_classes_total = df1["Total"].values

# Separate features and classes for df2
feature_names = ["Location", "Start_Datetime_SINE", "Start_Datetime_COSINE", "End_Datetime_SINE", "End_Datetime_COSINE", "DayOfWeek"]
all_features_df2 = df2[feature_names].values
all_classes_DPH = df2["DPH"].values
all_classes_TEPH = df2["TEPH"].values

# Normalize data
scaler = preprocessing.StandardScaler()
all_features_df1_scaled = scaler.fit_transform(all_features_df1)
all_features_df2_scaled = scaler.fit_transform(all_features_df2)

# Test inputs
location = get_population_density("Hackensack;NJ")
start_datetime = dt.datetime(2020, 10, 5, 6, 0)
end_datetime = dt.datetime(2020, 10, 5, 7, 0)
start_datetime_SINE = get_SINE(start_datetime)
start_datetime_COSINE = get_COSINE(start_datetime)
end_datetime_SINE = get_SINE(end_datetime)
end_datetime_COSINE = get_COSINE(end_datetime)
dayOfWeek = start_datetime.weekday()
hour_difference = get_hour_difference(start_datetime, end_datetime)

In [20]:
df1.describe()

Unnamed: 0,Location,Start_Datetime_SINE,Start_Datetime_COSINE,End_Datetime_SINE,End_Datetime_COSINE,DayOfWeek,Base,Peak,Tip,Total
count,1247.0,1247.0,1247.0,1247.0,1247.0,1247.0,1247.0,1247.0,1247.0,1247.0
mean,1.0,0.5955,-0.371143,0.594476,-0.373265,2.757017,2.935044,0.632719,5.25498,8.824346
std,0.0,0.567789,0.430878,0.568531,0.429478,1.912906,0.645153,1.126004,2.364477,2.643884
min,1.0,-0.863331,-0.999999,-0.864633,-0.999999,0.0,2.0,0.0,0.0,2.0
25%,1.0,0.225114,-0.711678,0.223824,-0.714353,1.0,3.0,0.0,4.0,7.0
50%,1.0,0.902169,-0.431382,0.90075,-0.434338,3.0,3.0,0.0,5.0,8.0
75%,1.0,0.979002,0.017846,0.978252,0.014735,4.0,3.0,1.0,6.0,10.0
max,1.0,1.0,0.398202,0.99998,0.395138,6.0,7.5,6.0,19.42,23.92


In [21]:
df2.describe()

Unnamed: 0,Location,Start_Datetime_SINE,Start_Datetime_COSINE,End_Datetime_SINE,End_Datetime_COSINE,DayOfWeek,DPH,TEPH
count,158.0,158.0,158.0,158.0,158.0,158.0,158.0,158.0
mean,1.0,0.517903,-0.419314,0.516944,-0.420813,2.56962,2.142722,18.825823
std,0.0,0.606759,0.43742,0.607448,0.436156,1.879718,0.519547,4.87656
min,1.0,-0.863331,-0.999999,-0.864633,-0.999999,0.0,0.25,1.72
25%,1.0,0.005478,-0.747335,0.00214,-0.74876,1.0,1.88,16.235
50%,1.0,0.855939,-0.506966,0.854199,-0.507836,2.0,2.13,18.545
75%,1.0,0.967761,0.010106,0.968308,0.006777,4.0,2.4075,22.1975
max,1.0,1.0,0.398202,0.99998,0.395138,6.0,4.03,34.03


In [22]:
import math

total_mean = df1["Total"].mean()
DPH_mean = df2["DPH"].mean()
print("Total Calculation:", total_mean * DPH_mean * hour_difference)
total_std = df1["Total"].std()
total_variance = total_std * total_std
total_test_std = math.sqrt(total_variance * DPH_mean * hour_difference)
print("Total STD Calculation:", total_test_std)

TEPH_mean = df2["TEPH"].mean()
print("TEPH Calculation:", TEPH_mean * hour_difference)
TEPH_std = df2["TEPH"].std()
TEPH_variance = TEPH_std * TEPH_std
TEPH_test_std = math.sqrt(TEPH_variance * hour_difference)
print("TEPH STD Calculation:", TEPH_test_std)

Total Calculation: 18.90811698963588
Total STD Calculation: 3.870127599112604
TEPH Calculation: 18.825822784810125
TEPH STD Calculation: 4.8765598471583465


<h1>Random Forest</h1>

In [23]:
from sklearn.ensemble import RandomForestRegressor

clf = RandomForestRegressor(n_estimators=10, random_state=1)

# Return random forest estimates and k-fold cross-validation scores
def get_random_forest(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek):
    test_input = [[location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek]]
    # Base
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_base, cv=10)
    base_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_base)
    base = clf.predict(test_input)[0]
    base_array = [base, base_k, "Base"]
    # Peak
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_peak, cv=10)
    peak_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_peak)
    peak = clf.predict(test_input)[0]
    peak_array = [peak, peak_k, "Peak"]
    # Tip
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_tip, cv=10)
    tip_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_tip)
    tip = clf.predict(test_input)[0]
    tip_array = [tip, tip_k, "Tip"]
    # Total
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_total, cv=10)
    total_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_total)
    total = clf.predict(test_input)[0]
    total_array = [total, total_k, "Total"]
    # DPH
    cv_scores = cross_val_score(clf, all_features_df2_scaled, all_classes_DPH, cv=10)
    DPH_k = cv_scores.mean()
    clf.fit(all_features_df2_scaled, all_classes_DPH)
    DPH = clf.predict(test_input)[0]
    DPH_array = [DPH, DPH_k, "DPH"]
    # TEPH
    cv_scores = cross_val_score(clf, all_features_df2_scaled, all_classes_TEPH, cv=10)
    TEPH_k = cv_scores.mean()
    clf.fit(all_features_df2_scaled, all_classes_TEPH)
    TEPH = clf.predict(test_input)[0]
    TEPH_array = [TEPH, TEPH_k, "TEPH"]
    return [base_array, peak_array, tip_array, total_array, DPH_array, TEPH_array]

estimates = get_random_forest(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek)
print(estimates)

print("Total Calculation:", estimates[3][0] * estimates[4][0] * hour_difference)
total_test_std = math.sqrt(total_variance * estimates[4][0] * hour_difference)
print("Total STD Calculation:", total_test_std)

print("TEPH Calculation:", estimates[5][0] * hour_difference)
TEPH_test_std = math.sqrt(TEPH_variance * hour_difference)
print("TEPH STD Calculation:", TEPH_test_std)

[[2.9089015151515154, -0.20160678717620445, 'Base'], [1.2708333333333335, -1.1908936444530118, 'Peak'], [5.394583333333334, -0.11065689775955123, 'Tip'], [9.327636904761905, -0.12892944110926788, 'Total'], [1.85, -0.5382985294080384, 'DPH'], [18.078, -0.3354776571122365, 'TEPH']]
Total Calculation: 17.256128273809523
Total STD Calculation: 3.5960712062821756
TEPH Calculation: 18.078
TEPH STD Calculation: 4.8765598471583465


<h1>Linear SVM</h1>

In [24]:
from sklearn import svm

# Hyperparameters
C = 1.0

svr = svm.SVR(kernel="linear", C=C)

# Return linear SVM estimates and k-fold cross-validation scores
def get_linear_SVM(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek):
    test_input = [[location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek]]
    # Base
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_base, cv=10)
    base_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_base)
    base = svr.predict(test_input)[0]
    base_array = [base, base_k, "Base"]
    # Peak
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_peak, cv=10)
    peak_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_peak)
    peak = svr.predict(test_input)[0]
    peak_array = [peak, peak_k, "Peak"]
    # Tip
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_tip, cv=10)
    tip_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_tip)
    tip = svr.predict(test_input)[0]
    tip_array = [tip, tip_k, "Tip"]
    # Total
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_total, cv=10)
    total_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_total)
    total = svr.predict(test_input)[0]
    total_array = [total, total_k, "Total"]
    # DPH
    cv_scores = cross_val_score(svr, all_features_df2_scaled, all_classes_DPH, cv=10)
    DPH_k = cv_scores.mean()
    svr.fit(all_features_df2_scaled, all_classes_DPH)
    DPH = svr.predict(test_input)[0]
    DPH_array = [DPH, DPH_k, "DPH"]
    # TEPH
    cv_scores = cross_val_score(svr, all_features_df2_scaled, all_classes_TEPH, cv=10)
    TEPH_k = cv_scores.mean()
    svr.fit(all_features_df2_scaled, all_classes_TEPH)
    TEPH = svr.predict(test_input)[0]
    TEPH_array = [TEPH, TEPH_k, "TEPH"]
    return [base_array, peak_array, tip_array, total_array, DPH_array, TEPH_array]

estimates = get_linear_SVM(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek)
print(estimates)

print("Total Calculation:", estimates[3][0] * estimates[4][0] * hour_difference)
total_test_std = math.sqrt(total_variance * estimates[4][0] * hour_difference)
print("Total STD Calculation:", total_test_std)

print("TEPH Calculation:", estimates[5][0] * hour_difference)
TEPH_test_std = math.sqrt(TEPH_variance * hour_difference)
print("TEPH STD Calculation:", TEPH_test_std)

[[2.899965298556714, -0.021787612713334892, 'Base'], [0.1002438860846637, -0.2888180979355207, 'Peak'], [4.985695745189954, -0.03528083618764792, 'Tip'], [8.370295628976567, -0.01799587504508974, 'Total'], [1.94601293391354, -0.13340048295964888, 'DPH'], [17.26888391905293, -0.3487516384095234, 'TEPH']]
Total Calculation: 16.28870355466837
Total STD Calculation: 3.6882069312762864
TEPH Calculation: 17.26888391905293
TEPH STD Calculation: 4.8765598471583465


<h1>RBF SVM</h1>

In [25]:
from sklearn import svm

# Hyperparameters
C = 1.0

svr = svm.SVR(kernel="rbf", C=C)

# Return RBF SVM estimates and k-fold cross-validation scores
def get_RBF_SVM(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek):
    test_input = [[location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek]]
    # Base
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_base, cv=10)
    base_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_base)
    base = svr.predict(test_input)[0]
    base_array = [base, base_k, "Base"]
    # Peak
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_peak, cv=10)
    peak_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_peak)
    peak = svr.predict(test_input)[0]
    peak_array = [peak, peak_k, "Peak"]
    # Tip
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_tip, cv=10)
    tip_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_tip)
    tip = svr.predict(test_input)[0]
    tip_array = [tip, tip_k, "Tip"]
    # Total
    cv_scores = cross_val_score(svr, all_features_df1_scaled, all_classes_total, cv=10)
    total_k = cv_scores.mean()
    svr.fit(all_features_df1_scaled, all_classes_total)
    total = svr.predict(test_input)[0]
    total_array = [total, total_k, "Total"]
    # DPH
    cv_scores = cross_val_score(svr, all_features_df2_scaled, all_classes_DPH, cv=10)
    DPH_k = cv_scores.mean()
    svr.fit(all_features_df2_scaled, all_classes_DPH)
    DPH = svr.predict(test_input)[0]
    DPH_array = [DPH, DPH_k, "DPH"]
    # TEPH
    cv_scores = cross_val_score(svr, all_features_df2_scaled, all_classes_TEPH, cv=10)
    TEPH_k = cv_scores.mean()
    svr.fit(all_features_df2_scaled, all_classes_TEPH)
    TEPH = svr.predict(test_input)[0]
    TEPH_array = [TEPH, TEPH_k, "TEPH"]
    return [base_array, peak_array, tip_array, total_array, DPH_array, TEPH_array]

estimates = get_RBF_SVM(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek)
print(estimates)

print("Total Calculation:", estimates[3][0] * estimates[4][0] * hour_difference)
total_test_std = math.sqrt(total_variance * estimates[4][0] * hour_difference)
print("Total STD Calculation:", total_test_std)

print("TEPH Calculation:", estimates[5][0] * hour_difference)
TEPH_test_std = math.sqrt(TEPH_variance * hour_difference)
print("TEPH STD Calculation:", TEPH_test_std)

[[2.945620709335246, -0.0237374286088629, 'Base'], [0.29223805451695856, -0.6215163903294936, 'Peak'], [4.8802761710025, -0.049936759962413, 'Tip'], [8.399370819969398, -0.05483948596850261, 'Total'], [2.0705172115599937, -0.1432492581290766, 'DPH'], [18.4974107455068, -0.29583132639943555, 'TEPH']]
Total Calculation: 17.391041849021416
Total STD Calculation: 3.8043620517892283
TEPH Calculation: 18.4974107455068
TEPH STD Calculation: 4.8765598471583465


<h1>K-Nearest Neighbors</h1>

In [26]:
from sklearn import neighbors

clf = neighbors.KNeighborsRegressor(n_neighbors=10)

# Find optimal value for neighbors
#for n in range(1, 50):
#    clf = neighbors.KNeighborsRegressor(n_neighbors=n)
#    cv_scores = cross_val_score(clf, all_features_scaled, all_classes_base, cv=10)
#    print(n, cv_scores.mean())

# Return KNN estimates and k-fold cross-validation scores
def get_KNN(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek):
    test_input = [[location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek]]
    # Base
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_base, cv=10)
    base_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_base)
    base = clf.predict(test_input)[0]
    base_array = [base, base_k, "Base"]
    # Peak
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_peak, cv=10)
    peak_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_peak)
    peak = clf.predict(test_input)[0]
    peak_array = [peak, peak_k, "Peak"]
    # Tip
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_tip, cv=10)
    tip_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_tip)
    tip = clf.predict(test_input)[0]
    tip_array = [tip, tip_k, "Tip"]
    # Total
    cv_scores = cross_val_score(clf, all_features_df1_scaled, all_classes_total, cv=10)
    total_k = cv_scores.mean()
    clf.fit(all_features_df1_scaled, all_classes_total)
    total = clf.predict(test_input)[0]
    total_array = [total, total_k, "Total"]
    # DPH
    cv_scores = cross_val_score(clf, all_features_df2_scaled, all_classes_DPH, cv=10)
    DPH_k = cv_scores.mean()
    clf.fit(all_features_df2_scaled, all_classes_DPH)
    DPH = clf.predict(test_input)[0]
    DPH_array = [DPH, DPH_k, "DPH"]
    # TEPH
    cv_scores = cross_val_score(clf, all_features_df2_scaled, all_classes_TEPH, cv=10)
    TEPH_k = cv_scores.mean()
    clf.fit(all_features_df2_scaled, all_classes_TEPH)
    TEPH = clf.predict(test_input)[0]
    TEPH_array = [TEPH, TEPH_k, "TEPH"]
    return [base_array, peak_array, tip_array, total_array, DPH_array, TEPH_array]

estimates = get_KNN(location, start_datetime_SINE, start_datetime_COSINE, end_datetime_SINE, end_datetime_COSINE, dayOfWeek)
print(estimates)

print("Total Calculation:", estimates[3][0] * estimates[4][0] * hour_difference)
total_test_std = math.sqrt(total_variance * estimates[4][0] * hour_difference)
print("Total STD Calculation:", total_test_std)

print("TEPH Calculation:", estimates[5][0] * hour_difference)
TEPH_test_std = math.sqrt(TEPH_variance * hour_difference)
print("TEPH STD Calculation:", TEPH_test_std)

[[2.9, -0.1346326353550232, 'Base'], [0.3, -0.7809615373769722, 'Peak'], [6.294, -0.14381924065801707, 'Tip'], [9.494, -0.15185535441263334, 'Total'], [1.908, -0.1861387035216599, 'DPH'], [17.006999999999998, -0.27448851878140806, 'TEPH']]
Total Calculation: 18.114552
Total STD Calculation: 3.6520070197827446
TEPH Calculation: 17.006999999999998
TEPH STD Calculation: 4.8765598471583465


<h1>Neural Network</h1>

<h2>Base Pay</h2>

In [9]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Create model
def create_model():
    model = Sequential()
    model.add(Dense(5, input_dim=5, kernel_initializer="normal", activation="relu"))
    model.add(Dense(3, kernel_initializer="normal", activation="relu"))
    model.add(Dense(1, kernel_initializer="normal", activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

In [10]:
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

estimator = KerasRegressor(build_fn=create_model, epochs=100, verbose=0)

# K-Fold Cross-Validation (Base)
cv_scores = cross_val_score(estimator, all_features_scaled, all_classes_base, cv=10)
cv_scores.mean()

NameError: name 'all_features_scaled' is not defined

In [None]:
# Predict (Base)
estimator.fit(all_features_scaled, all_classes_base)
print(estimator.predict(test_input))

<h2>Peak Pay</h2>

In [None]:
# K-Fold Cross-Validation (Peak)
cv_scores = cross_val_score(estimator, all_features_scaled, all_classes_peak, cv=10)
cv_scores.mean()

In [None]:
# Predict (Peak)
estimator.fit(all_features_scaled, all_classes_peak)
print(estimator.predict(test_input))

<h2>Customer Tip</h2>

In [None]:
# K-Fold Cross-Validation (Tip)
cv_scores = cross_val_score(estimator, all_features_scaled, all_classes_tip, cv=10)
cv_scores.mean()

In [None]:
# Predict (Tip)
estimator.fit(all_features_scaled, all_classes_tip)
print(estimator.predict(test_input))

<h2>Total</h2>

In [None]:
# K-Fold Cross-Validation (Total)
cv_scores = cross_val_score(estimator, all_features_scaled, all_classes_total, cv=10)
cv_scores.mean()

In [None]:
# Predict (Total)
estimator.fit(all_features_scaled, all_classes_total)
print(estimator.predict(test_input))