<h1>Prepare Data</h1>
<p>Attributes:</p>
<ul>
    <li>Platform: Uber=0, Lyft=1, HopSkipDrive=2 (nominal)</li>
    <li>Location: population density / people per square mile (integer)</li>
    <li>Datetime: YYYY-MM-DDTHH:MMZ (ordinal)</li>
    <li>Bots: surge pricing (float)</li>
    <li>Earnings: earnings (float)</li>
</ul>

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score

# Import file
import_file = "rideshare.csv"
columns = ["Platform", "Location", "Datetime", "Bots", "Earnings"]
df = pd.read_csv(import_file, names=columns)

# Change to numerical data
platforms = {"Uber": 0, "Lyft": 1, "HopSkipDrive": 2}
def get_population_density(location):
    # Calculate population density of location and return result
    return location
df["Platform"] = df["Platform"].map(platforms)
df["Location"] = df["Location"].map(get_population_density)
df["Datetime"] = pd.to_datetime(df["Datetime"], format="%Y-%m-%dT%H:%MZ", errors="coerce")
df["Month"] = df["Datetime"].dt.month
df["Day"] = df["Datetime"].dt.day
df["DayOfWeek"] = df["Datetime"].dt.dayofweek
df["Hour"] = df["Datetime"].dt.hour
df.drop(columns=["Datetime"], axis=1, inplace=True)
df = df[["Platform", "Location", "Month", "Day", "DayOfWeek", "Hour", "Bots", "Earnings"]]

# Separate features and classes
feature_names = ["Platform", "Location", "Month", "Day", "DayOfWeek", "Hour", "Bots"]
all_features = df[feature_names].values
all_classes = df["Earnings"].values

# Normalize data
scaler = preprocessing.StandardScaler()
all_features_scaled = scaler.fit_transform(all_features)

# Test input
test_input = [[0, 500, 3, 3, 3, 2, 16]]

df.head()

Unnamed: 0,Platform,Location,Month,Day,DayOfWeek,Hour,Bots,Earnings
0,0,66940,1,1,2,1,18.0,15
1,1,20000,1,2,3,2,20.0,14
2,2,15000,1,3,4,3,12.0,10
3,0,40000,1,4,5,4,19.12,17
4,1,60000,1,5,6,5,20.12,19


<h1>Logistic Regression</h1>

In [2]:
from sklearn.linear_model import LogisticRegression

# K-Fold Cross-Validation
clf = LogisticRegression()
cv_scores = cross_val_score(clf, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [3]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

<h1>Naïve Bayes</h1>

In [4]:
from sklearn.naive_bayes import MultinomialNB

# Normalize data
scaler = preprocessing.MinMaxScaler()
all_features_minmax = scaler.fit_transform(all_features)

# K-Fold Cross-Validation
clf = MultinomialNB()
cv_scores = cross_val_score(clf, all_features_minmax, all_classes, cv=10)
cv_scores.mean()

0.9

In [5]:
# Predict
clf.fit(all_features_minmax, all_classes)
clf.predict(test_input)

array([15])

<h1>Decision Trees</h1>

In [6]:
from sklearn.tree import DecisionTreeClassifier

# K-Fold Cross-Validation
clf = DecisionTreeClassifier(random_state=1)
cv_scores = cross_val_score(clf, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [7]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

<h1>Random Forest</h1>

In [8]:
from sklearn.ensemble import RandomForestClassifier

# K-Fold Cross-Validation
clf = RandomForestClassifier(n_estimators=10, random_state=1)
cv_scores = cross_val_score(clf, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [9]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

<h1>SVM</h1>

In [10]:
from sklearn import svm

# Hyperparameters
C = 1.0

# K-Fold Cross-Validation
svc = svm.SVC(kernel="linear", C=C)
cv_scores = cross_val_score(svc, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [11]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

In [12]:
# K-Fold Cross-Validation
svc = svm.SVC(kernel="rbf", C=C)
cv_scores = cross_val_score(svc, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [13]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

In [14]:
# K-Fold Cross-Validation
svc = svm.SVC(kernel="sigmoid", C=C)
cv_scores = cross_val_score(svc, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [15]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

In [16]:
# K-Fold Cross-Validation
svc = svm.SVC(kernel="poly", C=C)
cv_scores = cross_val_score(svc, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [17]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

<h1>K Nearest Neighbors</h1>

In [18]:
from sklearn import neighbors

# K-Fold Cross-Validation
clf = neighbors.KNeighborsClassifier(n_neighbors=10)
cv_scores = cross_val_score(clf, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

1.0

In [19]:
# Find optimal value for neighbors
for n in range(1, 50):
    clf = neighbors.KNeighborsClassifier(n_neighbors=n)
    cv_scores = cross_val_score(clf, all_features_scaled, all_classes, cv=10)
    print(n, cv_scores.mean())

1 1.0
2 1.0
3 1.0
4 1.0
5 1.0
6 1.0
7 1.0
8 1.0
9 1.0
10 1.0
11 1.0
12 1.0
13 1.0
14 1.0
15 1.0
16 1.0
17 1.0
18 0.5
19 0.5
20 0.5
21 0.5
22 0.5
23 0.5
24 0.5
25 0.5
26 0.5
27 0.2
28 0.2
29 0.2
30 0.2
31 0.2
32 0.2
33 0.2
34 0.2
35 0.2
36 0.2
37 0.29999999999999993
38 0.29999999999999993
39 0.29999999999999993
40 0.29999999999999993
41 0.29999999999999993
42 0.29999999999999993
43 0.29999999999999993
44 0.29999999999999993
45 0.29999999999999993
46 0.29999999999999993
47 0.29999999999999993
48 0.29999999999999993
49 0.29999999999999993


In [20]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])

<h1>Neural Network</h1>

In [21]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Create model
def create_model():
    model = Sequential()
    model.add(Dense(10, input_dim=7, kernel_initializer="normal", activation="relu"))
    model.add(Dense(5, kernel_initializer="normal", activation="relu"))
    model.add(Dense(1, kernel_initializer="normal", activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

In [22]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# K-Fold Cross-Validation
estimator = KerasClassifier(build_fn=create_model, epochs=100, verbose=0)
cv_scores = cross_val_score(estimator, all_features_scaled, all_classes, cv=10)
cv_scores.mean()

0.15000000223517418

In [23]:
# Predict
clf.fit(all_features_scaled, all_classes)
clf.predict(test_input)

array([19])