In [47]:
import pickle

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [48]:
# Load the cleaned, encoded DataFrame produced in Google Colab / PySpark.
# The 'Unnamed: 0' column is discarded right after reading
# (presumably the index written out when the CSV was saved -- confirm upstream).
model_df = pd.read_csv('model_df.csv').drop(columns='Unnamed: 0')
model_df.dtypes

price                        float64
room_type_Entire home/apt      int64
room_type_Hotel room           int64
room_type_Private room         int64
room_type_Shared room          int64
accommodates                   int64
bedrooms                       int64
beds                           int64
bathrooms                    float64
bathroom_type_private          int64
bathroom_type_shared           int64
neighbourhood_Brooklyn         int64
neighbourhood_Manhattan        int64
neighbourhood_Queens           int64
wifi                           int64
smoke_alarm                    int64
carbon_monoxide_alarm          int64
kitchen                        int64
air_conditioning               int64
tv                             int64
iron                           int64
essentials                     int64
hangers                        int64
shampoo                        int64
refrigerator                   int64
hair_dryer                     int64
dishes_and_silverware          int64
h

# Linear Regression

In [49]:
# Features are every column except `price`; `price` is the regression target.
X = model_df.drop("price", axis=1)
y = model_df.loc[:, "price"]

# Hold out a test set. random_state is pinned so the split is repeatable;
# no test_size is passed, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [50]:
# Baseline model: a plain linear regression fitted on the training split.
regr = linear_model.LinearRegression()
regr.fit(X=X_train, y=y_train)

In [60]:
# Predict the target for the held-out rows with the linear regression model
y_pred_regr = regr.predict(X_test)

# R-squared of the linear regression predictions on the test set
r2_regr = r2_score(y_true=y_test, y_pred=y_pred_regr)

# Mean Squared Error (MSE) and its square root (RMSE) for the same predictions
mse_regr = mean_squared_error(y_true=y_test, y_pred=y_pred_regr)
rmse_regr = np.sqrt(mse_regr)

print("Linear Regression Model - Root Mean Squared Error (RMSE):", rmse_regr)
print("Linear Regression Model - R-squared:", r2_regr)

Linear Regression Model - Root Mean Squared Error (RMSE): 101.27787596535208
Linear Regression Model - R-squared: 0.4485418030023708


In [52]:
# Score the fitted linear regression on both splits so the two values can be
# compared side by side (a large gap would point to over- or under-fitting).
train_score = regr.score(X_train, y_train)
test_score = regr.score(X_test, y_test)

print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.437809110582532
Testing Data Score: 0.4485418030023708


# Deep Learning

In [53]:
# Build a fully connected regression network.
# The input width is read from the training features instead of being
# hard-coded, so the model still builds if feature columns are added or
# removed upstream.
n_features = X_train.shape[1]

nn = tf.keras.models.Sequential()
# Explicit input declaration (replaces the `input_dim` argument on the first Dense layer).
nn.add(tf.keras.Input(shape=(n_features,)))
# First hidden layer keeps its original width of 38 units.
nn.add(tf.keras.layers.Dense(units=38, activation="relu"))
# Three further hidden layers of 20 ReLU units each.
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))
nn.add(tf.keras.layers.Dense(units=20, activation="relu"))
# Single output unit with no activation: one continuous prediction per row.
nn.add(tf.keras.layers.Dense(units=1))

In [54]:
# Regression setup: optimise mean squared error with Adam, and additionally
# track mean absolute error as a reported metric.
nn.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)

In [55]:
# Train for 100 epochs, setting aside 25% of the training data for validation.
# The returned History object is kept in `fit_model`.
fit_model = nn.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_split=0.25,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [56]:
# Evaluate on the held-out test set. The network is compiled with
# loss='mean_squared_error' and metrics=['mean_absolute_error'], so evaluate()
# yields the MSE loss followed by the mean absolute error. This is a
# regression model, so the second value is NOT an accuracy.
model_loss, model_mae = nn.evaluate(X_test, y_test)

# Backward-compatible alias for any later cell that still reads the old name.
model_accuracy = model_mae

print(f"Loss (MSE): {model_loss}, Mean Absolute Error: {model_mae}")

Loss: 8489.4794921875, Accuracy: 58.82362747192383


In [57]:
# Predict the target for the held-out rows with the deep learning model
y_pred_nn = nn.predict(X_test)

# R-squared of the deep learning predictions on the test set
r2_nn = r2_score(y_true=y_test, y_pred=y_pred_nn)

# Mean Squared Error (MSE) and its square root (RMSE) for the same predictions
mse_nn = mean_squared_error(y_true=y_test, y_pred=y_pred_nn)
rmse_nn = np.sqrt(mse_nn)

print("Deep Learning Model - Root Mean Squared Error (RMSE):", rmse_nn)
print("Deep Learning Model - R-squared:", r2_nn)

Deep Learning Model - Root Mean Squared Error (RMSE): 92.13838153240141
Deep Learning Model - R-squared: 0.5435800847128894


In [58]:
# Export the trained network to a pickle file.
# `pickle` is imported in the notebook's top import cell rather than here, so
# all dependencies are visible in one place.
# NOTE(review): whether a Keras model pickles cleanly depends on the installed
# TensorFlow/Keras version; Keras' own `nn.save(...)` is the documented way to
# persist a model. Confirm what downstream loaders expect before switching.
with open('EL_machine_learning_model.pkl', 'wb') as model_file:
    pickle.dump(nn, model_file)

In [59]:
# Export the trained network to a joblib file.
# `joblib` is imported in the notebook's top import cell rather than here, so
# all dependencies are visible in one place.
# NOTE(review): joblib serialises via pickle, so the same TensorFlow/Keras
# version caveats apply as for a plain pickle export -- confirm that consumers
# can load this file in their environment.
joblib.dump(nn, 'EL_machine_learning_model.joblib')

['EL_machine_learning_model.joblib']