In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from datetime import datetime

# Load the development (labelled) and evaluation splits, discarding the
# columns that are not used as features further down.
unused_columns = ["name", "host_name", "neighbourhood"]
df_dev = pd.read_csv("../datasets/NYC_Airbnb/development.csv").drop(columns=unused_columns)
df_eval = pd.read_csv("../datasets/NYC_Airbnb/evaluation.csv").drop(columns=unused_columns)


In [2]:
# Replace missing values column by column; the same defaults are applied to
# both splits so they stay consistent.
# NOTE(review): "2008-08-11" is presumably chosen to predate every real review — confirm.
nan_defaults = {
    "last_review": "2008-08-11",
    "reviews_per_month": 0,
}
df_dev = df_dev.fillna(value=nan_defaults)
df_eval = df_eval.fillna(value=nan_defaults)


In [3]:
# Convert last_review into a numeric feature: the number of whole days between
# each listing's last review and the newest review found in either split.
#
# The reference point is taken from the data rather than from the wall clock
# (datetime.today()), so the feature values no longer change depending on the
# day the notebook is executed. Because the reference is the maximum over both
# splits, every difference is >= 0.
last_review_dev = pd.to_datetime(df_dev["last_review"])
last_review_eval = pd.to_datetime(df_eval["last_review"])
newest_review = max(last_review_dev.max(), last_review_eval.max())

# Vectorised timedelta arithmetic instead of a per-row Python lambda.
df_dev["last_review"] = (newest_review - last_review_dev).dt.days
df_eval["last_review"] = (newest_review - last_review_eval).dt.days

In [4]:
# One-hot encode the categorical features of both splits.
categorical_columns = ["room_type", "neighbourhood_group"]
dummy_prefixes = ["room_type=", "neig_group="]
df_dev = pd.get_dummies(df_dev, columns=categorical_columns, prefix=dummy_prefixes)
df_eval = pd.get_dummies(df_eval, columns=categorical_columns, prefix=dummy_prefixes)

# The two splits are encoded independently, so a category that is absent from
# one of them would yield a different set (or order) of dummy columns and the
# positional feature matrices built later would no longer line up.
# Force the evaluation frame to carry exactly the development columns (minus
# the target), in the same order; dummies unseen in evaluation become all-zero
# and dummies unseen in development are dropped.
feature_columns = df_dev.columns.drop("price", errors="ignore")
df_eval = df_eval.reindex(columns=feature_columns, fill_value=0)

In [5]:
# Split the development frame into the target vector and the feature matrix
# (everything except the row identifier and the target), as numpy arrays.
y_dev = df_dev["price"].values
dev_features = df_dev.drop(columns=["id", "price"])
X_dev = dev_features.values

In [6]:
# Fit the random forest on the development set.
# random_state is fixed so that repeated runs of the notebook build the same
# forest and therefore reproduce the same predictions.
model = RandomForestRegressor(
    n_estimators=500,
    max_features="sqrt",
    max_depth=30,
    n_jobs=16,
    random_state=42,
)
model.fit(X_dev, y_dev)

RandomForestRegressor(max_depth=30, max_features='sqrt', n_estimators=500,
                      n_jobs=16)

In [7]:
# Keep the row identifiers aside, then predict on every remaining column of
# the evaluation frame.
eval_features = df_eval.drop(columns=["id"])
ids = df_eval["id"].values
X_eval = eval_features.values
y_pred = model.predict(X_eval)

In [9]:
# Write the submission file: a header followed by one "<id>,<prediction>" row
# per evaluation sample.
# A length mismatch means ids and predictions are out of sync; stop before
# touching the file instead of printing a message and writing a broken or
# truncated submission.
if len(ids) != len(y_pred):
    raise ValueError(
        "ERROR: " + str(len(ids)) + " ids but " + str(len(y_pred)) + " predictions"
    )
with open("results.csv", "w") as f:
    f.write("Id,Predicted\n")
    # !s forces str(), matching the original string concatenation exactly.
    f.writelines(f"{row_id!s},{pred!s}\n" for row_id, pred in zip(ids, y_pred))