In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
%matplotlib inline
import glob
import pickle
import time
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, RandomForestClassifier
import math
from sklearn.base import clone
!pip install scikit-optimize
from skopt import gp_minimize

In [None]:
# Read the held-out feature table and its label table from the working directory.
X_test, y_test = (
    pd.read_csv(csv_name) for csv_name in ("X_test.csv", "y_test.csv")
)

In [None]:
# Peek at the first five feature rows (five is head()'s default).
X_test.head()

In [None]:
# Peek at the first five label rows (five is head()'s default).
y_test.head()

In [None]:
# Distinct values present in the "beds" feature.
X_test.loc[:, "beds"].unique()

In [None]:
import decimal

def float_range(start, stop, step):
    """Yield evenly spaced floats from ``start`` up to, but excluding, ``stop``.

    The running value is accumulated as a ``decimal.Decimal`` so repeated
    addition does not drift the way binary floating point does; each value is
    converted to ``float`` only at the moment it is yielded.

    Parameters
    ----------
    start, stop, step : str, int, float or decimal.Decimal
        Bounds and increment. ``float`` arguments are converted through their
        string form, so ``0.1`` is treated as ``Decimal('0.1')`` rather than
        as its full binary expansion.

    Yields
    ------
    float
        ``start``, ``start + step``, ... for as long as the value is ``< stop``.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive. Because this is a generator the
        error surfaces when iteration starts, not when the function is called.
    """
    def _as_decimal(value):
        # Decimal(0.1) captures 0.1000000000000000055...; going through str()
        # keeps the short decimal value the caller actually wrote.
        if isinstance(value, float):
            return decimal.Decimal(str(value))
        return decimal.Decimal(value)

    current = _as_decimal(start)
    stop = _as_decimal(stop)
    step = _as_decimal(step)

    # A zero or negative step would never reach ``stop`` and loop forever.
    if step <= 0:
        raise ValueError("step must be positive, got %s" % step)

    while current < stop:
        yield float(current)
        current += step

In [None]:
# Candidate booking-probability thresholds: 0.025, 0.05, ..., 0.975
# (float_range excludes the stop value, so 1.0 itself is not included).
thresholds = list(float_range('0.025', '1', '0.025'))

# Seed the column with a float (0.0, not 0): the sweep later writes float
# revenue totals into it, and assigning floats into an int64 column is a
# deprecated upcast in recent pandas.
revenue_by_threshold = pd.DataFrame({"revenue": 0.0}, index=thresholds)
revenue_by_threshold

In [None]:
# Multipliers applied to the trial price on each downward / upward step.
_PRICE_DOWN_FACTOR = 0.95
_PRICE_UP_FACTOR = 1.05
# Price reductions stop once the trial price is no longer above this value.
_MIN_TRIAL_PRICE = 10
# Give up after more than this many consecutive steps with an unchanged probability.
_MAX_FLAT_STEPS = 3


def _prob_at_price(clf, features, price):
    """Return ``clf.predict_proba(...)[0][0]`` for ``features`` repriced at ``price``.

    ``features`` is left untouched; the override is applied to a fresh dict, and
    because the key already exists its position (column order) is preserved.
    """
    frame = pd.DataFrame(data={**features, "price_calendar": price}, index=[0])
    return clf.predict_proba(frame)[0][0]


def calc_revenue_at_tuned_price(row, threshold, clf):
    """Search for a tuned price for one listing and return price * probability.

    The listing's ``price_calendar`` is moved in 5% steps, re-scoring the
    listing with ``clf`` after each step:

    * If the probability at the current price is below ``threshold``, the
      price is lowered while that keeps the probability from falling, until
      the threshold is reached or the price is no longer above 10.
    * Otherwise the price is raised while the probability stays above
      ``threshold`` and does not rise.

    Either search also stops after more than three consecutive steps that
    leave the probability unchanged.

    Parameters
    ----------
    row : pandas.Series
        Feature values for one listing; must contain ``"price_calendar"``.
        The series is not modified.
    threshold : float
        Probability the search tries to reach (lowering) or stay above (raising).
    clf : object
        Fitted classifier exposing ``predict_proba``. Column 0 of its output is
        used as the booking probability.
        NOTE(review): this presumes ``clf.classes_[0]`` is the "booked" label -
        confirm against the fitted model.

    Returns
    -------
    float
        The selected price multiplied by the probability predicted at that
        same price.
    """
    # Work from a plain dict copy so the caller's row is never written to.
    features = row.to_dict()
    current_price = row["price_calendar"]
    prob_booked = _prob_at_price(clf, features, features["price_calendar"])

    trial_price = current_price
    steps_in_trap = 0

    if prob_booked < threshold:
        # Probability too low: try progressively cheaper prices.
        while prob_booked < threshold and trial_price > _MIN_TRIAL_PRICE:
            saved_trial_price = trial_price
            trial_price = trial_price * _PRICE_DOWN_FACTOR
            prob_booked_trial = _prob_at_price(clf, features, trial_price)

            if prob_booked_trial >= prob_booked:
                if prob_booked_trial == prob_booked:
                    steps_in_trap += 1
                    if steps_in_trap > _MAX_FLAT_STEPS:
                        # Cheaper prices are not helping; keep the higher one.
                        return saved_trial_price * prob_booked
                else:
                    steps_in_trap = 0
                prob_booked = prob_booked_trial
            else:
                # The cut made things worse; fall back to the previous price.
                return saved_trial_price * prob_booked
        return trial_price * prob_booked

    # Probability already at/above the threshold: try progressively higher prices.
    while True:
        saved_trial_price = trial_price
        trial_price = trial_price * _PRICE_UP_FACTOR
        prob_booked_trial = _prob_at_price(clf, features, trial_price)

        if prob_booked_trial <= prob_booked:
            if prob_booked_trial == prob_booked:
                steps_in_trap += 1
                if steps_in_trap > _MAX_FLAT_STEPS:
                    return saved_trial_price * prob_booked
            else:
                steps_in_trap = 0

            if prob_booked_trial > threshold:
                # Still above the threshold: accept the increase and keep going.
                prob_booked = prob_booked_trial
            else:
                # The increase pushed the probability to or below the threshold,
                # so it is rejected. prob_booked still belongs to the previous
                # price, keeping the returned price and probability paired.
                return saved_trial_price * prob_booked
        else:
            # Probability rose with the price; stop at the previous price.
            return saved_trial_price * prob_booked
        

In [None]:
# NOTE(review): unpickling can execute arbitrary code, so gb_best.pkl must
# come from a trusted source.
# The with-block closes the file handle as soon as the model is loaded.
with open("gb_best.pkl", 'rb') as model_file:
    clf = pickle.load(model_file)

In [None]:
# Display the class labels stored on the loaded classifier.
# NOTE(review): calc_revenue_at_tuned_price reads predict_proba(...)[0][0];
# presumably the first label listed here is the "booked" class - confirm.
clf.classes_

In [None]:
# Sweep every candidate threshold, totalling the projected revenue over a
# fixed sample of listings, and remember which threshold earns the most.

# Cap the sample size so a test set with fewer than 10000 rows does not raise.
sample_size = min(10000, len(X_test))
# random_state pins the draw, so sampling once gives the same rows that
# re-sampling inside the loop would, without repeating the work per threshold.
sampled_listings = X_test.sample(n=sample_size, random_state=42)

# Revenue totals are floats; make sure the target column can hold them
# without an implicit int -> float upcast on assignment.
revenue_by_threshold["revenue"] = revenue_by_threshold["revenue"].astype(float)

highest_revenue = -math.inf
threshold_at_highest_revenue = None
for threshold in thresholds:
    # Each row is passed as a copy: iterrows() may yield views of the frame,
    # and the shared sample must look identical to every threshold.
    revenue = sum(
        calc_revenue_at_tuned_price(listing.copy(), threshold, clf)
        for _, listing in sampled_listings.iterrows()
    )
    if revenue > highest_revenue:
        highest_revenue = revenue
        threshold_at_highest_revenue = threshold
    revenue_by_threshold.loc[threshold, "revenue"] = revenue

In [None]:
# Display the revenue total stored for each threshold in revenue_by_threshold.
revenue_by_threshold

In [None]:
# Largest revenue total found across all thresholds in the sweep.
highest_revenue

In [None]:
# Threshold at which the largest revenue total was recorded.
threshold_at_highest_revenue

In [None]:
import matplotlib.pyplot as plt

# Line chart of projected revenue against the booking-probability threshold,
# drawn through an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(revenue_by_threshold.index, revenue_by_threshold)
ax.set_ylabel('Revenue projection')
ax.set_xlabel('Threshold of probability for property to be booked')
plt.show()