In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

from src.data.store_as_csv import store_as_csv
from src.data.load_data import load_data

In [2]:
from datetime import datetime
current_date = datetime.now().strftime('%Y-%m-%d')

In [None]:
store_as_csv(current_date)

In [None]:
df = load_data(current_date)

game_rules = df.get('game_rules')
#awards = df.get('awards')
map = df.get('map')

first_customers_per_personality = map.drop_duplicates(subset="personality", keep="first")

mapName = game_rules['name'].iloc[0]
budget = float(game_rules['budget'].iloc[0])
gameLengthInMonths = int(game_rules['gameLengthInMonths'].iloc[0])

rates = [0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 0.5, 1, 5, 10, 100]

months = range(0, (gameLengthInMonths*10)+1, 12)

print(f"\nmapName: {mapName}\nbudget: {budget}\ngameLengthInMonths: {gameLengthInMonths}")

In [6]:
change_award = True

# step 1, grid-search on indivual-level

In [None]:
from src.gather_result.init_setup2 import collect_grid

collect_grid(rates=rates, months=months, local=True, current_date=current_date, key="personality", change_award=change_award)

# step 2, optimize each customer 
- step 1 helps me narrow down the values.
- saves each iteration

In [None]:
from src.gather_result.optimize_customer_1 import optimize_each_customer

In [None]:
print(f"estimate run-time: {round(((1245/25)*map.shape[0]) / 60, 1)/60}")

In [None]:
rates

In [None]:
current_date = datetime.now().strftime('%Y-%m-%d')
game_rules = pd.read_csv(f'data/{current_date}/game_rules.csv')

mapName = game_rules['name'].iloc[0]
budget = float(game_rules['budget'].iloc[0])
gameLengthInMonths = int(game_rules['gameLengthInMonths'].iloc[0])

customer_result = pd.DataFrame(columns=["mapName", "name", "interest_rate", "loan_duration", "award_duration", "award", "totalScore", "totalProfit", "happynessScore", "environmentalImpact"])

map_estimated_personalities = pd.read_csv('artifacts/map_estimated_personalities.csv')

def find_neighbors(rates, min_value, max_value):
    try:
        min_index = rates.index(min_value)
        max_index = rates.index(max_value)

        min_before = rates[min_index - 1] if min_index > 1 else rates[min_index] # index 0 is 99999999999

        max_after = rates[max_index + 1] if max_index + 1 < len(rates) else rates[max_index]
    except:
        min_before = rates[1]
        max_after = rates[len(rates)-1]

    return min_before, max_after


def find_month_neighbors(months, min_value, max_value):
    try:
        min_index = months.index(min_value)
        max_index = months.index(max_value)

        min_before = months[min_index - 1] if min_index > 1 else months[min_index]  

        max_after = months[max_index + 1] if max_index + 1 < len(months) else months[max_index]
    except:
        min_before = months[1]
        max_after = months[len(months)-1]

    return min_before, max_after

customer_result = pd.DataFrame()
for i, customer in map_estimated_personalities.iterrows():

    min_before, max_after = find_neighbors(rates, customer['acceptedMinInterest'], customer['acceptedMaxInterest'])
    min_months_before, max_months_after = find_month_neighbors(months, customer['min_months'], customer['max_months'])

    customer_result_iter = optimize_each_customer(mapName=mapName, 
                           customer=customer['name'], 
                           gameLengthInMonths = gameLengthInMonths, 
                           min_interest=min_before, 
                           max_interest=max_after, 
                           min_loan_duration=min_months_before, 
                           max_loan_duration=max_months_after, 
                           customer_idx=i+1, 
                           total_customers=map_estimated_personalities.shape[0],
                           n_random_starts=30, 
                           n_calls=50,
                           early_stopping_patience=10, # max 100
                           early_stopping_min_delta=10,
                           change_award = change_award)
    
    customer_result = pd.concat([customer_result, customer_result_iter], ignore_index=True)

customer_result.to_csv('artifacts/customer_result.csv', index=False)

In [None]:
customer_result = pd.read_csv('artifacts/customer_result.csv')

# step 3, find the optimal combination
- Integer Linear Programming to solve my knapsack problem
- based on grid from step 2
- first submit

In [3]:
class EarlyStoppingAfterRandomStarts:
    def __init__(self, patience, min_delta, n_random_starts):
        self.patience = patience
        self.min_delta = min_delta
        self.n_random_starts = n_random_starts
        self.best_score = None
        self.no_improvement_count = 0
        self.total_iters = 0

    def __call__(self, res):
        self.total_iters += 1
        
        # Skip early stopping check for random starts
        if self.total_iters <= self.n_random_starts:
            return False

        # After random starts, begin tracking for early stopping
        current_score = res.fun
        if self.best_score is None or current_score < self.best_score - self.min_delta:
            self.best_score = current_score
            self.no_improvement_count = 0
        else:
            self.no_improvement_count += 1

        # Stop if patience is exceeded
        if self.no_improvement_count >= self.patience:
            print(f"Early stopping at iteration {self.total_iters} due to no improvement.")
            return True
        return False

In [7]:
import json
import http.client

from skopt import gp_minimize, gbrt_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args

from pulp import LpMaximize, LpProblem, LpVariable, lpSum

map_estimated_personalities = pd.read_csv('artifacts/map_estimated_personalities.csv')
customer_result = pd.read_csv('artifacts/map_estimated_personalities.csv')

api_key = "bb244a0b-6c37-48fa-88f7-0644064e5065"
game_url = "api.considition.com"
hostname = "localhost"
port = 8080
local = False

df_df = pd.merge(customer_result, map_estimated_personalities[["name", "loan_amount", "happinessEffect"]], on='name', how="left")

if change_award:
    param_space = [
        Real(0, 1000, name="award_cost1"), 
        Real(0, 1000, name="award_cost2"),
    ]
else:    
    param_space = [
        Real(0, 1000, name="award_cost1"), 
    ]


def cost_objective(params):
    
    award_cost1 =  params[0]
    if change_award:
        award_cost2 =  params[1]

    if change_award:
        df_df['totalCost'] = df_df['loan_amount'] + ((award_cost1*int(np.floor(gameLengthInMonths/3)))/2) + ((award_cost2*int(np.floor(gameLengthInMonths/3)))/2) 
    else:
        df_df['totalCost'] = df_df['loan_amount'] + (award_cost1*int(np.floor(gameLengthInMonths/3)))

    df_all = df_df[(df_df['totalScore'] > 0) & (df_df['environmentalImpact'] > 0)][["mapName", "name", "interest_rate", "loan_duration", "totalCost", "totalScore", 'happynessScore']]


    # Sort by name, totalCost, and totalScore to ensure correct ordering before filtering
    df_all.sort_values(by=["name", "totalCost", "totalScore"], ascending=[True, False, False], inplace=True)

    # Remove rows if another row have higher score and lower Cost.
    filtered_rows = []

    for name in df_all['name'].unique():
        name_rows = df_all[df_all['name'] == name]
        
        for i, row in name_rows.iterrows():
            if not ((name_rows['totalScore'] > row['totalScore']) & (name_rows['totalCost'] < row['totalCost']) & (name_rows['totalCost'] < row['totalCost'])).any():
                filtered_rows.append(row)

    df_final_filtered = pd.DataFrame(filtered_rows)

    df_grouped = df_final_filtered.groupby('name', group_keys=False).apply(lambda x: x.loc[x['totalScore'].idxmax()])

    df_grouped = pd.concat([df_grouped, pd.DataFrame({
        "mapName": df_grouped['mapName'],
        "name": df_grouped['name'],
        "interest_rate": 0,
        "loan_duration": 0,
        "award_duration": 0,
        "award": "NoSelection",
        "totalCost": 0,
        "totalScore": 0
    })])

    # define the optimization problem to maximize totalScore without going over budget
    model = LpProblem("Maximize_TotalScore", LpMaximize)

    # create binary decision variables for each row (1 = selected, 0 = not selected)
    x = [LpVariable(f"x_{i}", cat="Binary") for i in range(len(df_grouped))]

    # objective function: maximize the total score
    model += lpSum([df_grouped['totalScore'].iloc[i] * x[i] for i in range(len(df_grouped))])

    # budget constraint: ensure the total cost does not exceed the budget
    model += lpSum([df_grouped['totalCost'].iloc[i] * x[i] for i in range(len(df_grouped))]) <= budget

    # solve the optimization problem
    model.solve()

    # retrieve the optimal solution rows
    selected_indices = [i for i in range(len(df_grouped)) if x[i].value() == 1 and df_grouped.iloc[i]["award"] != "NoSelection"]
    optimal_rows = df_grouped.iloc[selected_indices]

    proposals = [
        {
            "CustomerName": str(customer['name']),
            "YearlyInterestRate": customer['interest_rate'],
            "MonthsToPayBackLoan": customer['loan_duration']
        } for _, customer in optimal_rows.iterrows()
    ]

    iterations = [
        {
            customer['name']: {
                "Type": "Award" if ((month+1) % 3 == 0) and customer['loan_duration'] <= month else "Skip",
                "Award": "IkeaFoodCoupon" if (((month+1) % 3 == 0) & ((month+1) % 6 != 0)) and customer['loan_duration'] <= month
                                            else "IkeaDeliveryCheck" if (((month+1) % 3 == 0) & ((month+1) % 6 == 0)) and change_award  and customer['loan_duration'] <= month
                                                else "IkeaFoodCoupon" if (((month+1) % 3 == 0) & ((month+1) % 6 == 0))  and customer['loan_duration'] <= month
                                                    else "None" 
            }
            for _, customer in optimal_rows.iterrows()
        }
        for month in range(gameLengthInMonths)
    ]

    input_data = {
        "MapName": mapName,
        "Proposals": proposals,
        "Iterations": iterations
    }

    conn = http.client.HTTPConnection(hostname, port) if local else http.client.HTTPSConnection(game_url)
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }

    conn.request("POST", "/game", json.dumps(input_data), headers)
    response = conn.getresponse()
    body = response.read().decode("utf-8")
    
    try:
        data = json.loads(body)
        if isinstance(data, dict) and "score" in data:
            score = data["score"].get("totalScore", None)

            if score is not None:
                conn.close()
                return -score 
            else:
                print(f"Error: 'totalScore' not found in score data: {data}")
        else:
            print(f"Error: Unexpected response structure: {data}")
    except json.JSONDecodeError:
        print(f"Error: Response not in JSON format: {body}")
    conn.close()
    return 1e6  # Penalize with a large score if the response is invalid or improperly formatted

In [8]:
def progress_callback(res):
    iteration = len(res.x_iters)  
    current_score = round(res.func_vals[-1], 2) 
    best_score = round(res.fun, 2) 
    print(f"Iteration {iteration}: \tCurrent Score = {current_score}, \tBest Score = {best_score}")


n_random_starts = 10
n_calls = 500

early_stopping_callback = EarlyStoppingAfterRandomStarts(
    patience=50, min_delta=1, n_random_starts=n_random_starts
)

In [9]:
result = gbrt_minimize(cost_objective, 
                        param_space, 
                        n_random_starts=n_random_starts, 
                        n_calls=n_calls, 
                        n_jobs=1,
                        callback=[early_stopping_callback, progress_callback])

KeyError: 'loan_amount'

# step 3.1, trying do optimize by trying different awards as second award and mixing with how often they should get it

In [None]:
df_df = pd.merge(customer_result, map_estimated_personalities[["name", "loan_amount", "happinessEffect"]], on='name', how="left")

param_space2 = [
    Integer(0, 10000, name="award_cost2"),
    Integer(0, 10000, name="award_cost3"),
    Integer(1, 12, name="award_duration"),
    Categorical(["IkeaDeliveryCheck", "IkeaCheck", "GiftCard", "HalfInterestRate", "NoInterestRate"], name="award") 
]

def cost_objective2(params):
    
    award_cost1 = result['x'][0]
    award_cost2 = params[0]
    award_cost3 = params[1]
    award_duration = params[2]
    award =  params[3]

    df_df['totalCost'] = df_df['loan_amount'] + ((award_cost1*int(np.floor(gameLengthInMonths/3)))/2) + ((award_cost2*int(np.floor(gameLengthInMonths/3)))/2)
    df_df['totalCost'] = np.where(
                        df_df['happinessEffect'] == 1,
                        df_df['loan_amount'] + ((award_cost1*int(np.floor(gameLengthInMonths/3)))/2) + ((award_cost3*int(np.floor(gameLengthInMonths/3)))/2),
                        df_df['loan_amount'] + ((award_cost1*int(np.floor(gameLengthInMonths/3)))/2) + ((award_cost2*int(np.floor(gameLengthInMonths/3)))/2)
                    )
    df_all = df_df[df_df['totalScore'] > 0][["mapName", "name", "interest_rate", "loan_duration", "totalCost", "totalScore", 'happynessScore', 'happinessEffect']]

    df_all.sort_values(by=["name", "totalCost", "totalScore"], ascending=[True, False, False], inplace=True)

    filtered_rows = []

    for name in df_all['name'].unique():
        name_rows = df_all[df_all['name'] == name]
        
        for i, row in name_rows.iterrows():
            if not ((name_rows['totalScore'] > row['totalScore']) & (name_rows['totalCost'] < row['totalCost'])).any():
                filtered_rows.append(row)

    df_final_filtered = pd.DataFrame(filtered_rows)

    df_grouped = df_final_filtered.groupby('name', group_keys=False).apply(lambda x: x.loc[x['totalScore'].idxmax()])

    df_grouped = pd.concat([df_grouped, pd.DataFrame({
        "mapName": df_grouped['mapName'],
        "name": df_grouped['name'],
        "interest_rate": 0,
        "loan_duration": 0,
        "award_duration": 0,
        "award": "NoSelection",
        "totalCost": 0,
        "totalScore": 0
    })])

    # define the optimization problem to maximize totalScore without exceeding budget
    model = LpProblem("Maximize_TotalScore", LpMaximize)

    # create binary decision variables for each row (1 = selected, 0 = not selected)
    x = [LpVariable(f"x_{i}", cat="Binary") for i in range(len(df_grouped))]

    # objective function: maximize the total score
    model += lpSum([df_grouped['totalScore'].iloc[i] * x[i] for i in range(len(df_grouped))])

    # budget constraint: ensure the total cost does not exceed the budget
    model += lpSum([df_grouped['totalCost'].iloc[i] * x[i] for i in range(len(df_grouped))]) <= budget

    # solve the optimization problem
    model.solve()

    # retrieve the optimal solution rows
    selected_indices = [i for i in range(len(df_grouped)) if x[i].value() == 1 and df_grouped.iloc[i]["award"] != "NoSelection"]
    optimal_rows = df_grouped.iloc[selected_indices]

    proposals = [
        {
            "CustomerName": str(customer['name']),
            "YearlyInterestRate": customer['interest_rate'],
            "MonthsToPayBackLoan": customer['loan_duration']
        } for _, customer in optimal_rows.iterrows()
    ]

    # build the iterations list using the specific parameters for each customer. 
    iterations = [
        {
            customer['name']: {
                "Type": "Award" if ((month+1) % award_duration == 0) else "Skip",
                "Award": "IkeaFoodCoupon" if (((month + 1) % award_duration == 0) and ((month + 1) % award_duration*2 != 0)) 
                                                else award if (((month + 1) % award_duration == 0) and ((month + 1) % award_duration*2 == 0) and (customer['happinessEffect'] > 1.0)) 
                                                                else "IkeaDeliveryCheck" if (((month + 1) % award_duration == 0) and ((month + 1) % award_duration*2 == 0)) 
                                                                    else "None" 
            }
            for _, customer in optimal_rows.iterrows()
        }
        for month in range(gameLengthInMonths)
    ]

    input_data = {
        "MapName": mapName,
        "Proposals": proposals,
        "Iterations": iterations
    }

    conn = http.client.HTTPConnection(hostname, port) if local else http.client.HTTPSConnection(game_url)
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }

    conn.request("POST", "/game", json.dumps(input_data), headers)
    response = conn.getresponse()
    body = response.read().decode("utf-8")
    
    try:
        data = json.loads(body)
        if isinstance(data, dict) and "score" in data:
            score = data["score"].get("totalScore", None)

            if score is not None:
                conn.close()
                return -score  
            else:
                print(f"Error: 'totalScore' not found in score data: {data}")
        else:
            print(f"Error: Unexpected response structure: {data}")
    except json.JSONDecodeError:
        print(f"Error: Response not in JSON format: {body}")
    conn.close()
    return 1e6 

In [None]:
def progress_callback(res):
    iteration = len(res.x_iters)  
    current_score = round(res.func_vals[-1], 2)  
    best_score = round(res.fun, 2)  
    print(f"Iteration {iteration}: \tCurrent Score = {current_score}, \tBest Score = {best_score}")

n_random_starts = 10
n_calls = 500

early_stopping_callback = EarlyStoppingAfterRandomStarts(
    patience=50, min_delta=1, n_random_starts=n_random_starts
)

In [None]:
if change_award:
    result2 = gbrt_minimize(cost_objective2, 
                            param_space2, 
                            n_random_starts=n_random_starts, 
                            n_calls=n_calls, 
                            n_jobs=1,
                            callback=[early_stopping_callback, progress_callback])