In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the local deal_or_no_deal package used below) are re-imported before
# each cell executes and code edits are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Raw historical game records. This frame feeds the per-round preprocessing
# and is also queried by 'Contestant ID' for 'Winnings' / 'Case Amount'.
df = pd.read_excel(io='/deal_or_no_deal/data/Deal or No Deal Data.xlsx')

In [None]:
# Show the first five rows as a quick sanity check on the load.
df.head(n=5)

In [None]:
from deal_or_no_deal.preprocess import preprocess_historical_case_data

In [None]:
# Per-round case data, ordered so that each contestant's rounds are contiguous
# and ascending (the replay loops below iterate over them in this order).
case_data_df = (
    preprocess_historical_case_data(df, keep_contestant_id=True)
    .sort_values(by=['contestant_id', 'round'])
)

# Keep only contestants that have a round-1 row
# (presumably so every replay starts from the first banker offer -- TODO confirm).
contestants_to_keep = case_data_df.loc[
    case_data_df['round'] == 1, 'contestant_id'
].unique()
case_data_df = case_data_df[case_data_df['contestant_id'].isin(contestants_to_keep)]

case_data_df

## Replaying historical games with the `DQNAgent`

In [None]:
from deal_or_no_deal.dqn import DQNAgent

In [None]:
# 28 matches the feature vector assembled in the replay loop below:
# 26 case_* columns + scaled offer + scaled round number.
# 2 presumably is the number of actions (0 = deal, 1 = no deal, as used by the
# replay loop) -- TODO confirm the argument order against DQNAgent's signature.
agent = DQNAgent(28, 2)

In [None]:
# Restore a previously saved model from disk into the agent.
# NOTE(review): the path is absolute; adjust it if the data lives elsewhere.
agent.load('/deal_or_no_deal/data/dqn_v3_4000.h5')

In [None]:
# Replay every historical contestant against the DQN policy.
#
# For each contestant the rounds are walked in order. At the first round where
# the policy chooses "deal" (action 0) we record whether the accepted offer was
# at least the contestant's real winnings. If the policy never deals, we record
# whether the contestant's own case amount was at least their real winnings.
# Ties count as the model doing better (>=).

# Feature columns fed to the network, in the order the model input is built.
CASE_COLUMNS = [
    'case_0.01', 'case_1.0', 'case_5.0', 'case_10.0', 'case_25.0',
    'case_50.0', 'case_75.0', 'case_100.0', 'case_200.0', 'case_300.0',
    'case_400.0', 'case_500.0', 'case_750.0', 'case_1000.0', 'case_5000.0',
    'case_10000.0', 'case_25000.0', 'case_50000.0', 'case_75000.0',
    'case_100000.0', 'case_200000.0', 'case_300000.0', 'case_400000.0',
    'case_500000.0', 'case_750000.0', 'case_1000000.0',
]

# Scaling applied to the offer and round number before they reach the network.
OFFER_SCALE = 500000
ROUND_SCALE = 10

# Decision-rule constants.
# NOTE(review): provenance of these two values is not visible in this notebook
# (they look fitted/hand-tuned) -- TODO document where they come from.
HIGH_OFFER_CUTOFF = 134477.52
DEAL_SCORE_CUTOFF = 1.63

deal_outcomes_list = list()
no_deal_outcomes_list = list()

for player in tqdm(case_data_df['contestant_id'].unique()):
    player_case_data_df = case_data_df[case_data_df['contestant_id'] == player]

    # Look the contestant up once instead of filtering `df` per field.
    contestant_rows = df[df['Contestant ID'] == player]
    winnings = contestant_rows['Winnings'].values[0]
    case_amount = contestant_rows['Case Amount'].values[0]

    # Skip contestants without a usable result. `pd.isna` is required because
    # `not float('nan')` is False: a missing (NaN) value would otherwise fall
    # through and be scored as a loss, since any comparison with NaN is False.
    if pd.isna(winnings) or not winnings or winnings == 'None':
        continue

    # Loop-invariant: the last round this contestant has data for.
    final_round = player_case_data_df['round'].max()

    # The iterrows() key is the DataFrame index label, not the round number;
    # it is not needed here.
    for _, row in player_case_data_df.iterrows():
        game_state = row[CASE_COLUMNS].values.tolist()
        offer = [row['offer'] / OFFER_SCALE]
        round_number = [row['round'] / ROUND_SCALE]
        # Single-sample batch of shape (1, 28): 26 case flags + offer + round.
        model_input = np.array([game_state + offer + round_number])

        model_prediction = agent.model.predict(model_input)[0]

        # NOTE(review): an earlier version of this cell used the plain rule
        # "action 0 if prediction[0] > prediction[1]". The rule below instead
        # deals when prediction[0] is well BELOW prediction[1], which looks
        # inverted relative to that -- confirm this is intentional.
        model_difference = model_prediction[0] - model_prediction[1]
        initial_value = 1 if row['offer'] > HIGH_OFFER_CUTOFF else 0
        next_action = 0 if (initial_value - model_difference) > DEAL_SCORE_CUTOFF else 1

        if next_action == 0:
            # Policy takes the deal: compare the accepted offer with reality.
            did_model_do_better = row['offer'] >= winnings
            deal_outcomes_list.append(did_model_do_better)
            break

        if row['round'] == final_round:
            # Policy never dealt: it ends up with the contestant's own case.
            did_model_do_better = case_amount >= winnings
            no_deal_outcomes_list.append(did_model_do_better)
            break

In [None]:
# Games in which the policy accepted an offer.
# True -> the accepted offer was >= the contestant's actual winnings.
# NOTE: the brute-force section reuses `deal_outcomes_list`, so this shows the
# results of whichever replay loop ran most recently.
deal_outcome_counts = pd.Series(deal_outcomes_list).value_counts()
deal_outcome_counts

In [None]:
# Games in which the policy never accepted an offer.
# True -> the contestant's case amount was >= their actual winnings.
# NOTE: the brute-force section reuses `no_deal_outcomes_list`, so this shows
# the results of whichever replay loop ran most recently.
no_deal_outcome_counts = pd.Series(no_deal_outcomes_list).value_counts()
no_deal_outcome_counts

## Replaying historical games with brute-force simulation

In [None]:
from deal_or_no_deal.fast_play import Deal_or_No_Deal_Fast_Play

In [None]:
# Simulator used by the brute-force policy below; it is configured with a
# saved banker model.
# NOTE(review): the .pkl extension suggests a pickle -- only load trusted files.
banker_model_path = '/deal_or_no_deal/data/banker_model_0908.pkl'
fast_player = Deal_or_No_Deal_Fast_Play(banker_model_filename=banker_model_path)

In [None]:
# Replay every historical contestant against the brute-force policy.
#
# For each contestant the rounds are walked in order. At the first round where
# the policy chooses "deal" (action 0) we record whether the accepted offer was
# at least the contestant's real winnings. If the policy never deals, we record
# whether the contestant's own case amount was at least their real winnings.
# Ties count as the model doing better (>=).

# Columns describing the game state, passed to the simulator as `cases_opened`.
CASE_COLUMNS = [
    'case_0.01', 'case_1.0', 'case_5.0', 'case_10.0', 'case_25.0',
    'case_50.0', 'case_75.0', 'case_100.0', 'case_200.0', 'case_300.0',
    'case_400.0', 'case_500.0', 'case_750.0', 'case_1000.0', 'case_5000.0',
    'case_10000.0', 'case_25000.0', 'case_50000.0', 'case_75000.0',
    'case_100000.0', 'case_200000.0', 'case_300000.0', 'case_400000.0',
    'case_500000.0', 'case_750000.0', 'case_1000000.0',
]

# Number of games the simulator is asked to run per decision.
GAMES_PER_DECISION = 100

# Take the deal when the simulator's returned value falls below this cutoff.
# NOTE(review): provenance of 0.58 is not visible in this notebook -- TODO document.
CONTINUE_PROBABILITY_CUTOFF = 0.58

deal_outcomes_list = list()
no_deal_outcomes_list = list()

for player in tqdm(case_data_df['contestant_id'].unique()):
    player_case_data_df = case_data_df[case_data_df['contestant_id'] == player]

    # Look the contestant up once instead of filtering `df` per field.
    contestant_rows = df[df['Contestant ID'] == player]
    winnings = contestant_rows['Winnings'].values[0]
    case_amount = contestant_rows['Case Amount'].values[0]

    # Skip contestants without a usable result. `pd.isna` is required because
    # `not float('nan')` is False: a missing (NaN) value would otherwise fall
    # through and be scored as a loss, since any comparison with NaN is False.
    if pd.isna(winnings) or not winnings or winnings == 'None':
        continue

    # Loop-invariant: the last round this contestant has data for.
    final_round = player_case_data_df['round'].max()

    # The iterrows() key is the DataFrame index label, not the round number;
    # it is not needed here.
    for _, row in player_case_data_df.iterrows():
        game_state = row[CASE_COLUMNS].values.tolist()

        # NOTE(review): presumably a simulation-based estimate and therefore
        # stochastic; no seed is set here, so results may vary between runs
        # -- TODO confirm and seed if reproducibility matters.
        continue_probability = fast_player.generate_future_game_states(
            cases_opened=game_state,
            round_num=int(row['round']),
            offer=float(row['offer']),
            number_of_games_to_run=GAMES_PER_DECISION,
        )

        next_action = 0 if continue_probability < CONTINUE_PROBABILITY_CUTOFF else 1

        if next_action == 0:
            # Policy takes the deal: compare the accepted offer with reality.
            did_model_do_better = row['offer'] >= winnings
            deal_outcomes_list.append(did_model_do_better)
            break

        if row['round'] == final_round:
            # Policy never dealt: it ends up with the contestant's own case.
            did_model_do_better = case_amount >= winnings
            no_deal_outcomes_list.append(did_model_do_better)
            break

In [None]:
# Games in which the brute-force policy accepted an offer.
# True -> the accepted offer was >= the contestant's actual winnings.
deal_outcome_counts = pd.Series(deal_outcomes_list).value_counts()
deal_outcome_counts

In [None]:
# Games in which the brute-force policy never accepted an offer.
# True -> the contestant's case amount was >= their actual winnings.
no_deal_outcome_counts = pd.Series(no_deal_outcomes_list).value_counts()
no_deal_outcome_counts

----- 