# Multi-Armed Bandit Game
## Andrew Yeh

In [1]:
import numpy as np
import time
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import clear_output
from IPython.core.debugger import set_trace

In [2]:
def print_houses():
    """Print the index and reward of every house that has been revealed.

    Reads the module-level ``houses`` list, whose entries are
    ``[reward, revealed_flag]`` pairs. Only houses whose flag equals 1 are
    shown; the reward is rounded to two decimals for display.
    """
    for index, house in enumerate(houses):
        already_revealed = house[1] == 1
        if not already_revealed:
            continue
        print("House ",index,": ",np.round(house[0],2))
    
def choose_house(i):
    """Visit house ``i``: mark it as revealed and return its reward.

    Mutates the module-level ``houses`` list by setting the revealed flag
    (second element of the pair) to 1. Nothing is printed and no running
    total is kept here; the caller accumulates the returned reward.
    """
    house = houses[i]
    house[1] = 1
    return house[0]
    
    
def generate_houses_normal(mean = 0, std = 10, n = 1000):
    """Create ``n`` houses whose rewards are drawn from Normal(mean, std).

    Returns a list of mutable ``[reward, revealed_flag]`` pairs. Every flag
    starts at 0, meaning the house has not been visited yet.
    """
    houses = []
    for reward in np.random.normal(mean, std, n):
        houses.append([reward, 0])
    return houses

Factors of interest:
* How different are the rewards?

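If the rewards aren't that different from one another, you don't need to explore much, because almost any house is nearly as good as the best one. If they are very different, you need to explore more to find a high-reward house.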

* How many houses are there?

There is a trying (exploration) phase and an exploitation phase. At first glance, I think we want to stop exploring at the point where one more house tried, as a percent of total houses, is just less than one more try, as a percent of total tries, i.e. the point where a given exploratory try decreases our "exploitation time" by a larger percentage than it increases our chance of finding the true maximum.

* Is the reward for a given house fixed or a random variable?

If it's a random variable, do we know the distribution? For this game I'm keeping it simple: each house's reward is drawn once from a normal distribution and then stays fixed on every visit.

In [80]:
def play_game(tries, strategy = "manual", threshold = 0.5):
    """Play ``tries`` rounds of the house game and return cumulative rewards.

    Parameters
    ----------
    tries : int
        Number of rounds to play.
    strategy : "manual" or callable
        ``"manual"`` reads each choice from ``input()``. Otherwise it is
        called as ``strategy(revealed_houses, turn, tries, threshold=threshold)``.
        In both cases a choice may be a house index, ``"max"`` (best reward
        seen so far) or ``"explore"`` (a random house that still shows 0).
    threshold : float
        Passed through unchanged to the strategy callable.

    Returns
    -------
    numpy.ndarray
        Array of length ``tries``; element ``t`` is the total reward
        collected in rounds ``0..t``.

    Raises
    ------
    ValueError
        If a choice is neither ``"max"``, ``"explore"`` nor convertible to int.

    Notes
    -----
    Reads the module-level ``houses`` list and mutates it via ``choose_house``.
    Unrevealed houses are represented by 0 in ``revealed_houses``, so a
    revealed house whose reward is exactly 0 looks unexplored, and ``"max"``
    prefers an unrevealed house over revealed houses with negative rewards.
    """
    total_rewards = np.zeros(tries)
    running_total = 0.0

    for turn in range(tries):
        # Reward of each revealed house; 0 for houses not visited yet.
        revealed_houses = [house[0] * house[1] for house in houses]

        if strategy == "manual":
            choice = input()
        else:
            choice = strategy(revealed_houses, turn, tries, threshold = threshold)

        # Resolve symbolic choices into a concrete house index.
        if choice == "max":
            choice = revealed_houses.index(max(revealed_houses))
        elif choice == "explore":
            unexplored = np.where(np.array(revealed_houses) == 0)[0]
            if unexplored.size > 0:
                choice = np.random.choice(unexplored)
            else:
                # Nothing left to explore: fall back to the best known house
                # instead of crashing in np.random.choice on an empty array.
                choice = revealed_houses.index(max(revealed_houses))

        # Convert BEFORE indexing: input() returns a string, and a string
        # cannot be used as a list index inside choose_house.
        choice = int(choice)

        running_total += choose_house(choice)
        total_rewards[turn] = running_total

        if strategy == "manual":
            # Report after the reward is recorded so the total is current
            # (it was previously read before being written, always showing 0).
            clear_output()
            print("Try Number:", turn+1)
            print("Your choice:", choice)
            print("Total reward:",np.round(total_rewards[turn], 2),"\n")
            print_houses()

    print("\n Game finished!")
    return(total_rewards)
    

In [81]:
def plot_results(total_rewards):
    """Plot the cumulative reward curve against the try index.

    Draws on the current matplotlib axes; ``total_rewards`` is the sequence
    returned by ``play_game``.
    """
    try_indices = range(len(total_rewards))
    plt.plot(try_indices, total_rewards)
    plt.ylabel("Reward")
    plt.xlabel("Tries")

Todo: Build out some sample strategies

In [82]:
def learn_p_then_exploit(revealed_houses, try_number, total_tries, threshold = 0.5):
    """Explore for the first ``threshold`` fraction of tries, then exploit.

    Returns ``"explore"`` while at least one house still shows 0 (unexplored)
    and ``try_number / total_tries`` is below ``threshold``; otherwise
    returns ``"max"``.
    """
    still_unexplored = 0 in revealed_houses
    # The ratio is only evaluated when something is left to explore.
    in_learning_phase = still_unexplored and try_number/total_tries < threshold
    return "explore" if in_learning_phase else "max"

In [84]:
def randomly_explore_p(revealed_houses, try_number, total_tries, threshold = 0.1):
    """Explore with probability ``threshold`` on each try, otherwise exploit.

    Returns ``"explore"`` when at least one house still shows 0 (unexplored)
    and a uniform draw on [0, 1) falls below ``threshold``; otherwise returns
    ``"max"``. ``try_number`` and ``total_tries`` are unused here; they are
    accepted so the signature matches the way ``play_game`` calls a strategy.
    """
    has_unexplored = 0 in revealed_houses
    # The random draw only happens when there is something left to explore.
    if has_unexplored and np.random.uniform(0, 1) < threshold:
        return "explore"
    return "max"

In [85]:
# Build 1000 houses with rewards drawn from Normal(100, 40), then play 100
# tries with the explore-then-exploit strategy (exploring for the first half).
houses = generate_houses_normal(mean = 100, std = 40, n = 1000)
play_game(tries = 100, strategy = learn_p_then_exploit, threshold = 0.5)

House  742 :  144.4
House  497 :  89.57
House  742 :  144.4
House  473 :  185.78
House  497 :  89.57
House  742 :  144.4
House  473 :  185.78
House  497 :  89.57
House  618 :  167.25
House  742 :  144.4
House  473 :  185.78
House  497 :  89.57
House  618 :  167.25
House  742 :  144.4
House  760 :  142.57
House  473 :  185.78
House  497 :  89.57
House  618 :  167.25
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  473 :  185.78
House  497 :  89.57
House  552 :  75.49
House  618 :  167.25
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  430 :  131.54
House  473 :  185.78
House  497 :  89.57
House  552 :  75.49
House  618 :  167.25
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  127 :  20.68
House  430 :  131.54
House  473 :  185.78
House  497 :  89.57
House  552 :  75.49
House  618 :  167.25
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  127 :  20.68
House  157 :  28.72
House  430 :  131.54
House  473 :  185.7

House  282 :  112.1
House  371 :  113.48
House  381 :  98.76
House  430 :  131.54
House  465 :  98.68
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
House  632 :  96.16
House  633 :  92.17
House  658 :  84.23
House  714 :  75.35
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  810 :  71.88
House  828 :  153.93
House  861 :  115.51
House  864 :  61.51
House  914 :  121.83
House  925 :  129.23
House  969 :  62.04
House  997 :  26.91
House  999 :  140.49
House  11 :  155.91
House  32 :  159.76
House  90 :  105.66
House  127 :  20.68
House  137 :  128.37
House  154 :  128.94
House  157 :  28.72
House  181 :  122.87
House  244 :  98.28
House  282 :  112.1
House  371 :  113.48
House  381 :  98.76
House  430 :  131.54
House  465 :  98.68
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
H

House  361 :  178.11
House  371 :  113.48
House  381 :  98.76
House  410 :  33.46
House  423 :  99.31
House  430 :  131.54
House  443 :  97.26
House  465 :  98.68
House  466 :  -2.77
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
House  622 :  84.78
House  632 :  96.16
House  633 :  92.17
House  658 :  84.23
House  713 :  94.38
House  714 :  75.35
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  792 :  100.08
House  810 :  71.88
House  814 :  102.62
House  828 :  153.93
House  836 :  66.23
House  861 :  115.51
House  864 :  61.51
House  914 :  121.83
House  925 :  129.23
House  947 :  112.06
House  969 :  62.04
House  997 :  26.91
House  999 :  140.49
House  11 :  155.91
House  32 :  159.76
House  90 :  105.66
House  127 :  20.68
House  137 :  128.37
House  154 :  128.94
House  157 :  28.72
House  181 :  122.87
House  189 :  196.23
House  232 :  76.91
House  244 :  98.28
H

House  361 :  178.11
House  371 :  113.48
House  381 :  98.76
House  410 :  33.46
House  423 :  99.31
House  430 :  131.54
House  443 :  97.26
House  465 :  98.68
House  466 :  -2.77
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
House  622 :  84.78
House  632 :  96.16
House  633 :  92.17
House  658 :  84.23
House  713 :  94.38
House  714 :  75.35
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  792 :  100.08
House  810 :  71.88
House  814 :  102.62
House  828 :  153.93
House  836 :  66.23
House  861 :  115.51
House  864 :  61.51
House  914 :  121.83
House  925 :  129.23
House  947 :  112.06
House  969 :  62.04
House  997 :  26.91
House  999 :  140.49
House  11 :  155.91
House  32 :  159.76
House  90 :  105.66
House  127 :  20.68
House  137 :  128.37
House  154 :  128.94
House  157 :  28.72
House  181 :  122.87
House  189 :  196.23
House  232 :  76.91
House  244 :  98.28
H

House  361 :  178.11
House  371 :  113.48
House  381 :  98.76
House  410 :  33.46
House  423 :  99.31
House  430 :  131.54
House  443 :  97.26
House  465 :  98.68
House  466 :  -2.77
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
House  622 :  84.78
House  632 :  96.16
House  633 :  92.17
House  658 :  84.23
House  713 :  94.38
House  714 :  75.35
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  792 :  100.08
House  810 :  71.88
House  814 :  102.62
House  828 :  153.93
House  836 :  66.23
House  861 :  115.51
House  864 :  61.51
House  914 :  121.83
House  925 :  129.23
House  947 :  112.06
House  969 :  62.04
House  997 :  26.91
House  999 :  140.49
House  11 :  155.91
House  32 :  159.76
House  90 :  105.66
House  127 :  20.68
House  137 :  128.37
House  154 :  128.94
House  157 :  28.72
House  181 :  122.87
House  189 :  196.23
House  232 :  76.91
House  244 :  98.28
H

House  371 :  113.48
House  381 :  98.76
House  410 :  33.46
House  423 :  99.31
House  430 :  131.54
House  443 :  97.26
House  465 :  98.68
House  466 :  -2.77
House  473 :  185.78
House  497 :  89.57
House  500 :  97.53
House  506 :  128.95
House  550 :  88.17
House  552 :  75.49
House  618 :  167.25
House  622 :  84.78
House  632 :  96.16
House  633 :  92.17
House  658 :  84.23
House  713 :  94.38
House  714 :  75.35
House  741 :  82.48
House  742 :  144.4
House  760 :  142.57
House  792 :  100.08
House  810 :  71.88
House  814 :  102.62
House  828 :  153.93
House  836 :  66.23
House  861 :  115.51
House  864 :  61.51
House  914 :  121.83
House  925 :  129.23
House  947 :  112.06
House  969 :  62.04
House  997 :  26.91
House  999 :  140.49
House  11 :  155.91
House  32 :  159.76
House  90 :  105.66
House  127 :  20.68
House  137 :  128.37
House  154 :  128.94
House  157 :  28.72
House  181 :  122.87
House  189 :  196.23
House  232 :  76.91
House  244 :  98.28
House  282 :  112.1
Ho

array([  144.39808861,   233.97096632,   419.74807705,   586.99684788,
         729.57000129,   812.04701165,   887.5368391 ,  1019.08019353,
        1039.75734256,  1068.47943953,  1197.41893088,  1319.24498161,
        1411.41227845,  1472.91933789,  1628.82512962,  1757.770979  ,
        1887.00103951,  1975.16700041,  2103.53963743,  2257.47439478,
        2319.51167147,  2432.98693431,  2538.64714753,  2636.18035417,
        2751.69445911,  2778.60200482,  2853.954439  ,  2950.10977591,
        3072.98402372,  3171.66829092,  3331.43043818,  3403.31203227,
        3543.79969393,  3655.89817938,  3754.65749517,  3838.89197664,
        3937.1669876 ,  4037.25094578,  4149.30952162,  4251.92639289,
        4285.38415799,  4362.29812333,  4459.56105469,  4544.33741168,
        4722.44267389,  4821.75085952,  4818.98093878,  4885.20734032,
        5081.44159772,  5175.81732808,  5372.05158548,  5568.28584288,
        5764.52010029,  5960.75435769,  6156.98861509,  6353.22287249,
      