# Simulation: Roulette

**Project Objective:** understand win loss distributions, particularly frequency of streaks above certain k-size, in game of chance with fixed independent probabilities 

**Subject:** double zero roulette

**How:** use two methods: empirical simulation and binomial distribution

In [3]:
import pandas as pd
import numpy as np
from scipy.stats import binom, geom
import random
import seaborn as sns
import matplotlib.pyplot as plt

import itertools

% matplotlib inline

import pprint as pp

## Simulate Roulette

In [2]:
def simulateroulette(n:int):

    p_w = 12 / 38
    p_l = 1 - p_w

    outcome = 0
    outcome_loss = 0
    
    results = []

    random.seed(123)

    for i in np.arange(1,n+1):
        num = random.uniform(0,1)
        if num <= p_w:
            outcome = 1
            outcome_loss = 0
            dozen = 1
        elif num <= (2 * p_w):
            outcome = 0
            outcome_loss = 1
            dozen = 2
        elif num <= (3 * p_w):
            outcome = 0
            outcome_loss = 1
            dozen = 3
        else:
            outcome = 0
            outcome_loss = 1
            dozen = 99

        result = {'spin': i, 'number': num, 'dozen': dozen, 'outcome': outcome, 'outcome_loss': outcome_loss}
        results.append(result)
    
    df_results = pd.DataFrame(results, columns=['spin','number','dozen', 'outcome', 'outcome_loss'])
    df_results.set_index('spin',inplace=True)

    return df_results

In [3]:
df = simulateroulette(1000000) #1000000000 1 Billion
df.head(10)

Unnamed: 0_level_0,number,dozen,outcome,outcome_loss
spin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.052364,1,1,0
2,0.087187,1,1,0
3,0.407242,2,0,1
4,0.1077,1,1,0
5,0.901199,3,0,1
6,0.038154,1,1,0
7,0.536202,2,0,1
8,0.332198,2,0,1
9,0.852087,3,0,1
10,0.159662,1,1,0


In [4]:
df.describe()

Unnamed: 0,number,dozen,outcome,outcome_loss
count,1000000.0,1000000.0,1000000.0,1000000.0
mean,0.500089,7.093983,0.315443,0.684557
std,0.288705,21.650415,0.464692,0.464692
min,1e-06,1.0,0.0,0.0
25%,0.250368,1.0,0.0,0.0
50%,0.499826,2.0,0.0,1.0
75%,0.750382,3.0,1.0,1.0
max,0.999999,99.0,1.0,1.0


In [5]:
df_dozen = df['dozen'].value_counts().to_frame()
df_dozen.reset_index(inplace=True)
df_dozen.rename({'index': 'dozen', 'dozen': 'value_counts'}, inplace=True)
df_dozen['occurrance'] = df_dozen['dozen'].apply(lambda x: x / df_dozen['dozen'].sum())
df_dozen

Unnamed: 0,index,dozen,occurrance
0,3,316053,0.316053
1,2,315995,0.315995
2,1,315443,0.315443
3,99,52509,0.052509


## Count streaks

In [6]:
def runs_of_ones_list(bits):
    return([sum(g) for b, g in itertools.groupby(bits) if b])

In [7]:
wins = np.array(runs_of_ones_list(list(df['outcome'])))

In [8]:
sum(wins)

315443

In [9]:
df['outcome'].sum()

315443

In [10]:
s_wins = pd.Series(wins)
df_wins = s_wins.value_counts().to_frame()
df_wins.reset_index(inplace=True)
df_wins.rename(columns={'index': 'win_streak', 0: 'value_counts'}, inplace=True)
df_wins.sort_values('win_streak', inplace=True)
df_wins

Unnamed: 0,win_streak,value_counts
0,1,147924
1,2,46674
2,3,14795
3,4,4610
4,5,1410
5,6,485
6,7,128
7,8,39
8,9,15
9,10,2


In [11]:
losses = runs_of_ones_list(list(df['outcome_loss']))

In [12]:
sum(losses)

684557

In [13]:
df['outcome_loss'].sum()

684557

In [14]:
s_losses = pd.Series(losses)
df_losses = s_losses.value_counts().to_frame()
df_losses.reset_index(inplace=True)
df_losses.rename(columns={'index': 'loss_streak', 0: 'value_counts'}, inplace=True)
df_losses.sort_values('loss_streak', inplace=True)
df_losses

Unnamed: 0,loss_streak,value_counts
0,1,68456
1,2,46203
2,3,32125
3,4,21938
4,5,14904
5,6,10260
6,7,6948
7,8,4777
8,9,3356
9,10,2228


# Binomial Distribution 

In [4]:
p_w = 12 / 38
p_l = 1 - p_w

#x = range(n+1)
#n = 20
p = p_l

loss_streak_pmf = []

for i in range(1,51):
    loss_streak_pmf.append([i, binom.pmf(i,i,p)])

In [16]:
df_pmf = pd.DataFrame(loss_streak_pmf)
df_pmf.columns = ['loss_streak', 'pmf']
df_pmf.head(10)

Unnamed: 0,loss_streak,pmf
0,1,0.684211
1,2,0.468144
2,3,0.320309
3,4,0.219159
4,5,0.149951
5,6,0.102598
6,7,0.070199
7,8,0.048031
8,9,0.032863
9,10,0.022485


# Combine Simulation with Binomial Distribution

In [19]:
df = df_pmf.merge(df_losses, on='loss_streak', how='left')
df.set_index('loss_streak', inplace=True)

In [20]:
df.head(15) #geometric distribution

Unnamed: 0_level_0,pmf,value_counts
loss_streak,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.684211,68456.0
2,0.468144,46203.0
3,0.320309,32125.0
4,0.219159,21938.0
5,0.149951,14904.0
6,0.102598,10260.0
7,0.070199,6948.0
8,0.048031,4777.0
9,0.032863,3356.0
10,0.022485,2228.0


In [2]:
6948.0 / 1000000

0.006948

In [58]:
#binom.pmf(i,i,p)
geometric = []
for i in range(1,100):
    geometric.append([i, geom.pmf(i, p_w)])

In [59]:
df_geometric = pd.DataFrame(geometric)
df_geometric.columns = ['Spin', 'Geometric_pmf']
df_geometric.set_index('Spin', inplace=True)

In [60]:
df_geometric.head(10)

Unnamed: 0_level_0,Geometric_pmf
Spin,Unnamed: 1_level_1
1,0.315789
2,0.216066
3,0.147835
4,0.10115
5,0.069208
6,0.047353
7,0.032399
8,0.022168
9,0.015168
10,0.010378
