# Similarities and Differences of Top vs. Bottom 20% (Pt.1 - Age, Gender, Won/Lost Amount)

In [1]:
# Define libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import os
import plotting_fn as pf
import counting_fns as cf

# Analysis configuration.
# month_file : name of the month folder to process.
# cut_off    : numeric threshold (not referenced in the visible cells of this notebook).
month_file = '3_July'
cut_off = 10000

# Make this month's data folder the working directory so that the relative
# parquet reads further down resolve against it.
month_dir = "/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/" + month_file
os.chdir(month_dir)


In [2]:
def _load_multi_gamble_players(parquet_name):
    """Read one gambles file and apply the shared clean-up steps.

    Steps, in order:
      1. read the parquet file (relative to the current working directory);
      2. keep only players whose 'gambles' column has more than one distinct value;
      3. relabel the rows 1..N;
      4. round 'wageredamt', 'profit' and 'percent_return' to 1 decimal place.
    """
    gambles = pd.read_parquet(parquet_name)

    # Drop players with a single distinct 'gambles' value
    gambles = gambles.groupby('playerkey').filter(lambda player_rows: player_rows['gambles'].nunique() > 1)

    # 1-based row labels
    gambles.index = np.arange(1, len(gambles) + 1)

    # Round the monetary / return columns to 1 decimal place
    for column in ('wageredamt', 'profit', 'percent_return'):
        gambles[column] = gambles[column].round(1)

    return gambles


# Bottom 20% first, then top 20%
df_bottom_20 = _load_multi_gamble_players("Bottom_20_gambles.parquet")
df_top_20 = _load_multi_gamble_players("Top_20_gambles.parquet")

In [3]:
def _classify_return(pct):
    """Label one gamble from its percent return.

    -100 exactly      -> 'loss'
    any other value<0 -> 'near-hit'
    >0                -> 'gain'
    everything else   -> 'draw' (0, and also NaN since every comparison is False)
    """
    if pct == -100:
        return 'loss'
    if pct < 0:
        return 'near-hit'
    if pct > 0:
        return 'gain'
    return 'draw'


def _flag_column_name(category):
    """Dummy-column name: '#' plus the capitalised first letter of the category (e.g. 'loss' -> '#L')."""
    return '#' + str(category[0].capitalize())


# --- Bottom 20%: outcome label, then one indicator column per outcome ---
df_bottom_20['result_type'] = df_bottom_20['percent_return'].apply(_classify_return)
dummy_variables = pd.get_dummies(df_bottom_20['result_type']).rename(columns=_flag_column_name)
df_bottom_20 = pd.concat([df_bottom_20, dummy_variables], axis=1).reset_index(drop=True)

# --- Top 20%: same treatment ---
df_top_20['result_type'] = df_top_20['percent_return'].apply(_classify_return)
dummy_variables = pd.get_dummies(df_top_20['result_type']).rename(columns=_flag_column_name)
df_top_20 = pd.concat([df_top_20, dummy_variables], axis=1).reset_index(drop=True)

# Parse start_time as datetime so it can be differenced
df_bottom_20['start_time'] = pd.to_datetime(df_bottom_20['start_time'])
df_top_20['start_time'] = pd.to_datetime(df_top_20['start_time'])

# 'time_diff' = seconds elapsed since the previous row of the same
# (playerkey, session_time) group; rows with no predecessor get 0.
session_keys = ['playerkey', 'session_time']

bottom_gap = df_bottom_20.groupby(session_keys)['start_time'].diff()
df_bottom_20['time_diff'] = bottom_gap.dt.total_seconds().fillna(0)

top_gap = df_top_20.groupby(session_keys)['start_time'].diff()
df_top_20['time_diff'] = top_gap.dt.total_seconds().fillna(0)

### Filter visit 1

In [4]:
# Keep only rows belonging to each player's first visit (visit == 1).
# An explicit .copy() is taken because later cells add columns to these frames;
# assigning into a bare boolean-mask selection is what triggers pandas'
# SettingWithCopyWarning, and the copy makes ownership of the data unambiguous.
df_bottom_20 = df_bottom_20.loc[df_bottom_20['visit'] == 1].copy()
df_top_20 = df_top_20.loc[df_top_20['visit'] == 1].copy()

In [5]:
# Use pf.consecutive_wins to find runs of 2, 3 and 4 wins in a row (2ws, 3ws, 4ws)
# for each player, grouped by 'visit'. The returned frame replaces the original.
# NOTE(review): consecutive_wins is defined in plotting_fn and not shown here; the
# streak semantics above come from the original comment — confirm against the helper.
df_bottom_20 = pf.consecutive_wins(df_bottom_20, 'visit')
df_top_20 = pf.consecutive_wins(df_top_20, 'visit')


In [6]:
# Use pf.consecutive_wins again, this time grouped by 'session_time'. The results are
# bound to separate *_s names; df_bottom_20 / df_top_20 are not rebound here.
# NOTE(review): df_bottom_20_s / df_top_20_s are not referenced again in the visible
# cells — confirm they are still needed. Whether the helper also modifies the frame
# it receives cannot be told from this notebook.
df_bottom_20_s = pf.consecutive_wins(df_bottom_20, 'session_time')
df_top_20_s = pf.consecutive_wins(df_top_20, 'session_time')

In [7]:
# Age groups.
# The labels describe INCLUSIVE ranges ('18-24' contains age 24, '25-40' contains 40, ...),
# while pd.cut(..., right=False) builds left-closed / right-open bins [lo, hi).
# Each interior edge must therefore be the FIRST age of the next group:
#   [0, 25) -> '18-24', [25, 41) -> '25-40', [41, 56) -> '41-55',
#   [56, 76) -> '56-75', [76, 150) -> '76+'
# (The previous edges 24/40/55/75 pushed ages 24, 40, 55 and 75 into the next group up.)
# Ages below 18 still land in the first group because the first bin starts at 0;
# ages >= 150 or missing become NaN.
bins = [0, 25, 41, 56, 76, 150]
labels = ['18-24', '25-40', '41-55', '56-75', '76+']
generations = ['Gen Z', 'Millenials', 'Gen X', 'Baby Boomers', 'Silent']

# Same bin edges for both labelings: numeric range and generation name
df_bottom_20['age_range'] = pd.cut(df_bottom_20['age'], bins=bins, labels=labels, right=False)
df_bottom_20['age_gen'] = pd.cut(df_bottom_20['age'], bins=bins, labels=generations, right=False)

df_top_20['age_range'] = pd.cut(df_top_20['age'], bins=bins, labels=labels, right=False)
df_top_20['age_gen'] = pd.cut(df_top_20['age'], bins=bins, labels=generations, right=False)

In [8]:
# Count, for the top 20%, how often each player increases / decreases the slot
# denomination and the max bet. Arguments passed to the helpers: the frame, a name
# for the event ("increase_slotdeno", ...), the player column, and the column whose
# changes are counted.
# The cell output shows the helper printing a player count and a per-player dict.
# NOTE(review): count_increase / count_decrease live in counting_fns and are not shown
# here; what they return, and whether they add a column to df_top_20, must be
# confirmed against that module.
players_increase_slot_t20 = cf.count_increase(df_top_20, "increase_slotdeno", "playerkey", "slotdenomination")
players_decrease_slot_t20 = cf.count_decrease(df_top_20, "decrease_slotdeno", "playerkey", "slotdenomination")
players_increase_maxbet_t20 = cf.count_increase(df_top_20, "increase_maxbet", "playerkey", "maxbet")
players_decrease_maxbet_t20 = cf.count_decrease(df_top_20, "decrease_maxbet", "playerkey", "maxbet")

Count of players who increase_slotdeno : 1523
Count of times each player increase_slotdeno : {4: 2, 33: 66, 93: 1, 94: 1, 95: 2, 120: 1, 159: 2, 202: 1, 220: 1, 224: 2, 234: 1, 263: 1, 287: 4, 332: 2, 351: 1, 434: 2, 435: 4, 460: 114, 461: 1, 464: 1, 481: 1, 518: 1, 554: 1, 599: 3, 603: 3, 606: 2, 612: 1, 614: 2, 624: 1, 627: 7, 641: 1, 646: 2, 652: 3, 653: 2, 655: 1, 656: 1, 661: 5, 668: 1, 669: 1, 680: 1, 682: 1, 683: 3, 708: 9, 718: 18, 719: 244, 738: 1, 765: 1, 788: 199, 790: 1, 791: 1, 795: 2, 841: 1, 842: 1, 848: 2, 859: 1, 861: 4, 870: 3, 886: 23, 895: 129, 901: 16, 903: 1, 931: 2, 942: 1, 956: 1, 957: 1, 979: 1, 996: 8, 997: 2, 1013: 4, 1016: 1, 1017: 147, 1025: 3, 1035: 1, 1046: 2, 1054: 1, 1072: 5, 1090: 2, 1122: 1, 1123: 4, 1141: 1, 1161: 1, 1170: 1, 1199: 1, 1228: 1, 1229: 1, 1233: 7, 1243: 1, 1259: 2, 1269: 2, 1275: 2, 1286: 18, 1287: 3, 1302: 1, 1318: 9, 1321: 5, 1325: 3, 1331: 3, 1333: 8, 1337: 2, 1358: 13, 1366: 1, 1387: 2, 1398: 34, 1403: 109, 1427: 1, 1431: 5, 1438: 1

In [9]:
# Same four counts for the bottom 20%: increases / decreases of slot denomination
# and of max bet, per player.
# NOTE(review): helper behaviour (return value, possible mutation of df_bottom_20)
# is defined in counting_fns and not visible here — confirm there.
players_increase_slot_b20 = cf.count_increase(df_bottom_20, "increase_slotdeno", "playerkey", "slotdenomination")
players_decrease_slot_b20 = cf.count_decrease(df_bottom_20, "decrease_slotdeno", "playerkey", "slotdenomination")
players_increase_maxbet_b20 = cf.count_increase(df_bottom_20, "increase_maxbet", "playerkey", "maxbet")
players_decrease_maxbet_b20 = cf.count_decrease(df_bottom_20, "decrease_maxbet", "playerkey", "maxbet")

Count of players who increase_slotdeno : 215
Count of times each player increase_slotdeno : {779: 2, 789: 1, 916: 1, 1020: 1, 1085: 1, 1134: 1, 1186: 1, 1192: 1, 1255: 1, 1257: 1, 1258: 1, 1346: 1, 1386: 1, 1394: 2, 1411: 1, 1423: 1, 1635: 1, 1805: 1, 1807: 1, 1836: 2, 1841: 1, 1946: 3, 1962: 1, 1987: 1, 2010: 1, 2037: 1, 2072: 1, 2125: 2, 2136: 1, 2204: 1, 2234: 1, 2281: 1, 2415: 1, 2462: 1, 2484: 1, 2498: 1, 2627: 1, 2803: 1, 2874: 1, 2881: 1, 2979: 1, 2985: 1, 3004: 1, 3006: 2, 3086: 2, 3337: 1, 3357: 1, 3362: 1, 3417: 1, 3518: 1, 3522: 1, 3675: 1, 3755: 1, 3870: 1, 4428: 1, 4487: 2, 4541: 1, 4556: 1, 4565: 1, 4642: 1, 4678: 1, 4762: 1, 4789: 1, 4814: 1, 5133: 1, 5161: 2, 5266: 2, 5290: 1, 5317: 1, 5364: 1, 5475: 2, 5528: 1, 5532: 1, 5551: 1, 5566: 1, 5570: 1, 5595: 1, 5652: 1, 5665: 1, 5756: 1, 5838: 1, 5875: 1, 5961: 1, 6247: 1, 6395: 1, 6444: 1, 6462: 1, 6474: 2, 6475: 1, 6508: 1, 6510: 1, 6521: 1, 6849: 1, 6850: 1, 6866: 1, 6960: 1, 6997: 1, 7050: 1, 7107: 1, 7132: 1, 7256: 1, 7

In [10]:
# 'depletion_rate' = change in 'playercashableamt' relative to the previous row of the
# same (playerkey, session_time) group. Missing differences — including the first row
# of every group, which has no predecessor — are set to 0.
session_keys = ['playerkey', 'session_time']

bottom_balance_change = df_bottom_20.groupby(session_keys)['playercashableamt'].diff()
df_bottom_20['depletion_rate'] = bottom_balance_change.fillna(0)

top_balance_change = df_top_20.groupby(session_keys)['playercashableamt'].diff()
df_top_20['depletion_rate'] = top_balance_change.fillna(0)


# Separate by time

In [11]:
# One bottom-20% frame per time window (1, 2, 3, 4, 5, 10 and 15 minutes), each produced
# by pf.filter_dataframe_by_time with the matching pd.Timedelta. Calls run in ascending
# window order.
# NOTE(review): the helper is defined in plotting_fn; presumably it keeps the first N
# minutes of play (see the "First N minutes" sections below) — confirm.
bottom_window_minutes = (1, 2, 3, 4, 5, 10, 15)

(
    df_bottom_1min,
    df_bottom_2min,
    df_bottom_3min,
    df_bottom_4min,
    df_bottom_5min,
    df_bottom_10min,
    df_bottom_15min,
) = (
    pf.filter_dataframe_by_time(df_bottom_20, pd.Timedelta(minutes=minutes))
    for minutes in bottom_window_minutes
)

In [12]:
# Report the number of distinct players in each bottom-20% window
# (only the 1- to 5-minute windows are reported).
bottom_window_reports = (
    ("Number of players in bottom 1min: ", df_bottom_1min),
    ("Number of players in bottom 2min: ", df_bottom_2min),
    ("Number of players in bottom 3min: ", df_bottom_3min),
    ("Number of players in bottom 4min: ", df_bottom_4min),
    ("Number of players in bottom 5min: ", df_bottom_5min),
)
for message, window_df in bottom_window_reports:
    print(message, window_df['playerkey'].nunique())

Number of players in bottom 1min:  2868
Number of players in bottom 2min:  2868
Number of players in bottom 3min:  2868
Number of players in bottom 4min:  2868
Number of players in bottom 5min:  2868


In [13]:
# One top-20% frame per time window (1, 2, 3, 4, 5, 10 and 15 minutes), each produced
# by pf.filter_dataframe_by_time with the matching pd.Timedelta. Calls run in ascending
# window order.
# NOTE(review): the helper is defined in plotting_fn; presumably it keeps the first N
# minutes of play (see the "First N minutes" sections below) — confirm.
top_window_minutes = (1, 2, 3, 4, 5, 10, 15)

(
    df_top_1min,
    df_top_2min,
    df_top_3min,
    df_top_4min,
    df_top_5min,
    df_top_10min,
    df_top_15min,
) = (
    pf.filter_dataframe_by_time(df_top_20, pd.Timedelta(minutes=minutes))
    for minutes in top_window_minutes
)

In [14]:
# Report the number of distinct players in each top-20% window
# (only the 1- to 5-minute windows are reported).
top_window_reports = (
    ("Number of players in top 1min: ", df_top_1min),
    ("Number of players in top 2min: ", df_top_2min),
    ("Number of players in top 3min: ", df_top_3min),
    ("Number of players in top 4min: ", df_top_4min),
    ("Number of players in top 5min: ", df_top_5min),
)
for message, window_df in top_window_reports:
    print(message, window_df['playerkey'].nunique())

Number of players in top 1min:  2898
Number of players in top 2min:  2898
Number of players in top 3min:  2898
Number of players in top 4min:  2898
Number of players in top 5min:  2898


#### First 1 minute

In [15]:
# Simultaneous play within the first minute, computed separately for each group.
# NOTE(review): pf.simultaneous_play is not shown; 40000 is passed through as-is and
# its meaning is defined by that helper — confirm.
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_1min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_1min, 'session_time')

# Stack bottom over top; reset_index() keeps each source frame's old row labels
# in an 'index' column.
sim_frames = [df_b20_sim_v, df_t20_sim_v]
df_sim_v = pd.concat(sim_frames)
df_sim_v = df_sim_v.reset_index()

print(df_sim_v)

# Keep one row per (session_time, playerkey) pair
pair_columns = ['session_time', 'playerkey']
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=pair_columns)

print(df_sim_no_repeat_v)

     index  session_time  playerkey
0        0             1       6996
1        1             1       7932
2        2             1       9798
3        3             1      11924
4        4             1      12101
..     ...           ...        ...
103     95             1      15841
104     96             1      15930
105     97             1      16068
106     98             1      16080
107     99             1      16275

[108 rows x 3 columns]
     index  session_time  playerkey
0        0             1       6996
1        1             1       7932
2        2             1       9798
3        3             1      11924
4        4             1      12101
..     ...           ...        ...
103     95             1      15841
104     96             1      15930
105     97             1      16068
106     98             1      16080
107     99             1      16275

[108 rows x 3 columns]


In [17]:
# Per-player summary table for the first minute, built from the top and bottom frames
# (helper defined in plotting_fn; grouped by 'session_time', helper printing disabled).
df_all_1min = pf.merge_dfs_per_player(
    data_t=df_top_1min,
    data_b=df_bottom_1min,
    grouping='session_time',
    print_results=False,
)

# How many distinct players ended up in the merged table
n_players_1min = df_all_1min['playerkey'].nunique()
print("Number of unique players in df_all_1min: ", n_players_1min)

Number of unique players in df_all_1min:  5252


In [18]:
# Flag simultaneous play: 'sim_play' is True when the row's (playerkey, session_time)
# pair appears in df_sim_no_repeat_v.
# The lookup set is built ONCE here; previously it was rebuilt inside the row-wise
# lambda, i.e. once for every row of df_all_1min.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_1min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_1min['playerkey'], df_all_1min['session_time'])
]

print(df_all_1min.columns)

# Column order used for every exported table (this list is reused by the later
# "First N minutes" cells). reindex() keeps only the listed columns, in this order;
# any listed name that is absent from the frame is added as an all-NaN column.
# NOTE(review): 'rank' is not in the visible part of the printed column index above
# (the printout is truncated) — confirm it exists, otherwise it will be all NaN.
desired_order = ['playerkey', 'session_time', 'rank', 'gender', 'age_range', 'age_gen',
                'beginning_amt', 'ending_amt', 'ending_balance', 'ave_slotdenom',
                'std_slotdenom', 'min_slotdenom', 'max_slotdenom', 'ave_theo_payback',
                'min_theo_payback', 'max_theo_payback', 'ave_wageramt', 'std_wageramt',
                'min_wager', 'max_wager', 'ave_p/b', 'std_p/b', 'max_p/b', 'max_profit', 'depletion_slope',
                '#inc_maxbet', '#dec_maxbet', 'first_wager', 'first_outcome', 'first_p/b', 'last_wager',
                'last_outcome', 'last_p/b', '#W', '#L', '#NH', '#D', 'w/min', 'l/min', 'nh/min', 'd/min', 
                'w/g', 'l/g', 'nh/g', 'd/g', '#2ws', '2ws_profit', '2ws_wgramt', '#3ws', 
                '3ws_profit','3ws_wgramt', '#4ws', '4ws_profit', '4ws_wgramt', '2ws/min', '3ws/min', '4ws/min',
                'ave_time_per_gamble', 'std_time_per_gamble', 'min_time_per_gamble', 'max_time_per_gamble',
                'total_duration', 'total_gambles', 'machines_changes', '#inc_slotdenom', '#dec_slotdenom',
                'unique_machines', 'ave_time_per_machine', 'sim_play','percentile']

df_all_1min = df_all_1min.reindex(columns=desired_order)

Index(['playerkey', 'session_time', 'gender', 'age_range', 'age_gen',
       'beginning_amt', 'ending_amt', 'ending_balance', 'ave_slotdenom',
       'std_slotdenom', 'min_slotdenom', 'max_slotdenom', 'ave_theo_payback',
       'min_theo_payback', 'max_theo_payback', 'ave_wageramt', 'std_wageramt',
       'min_wager', 'max_wager', 'ave_p/b', 'std_p/b', 'max_p/b', 'max_profit',
       'depletion_slope', '#inc_slotdenom', '#dec_slotdenom', '#inc_maxbet',
       '#dec_maxbet', 'first_wager', 'first_outcome', 'first_p/b',
       'last_wager', 'last_outcome', 'last_p/b', 'machines_changes', '#W',
       '#L', '#NH', '#D', 'w/g', 'l/g', 'nh/g', 'd/g', '#2ws', '2ws_profit',
       '2ws_wgramt', '#3ws', '3ws_profit', '3ws_wgramt', '#4ws', '4ws_profit',
       '4ws_wgramt', 'ave_time_per_gamble', 'std_time_per_gamble',
       'min_time_per_gamble', 'max_time_per_gamble', 'total_duration',
       'total_gambles', 'unique_machines', 'ave_time_per_machine', 'w/min',
       'l/min', 'nh/min', 'd/mi

In [None]:
# Write the 1-minute per-player table to this month's Per_Player output folder.
output_path_1min = '/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/' + month_file + '/Ending Balances/Per_Player/df_1min.parquet'
df_all_1min.to_parquet(output_path_1min)

#### First 2 minutes

In [19]:
# Per-player summary table for the first 2 minutes (top and bottom frames combined)
df_all_2min = pf.merge_dfs_per_player(data_t=df_top_2min, data_b=df_bottom_2min, grouping='session_time', print_results=False)

# Print number of unique players
print("Number of unique players in df_all_2min: ", df_all_2min['playerkey'].nunique())

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_2min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_2min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_2min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_2min['playerkey'], df_all_2min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_2min = df_all_2min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_2min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_2min.parquet')

Number of unique players in df_all_2min:  5498


#### First 3 minutes

In [None]:
# Per-player summary table for the first 3 minutes (top and bottom frames combined)
df_all_3min = pf.merge_dfs_per_player(data_t=df_top_3min, data_b=df_bottom_3min, grouping='session_time', print_results=False)

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_3min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_3min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_3min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_3min['playerkey'], df_all_3min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_3min = df_all_3min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_3min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_3min.parquet')

#### First 4 minutes

In [None]:
# Per-player summary table for the first 4 minutes (top and bottom frames combined)
df_all_4min = pf.merge_dfs_per_player(data_t=df_top_4min, data_b=df_bottom_4min, grouping='session_time', print_results=False)

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_4min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_4min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_4min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_4min['playerkey'], df_all_4min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_4min = df_all_4min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_4min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_4min.parquet')

#### First 5 minutes

In [None]:
# Per-player summary table for the first 5 minutes (top and bottom frames combined)
df_all_5min = pf.merge_dfs_per_player(data_t=df_top_5min, data_b=df_bottom_5min, grouping='session_time', print_results=False)

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_5min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_5min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_5min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_5min['playerkey'], df_all_5min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_5min = df_all_5min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_5min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_5min.parquet')

#### First 10 minutes

In [None]:

# Per-player summary table for the first 10 minutes (top and bottom frames combined)
df_all_10min = pf.merge_dfs_per_player(data_t=df_top_10min, data_b=df_bottom_10min, grouping='session_time', print_results=False)

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_10min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_10min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_10min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_10min['playerkey'], df_all_10min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_10min = df_all_10min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_10min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_10min.parquet')

#### First 15 minutes

In [None]:

# Per-player summary table for the first 15 minutes (top and bottom frames combined)
df_all_15min = pf.merge_dfs_per_player(data_t=df_top_15min, data_b=df_bottom_15min, grouping='session_time', print_results=False)

# Simultaneous play, computed per group
df_b20_sim_v = pf.simultaneous_play(40000, df_bottom_15min, 'session_time')
df_t20_sim_v = pf.simultaneous_play(40000, df_top_15min, 'session_time')

# Stack both groups, then keep one row per (session_time, playerkey) pair
df_sim_v = pd.concat([df_b20_sim_v, df_t20_sim_v]).reset_index()
df_sim_no_repeat_v = df_sim_v.drop_duplicates(subset=['session_time', 'playerkey'])

# Flag rows whose (playerkey, session_time) pair shows simultaneous play.
# The lookup set is built once up front instead of once per row inside a lambda.
sim_pairs = set(zip(df_sim_no_repeat_v['playerkey'], df_sim_no_repeat_v['session_time']))
df_all_15min['sim_play'] = [
    pair in sim_pairs
    for pair in zip(df_all_15min['playerkey'], df_all_15min['session_time'])
]

# Reorder columns using the desired_order list defined in the 1-minute section
df_all_15min = df_all_15min.reindex(columns=desired_order)

# Write the table to this month's Per_Player output folder
df_all_15min.to_parquet('/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/'+month_file+'/Ending Balances/Per_Player/df_15min.parquet')