# Similarities and Differences of Top vs. Bottom 20% (Pt.1 - Age, Gender, Won/Lost Amount)

In [1]:
# Define libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import os
import plotting_fn as pf
import counting_fns as cf

# Month folder to process; `cut_off` is set here but not referenced in the visible part of this notebook
month_file, cut_off = '6_October', 10000

# Switch into that month's data folder so the relative parquet read below resolves there
# NOTE(review): absolute, user-specific path — consider a configurable data directory
os.chdir(
    "/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/"
    + month_file
)


In [2]:
# Load the gamble-level records from the current working directory
df_bottom_20 = pd.read_parquet("top_vs_ntop_players.parquet")

# Keep only players whose 'gambles' column takes more than one distinct value
df_bottom_20 = df_bottom_20.groupby('playerkey').filter(
    lambda player_rows: player_rows['gambles'].nunique() > 1
)

# Renumber the rows 1..N
df_bottom_20.index = np.arange(1, len(df_bottom_20) + 1)

# Round wageredamt, profit and percent_return to 1 decimal place
# NOTE(review): the previous comment said 2 decimals, but the code has always used round(1);
# the later `percent_return == -100` test depends on this rounding, so confirm before changing it.
for _col in ('wageredamt', 'profit', 'percent_return'):
    df_bottom_20[_col] = df_bottom_20[_col].round(1)


In [3]:
def _classify_return(pct):
    """Label one percent_return value.

    Returns 'loss' when the value is exactly -100, 'near-hit' for any other
    negative value, 'gain' for a positive value, and 'draw' otherwise
    (zero, or NaN, for which every comparison is False).
    """
    if pct == -100:
        return 'loss'
    if pct < 0:
        return 'near-hit'
    if pct > 0:
        return 'gain'
    return 'draw'


# Categorical outcome of each gamble derived from its percent return
df_bottom_20['result_type'] = df_bottom_20['percent_return'].apply(_classify_return)

# One indicator column per outcome, named '#' + the capitalised first letter of the outcome
# (e.g. 'near-hit' -> '#N', 'gain' -> '#G')
dummy_variables = pd.get_dummies(df_bottom_20['result_type']).rename(
    columns=lambda outcome: '#' + str(outcome[0].capitalize())
)

# Append the indicator columns and restart the row index at 0
df_bottom_20 = pd.concat([df_bottom_20, dummy_variables], axis=1).reset_index(drop=True)

# Parse start_time as datetime so it can be differenced
df_bottom_20['start_time'] = pd.to_datetime(df_bottom_20['start_time'])

# Seconds elapsed since the previous row within the same (playerkey, session_time) group;
# the first row of each group has no predecessor and gets 0
df_bottom_20['time_diff'] = (
    df_bottom_20
    .groupby(['playerkey', 'session_time'])['start_time']
    .diff()
    .dt.total_seconds()
    .fillna(0)
)

### Filter visit 1

In [4]:
# Keep only the rows whose 'visit' value is 1
df_bottom_20 = df_bottom_20.loc[df_bottom_20['visit'] == 1]

In [5]:
# Use the consecutive-wins helper to find 2ws, 3ws, 4ws in a row per visit per player
# NOTE(review): the streak logic lives in plotting_fn.consecutive_wins, which is not visible here
df_bottom_20 = pf.consecutive_wins(df_bottom_20, 'visit')

In [6]:
# Use the consecutive-wins helper to find 2ws, 3ws, 4ws in a row per session_time per player
# NOTE(review): the result is stored in df_bottom_20_s, which no later visible cell reads —
# the following cells keep working on df_bottom_20; confirm this is intended
df_bottom_20_s = pf.consecutive_wins(df_bottom_20, 'session_time')

In [7]:
# Age-band edges plus two label sets (age range and generation) that share those edges
bins = [0, 24, 40, 55, 75, 150]
labels = ['18-24', '25-40', '41-55', '56-75', '76+']
# NOTE: 'Millenials' spelling is kept as-is because it becomes a stored category value
generations = ['Gen Z', 'Millenials', 'Gen X', 'Baby Boomers', 'Silent']

# Bin each row's age into right-closed intervals:
#   (0, 24], (24, 40], (40, 55], (55, 75], (75, 150]
# so that boundary ages fall in the band whose label names them (24 -> '18-24', 40 -> '25-40',
# 55 -> '41-55', 75 -> '56-75'). The earlier right=False produced [0, 24), [24, 40), ... which
# put a 24-year-old under '25-40', a 40-year-old under '41-55', and so on.
# Ages outside (0, 150] become NaN.
# NOTE(review): assumes 'age' is recorded in whole years — confirm.
df_bottom_20['age_range'] = pd.cut(df_bottom_20['age'], bins=bins, labels=labels, right=True)
df_bottom_20['age_gen'] = pd.cut(df_bottom_20['age'], bins=bins, labels=generations, right=True)


In [8]:
# Count slot-denomination and max-bet changes per player ('playerkey') with the counting_fns helpers.
# NOTE(review): cf.count_increase / cf.count_decrease are defined outside this notebook; the printed
# output suggests they report how many players made each change plus a per-player count — confirm there.
players_increase_slot_b20 = cf.count_increase(df_bottom_20, "increase_slotdeno", "playerkey", "slotdenomination")
players_decrease_slot_b20 = cf.count_decrease(df_bottom_20, "decrease_slotdeno", "playerkey", "slotdenomination")
players_increase_maxbet_b20 = cf.count_increase(df_bottom_20, "increase_maxbet", "playerkey", "maxbet")
players_decrease_maxbet_b20 = cf.count_decrease(df_bottom_20, "decrease_maxbet", "playerkey", "maxbet")

Count of players who increase_slotdeno : 1493
Count of times each player increase_slotdeno : {73: 8, 156: 1, 646: 2, 677: 9, 719: 1, 777: 2, 988: 1, 1068: 1, 1122: 1, 1136: 1, 1371: 1, 1376: 1, 1408: 1, 1504: 1, 2031: 1, 2082: 1, 2155: 2, 2222: 1, 2287: 1, 2331: 4, 2338: 1, 2791: 1, 3319: 1, 3385: 2, 3644: 2, 3653: 3, 3677: 1, 3955: 1, 4180: 2, 4190: 2, 4508: 4, 4860: 2, 4970: 1, 5088: 2, 5113: 2, 5243: 1, 5443: 1, 5859: 1, 5958: 1, 6050: 2, 6056: 2, 6081: 1, 6515: 1, 6794: 1, 6954: 1, 6988: 1, 7337: 2, 7420: 1, 7437: 1, 7774: 1, 7786: 1, 7960: 11, 8003: 1, 8114: 1, 8288: 1, 8345: 1, 8446: 3, 8538: 2, 8553: 1, 8554: 1, 8613: 1, 8827: 3, 9141: 1, 9201: 2, 9222: 1, 9310: 1, 9478: 1, 9608: 1, 9866: 1, 9946: 2, 10122: 1, 10598: 1, 11125: 2, 11218: 1, 11308: 1, 11427: 3, 11526: 1, 11589: 2, 11722: 1, 11821: 2, 11857: 1, 11868: 2, 11947: 1, 12191: 1, 12226: 1, 12314: 5, 12356: 7, 12437: 1, 12562: 1, 13312: 1, 13832: 1, 14443: 1, 15324: 1, 15387: 2, 15579: 1, 15714: 1, 15853: 3, 15868: 1, 161

In [9]:
# Add 'depletion_rate': the change in 'playercashableamt' from the previous row to the current
# row within the same (playerkey, session_time) group; the first row of each group gets 0
df_bottom_20['depletion_rate'] = (
    df_bottom_20
    .groupby(['playerkey', 'session_time'])['playercashableamt']
    .diff()
    .fillna(0)
)


# Separate by time

In [10]:
# Build one time-filtered frame per window length (1, 2, 3, 4, 5, 10 and 15 minutes).
# Each frame comes from the same call, pf.filter_dataframe_by_time(df_bottom_20, <Timedelta>),
# evaluated in ascending window order.
# NOTE(review): the exact filtering rule is defined in plotting_fn — not visible here.
(
    df_bottom_1min,
    df_bottom_2min,
    df_bottom_3min,
    df_bottom_4min,
    df_bottom_5min,
    df_bottom_10min,
    df_bottom_15min,
) = (
    pf.filter_dataframe_by_time(df_bottom_20, pd.Timedelta(minutes=window_minutes))
    for window_minutes in (1, 2, 3, 4, 5, 10, 15)
)

In [11]:
# Report the number of distinct players in the 1- to 5-minute frames
# (the 10- and 15-minute frames are not reported in this cell)
for _caption, _window_df in (
    ("Number of players in bottom 1min: ", df_bottom_1min),
    ("Number of players in bottom 2min: ", df_bottom_2min),
    ("Number of players in bottom 3min: ", df_bottom_3min),
    ("Number of players in bottom 4min: ", df_bottom_4min),
    ("Number of players in bottom 5min: ", df_bottom_5min),
):
    print(_caption, _window_df['playerkey'].nunique())

Number of players in bottom 1min:  5548
Number of players in bottom 2min:  5548
Number of players in bottom 3min:  5548
Number of players in bottom 4min:  5548
Number of players in bottom 5min:  5548


#### First 1 minute

In [12]:
# Build the 1-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_1min = pf.transform_ml(data_b=df_bottom_1min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_1min: ", df_all_1min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy = df_all_1min.loc[df_all_1min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy)

# Drop every row belonging to those players
df_all_1min = df_all_1min.loc[~df_all_1min['playerkey'].isin(list_discrepancy)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_1min: ", df_all_1min['playerkey'].nunique())

# Column layout shared by every time-window table (also used by the later window cells)
desired_order = [
    'playerkey', 'session_time', 'gender', 'age_range', 'age_gen',
    'beginning_amt', 'ending_amt', 'ending_balance',
    'ave_slotdenom', 'std_slotdenom', 'min_slotdenom', 'max_slotdenom',
    'ave_theo_payback', 'min_theo_payback', 'max_theo_payback',
    'ave_wageramt', 'std_wageramt', 'min_wager', 'max_wager',
    'ave_p/b', 'std_p/b', 'max_p/b', 'max_profit', 'depletion_slope',
    '#inc_maxbet', '#dec_maxbet',
    'first_wager', 'first_outcome', 'first_p/b', 'last_wager', 'last_outcome', 'last_p/b',
    '#W', '#L', '#NH', '#D',
    'w/min', 'l/min', 'nh/min', 'd/min',
    'w/g', 'l/g', 'nh/g', 'd/g',
    '#2ws', '2ws_profit', '2ws_wgramt',
    '#3ws', '3ws_profit', '3ws_wgramt',
    '#4ws', '4ws_profit', '4ws_wgramt',
    '2ws/min', '3ws/min', '4ws/min',
    'ave_time_per_gamble', 'min_time_per_gamble', 'max_time_per_gamble',
    'total_duration', 'total_gambles',
    'machines_changes', '#inc_slotdenom', '#dec_slotdenom',
    'unique_machines', 'ave_time_per_machine',
    'classification',
]

# Apply the layout (reindex drops unlisted columns and adds an all-NaN column for any
# listed name the table does not have)
df_all_1min = df_all_1min.reindex(columns=desired_order)

Number of unique players in df_all_1min:  5548
[ 2222  4092  4617  5969  6081  7118  7418  7774  7786  7967  8694 11581
 16539 16589 20058 22643 27611 28397 31691 31877 34589 35343 41743 41780
 41897 42049 42055 42133 42165 42170 42527 42538 42601 42610 42629 42639
 42721 42746 42760 42843 42914 42974 42998 43074 43115 43141 43155 43255
 43256 43284 43308 43345 43361 43372 43377 43379 43410 43444 43448 43528
 43622 43666 43735 43748 43754 43803 43825 43830 43835 43849 43867 43946
 43982 44004 44019 44026 44204 44217 44247 44283 44330 44396 44405 44417
 44420 44433 44450 44464 44479 44507 44590 44592 44659 44753 44805 44831
 44858 44873 44887 44915 44916 44931 44961 44980 45044 45081 45109 45119
 45121 45126 45178 45194 45240 45299 45302 45331 45334 45346 45352 45385
 45408 45457 45494 45520 45546 45677 45696 45737 45770 45811 45839 45875
 45876 45905 45907 45914 45922 45927 45959 45994 46023 46030 46044 46130
 46131 46149 46190 46193 46201 46257 46260 46261 46285 46320 46332 46362
 464

#### First 2 minutes

In [13]:
# Build the 2-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_2min = pf.transform_ml(data_b=df_bottom_2min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_2min: ", df_all_2min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_2 = df_all_2min.loc[df_all_2min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_2)

# Drop every row belonging to those players
df_all_2min = df_all_2min.loc[~df_all_2min['playerkey'].isin(list_discrepancy_2)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_2min: ", df_all_2min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_2min = df_all_2min.reindex(columns=desired_order)

Number of unique players in df_all_2min:  5548
[ 2222  4092  4617  7774  7786  7967 16539 16589 28397 31877 34589 35343
 41780 41897 42049 42055 42527 42610 42629 42639 42721 42746 42974 42998
 43115 43141 43155 43345 43372 43377 43410 43444 43448 43748 43754 43825
 43830 43835 43867 43982 44004 44019 44283 44405 44450 44464 44590 44592
 44659 44805 44858 44873 44887 44931 44980 45119 45121 45126 45194 45299
 45331 45352 45408 45696 45737 45770 45905 45907 45914 45922 45927 45959
 46023 46044 46131 46149 46201 46257 46261 46403 46407 46426 46430 46450
 46501 46508 46676 46744 46750 46780 46783 46797 46883 46905 46919 46953
 46965 46992 47068 47257 47337 47399 47478 47498 47501 47530 47585 47612
 47630 47642 47681 47690 47695 47700 47719 47824 47863 47918 47938 47985]
Number of unique players w/o discrepancies df_all_2min:  5428


#### First 3 minutes

In [14]:
# Build the 3-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_3min = pf.transform_ml(data_b=df_bottom_3min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_3min: ", df_all_3min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_3 = df_all_3min.loc[df_all_3min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_3)

# Drop every row belonging to those players
df_all_3min = df_all_3min.loc[~df_all_3min['playerkey'].isin(list_discrepancy_3)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_3min: ", df_all_3min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_3min = df_all_3min.reindex(columns=desired_order)

Number of unique players in df_all_3min:  5548
[ 2222  4092  7774  7786  7967 16539 16589 28397 31877 34589 35343 41780
 41897 42049 42610 42639 42721 42746 42974 42998 43115 43141 43155 43344
 43372 43410 43448 43718 43754 43830 43835 43867 43982 44019 44405 44450
 44592 44858 44873 44931 44980 45119 45121 45194 45299 45331 45352 45696
 45737 45770 45905 45907 45914 45927 45959 46023 46044 46131 46149 46257
 46407 46426 46430 46450 46501 46508 46676 46744 46750 46780 46783 46797
 46905 46919 46953 46965 46992 47068 47257 47337 47399 47478 47498 47585
 47612 47630 47642 47681 47695 47700 47824 47863 47918 47938 47985]
Number of unique players w/o discrepancies df_all_3min:  5453


#### First 4 minutes

In [15]:
# Build the 4-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_4min = pf.transform_ml(data_b=df_bottom_4min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_4min: ", df_all_4min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_4 = df_all_4min.loc[df_all_4min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_4)

# Drop every row belonging to those players
df_all_4min = df_all_4min.loc[~df_all_4min['playerkey'].isin(list_discrepancy_4)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_4min: ", df_all_4min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_4min = df_all_4min.reindex(columns=desired_order)

Number of unique players in df_all_4min:  5548
[ 2222  4092  7774  7786  7967 16539 16589 28397 31877 34589 35343 41780
 41897 42049 42610 42639 42721 42746 42998 43115 43141 43155 43344 43410
 43448 43718 43754 43830 43835 43867 43982 44405 44592 44873 44931 44980
 45119 45194 45299 45331 45352 45737 45770 45905 45907 45927 45959 46023
 46149 46257 46403 46407 46426 46450 46501 46508 46676 46744 46750 46780
 46783 46905 46919 46953 46965 46992 47068 47337 47399 47478 47498 47612
 47630 47642 47695 47700 47824 47863 47918 47938 47985]
Number of unique players w/o discrepancies df_all_4min:  5467


#### First 5 minutes

In [16]:
# Build the 5-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_5min = pf.transform_ml(data_b=df_bottom_5min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_5min: ", df_all_5min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_5 = df_all_5min.loc[df_all_5min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_5)

# Drop every row belonging to those players
df_all_5min = df_all_5min.loc[~df_all_5min['playerkey'].isin(list_discrepancy_5)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_5min: ", df_all_5min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_5min = df_all_5min.reindex(columns=desired_order)

Number of unique players in df_all_5min:  5548
[ 2222  4092  7774  7786  7967 16539 16589 28397 31877 34589 35343 41780
 41897 42049 42610 42721 42746 42998 43141 43155 43344 43410 43448 43718
 43754 43830 43835 43867 43982 44405 44592 44702 44873 44931 44951 44980
 45119 45194 45331 45352 45737 45770 45905 45907 45959 46023 46149 46257
 46403 46407 46426 46450 46501 46508 46676 46744 46750 46780 46783 46905
 46953 47068 47337 47399 47478 47498 47612 47630 47642 47695 47700 47824
 47863 47918 47938 47985]
Number of unique players w/o discrepancies df_all_5min:  5472


#### First 10 minutes

In [17]:
# Build the 10-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_10min = pf.transform_ml(data_b=df_bottom_10min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_10min: ", df_all_10min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_10 = df_all_10min.loc[df_all_10min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_10)

# Drop every row belonging to those players
df_all_10min = df_all_10min.loc[~df_all_10min['playerkey'].isin(list_discrepancy_10)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_10min: ", df_all_10min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_10min = df_all_10min.reindex(columns=desired_order)

Number of unique players in df_all_10min:  5548
[ 2222  4092  7774  7786  7967 16539 16589 19888 30747 34589 35343 41780
 41897 42049 42610 42721 42746 42964 42998 43141 43155 43344 43448 43718
 43830 43835 43867 44405 44702 44873 44931 44951 45194 45331 45352 45429
 45737 45770 45907 45959 46023 46132 46149 46226 46257 46397 46403 46407
 46426 46450 46501 46508 46656 46676 46780 46783 46883 46905 46953 47068
 47337 47478 47498 47524 47612 47630 47652 47758 47824 47863 47918 47938
 47985]
Number of unique players w/o discrepancies df_all_10min:  5475


#### First 15 minutes

In [18]:
# Build the 15-minute feature table with pf.transform_ml, grouping on 'session_time'
df_all_15min = pf.transform_ml(data_b=df_bottom_15min, grouping='session_time')

# Player count before any rows are dropped
print("Number of unique players in df_all_15min: ", df_all_15min['playerkey'].nunique())

# Player IDs that own at least one row containing a NaN in any column
list_discrepancy_15 = df_all_15min.loc[df_all_15min.isna().any(axis=1), 'playerkey'].unique()

# Show the affected player IDs
print(list_discrepancy_15)

# Drop every row belonging to those players from df_all_15min
df_all_15min = df_all_15min.loc[~df_all_15min['playerkey'].isin(list_discrepancy_15)]

# Player count after the drop
print("Number of unique players w/o discrepancies df_all_15min: ", df_all_15min['playerkey'].nunique())

# Apply the shared `desired_order` column layout (unlisted columns are dropped,
# listed-but-missing names become all-NaN columns)
df_all_15min = df_all_15min.reindex(columns=desired_order)

Number of unique players in df_all_15min:  5548
[ 2222  3990  4092  7774  7786  7967 13312 16539 16589 19888 30747 31877
 34589 35343 41780 41897 42049 42610 42721 42746 42964 42998 43141 43155
 43344 43448 43718 43830 43835 43867 44405 44433 44702 44873 44951 45194
 45331 45352 45429 45737 45770 45907 45959 46023 46132 46149 46226 46397
 46403 46407 46426 46450 46501 46508 46656 46676 46780 46783 46883 46905
 46953 47068 47337 47451 47478 47524 47612 47630 47652 47758 47824 47863
 47938]
Number of unique players w/o discrepancies df_all_15min:  5475


## Eliminate Discrepancies

In [19]:
# Pool the per-window NaN-player arrays (1, 2, 3, 4, 5, 10 and 15 minutes) into one array.
# A player flagged in several windows appears several times in the pooled array.
total_discrepancies = np.concatenate([
    list_discrepancy,
    list_discrepancy_2,
    list_discrepancy_3,
    list_discrepancy_4,
    list_discrepancy_5,
    list_discrepancy_10,
    list_discrepancy_15,
])

# Length of the pooled array — despite the printed label, this count still includes repeats
print("Number of unique players in total_discrepancies: ", total_discrepancies.shape[0])

# De-duplicate the pooled IDs by passing them through a set
total_unique_discrepancies = [*set(total_discrepancies)]

# Number of distinct flagged players
print("Number of unique players in total_unique_discrepancies: ", len(total_unique_discrepancies))

Number of unique players in total_discrepancies:  739
Number of unique players in total_unique_discrepancies:  238


In [20]:
# Remove every player in total_unique_discrepancies from all seven window tables,
# so a player flagged in any window is excluded from every window
(
    df_all_1min,
    df_all_2min,
    df_all_3min,
    df_all_4min,
    df_all_5min,
    df_all_10min,
    df_all_15min,
) = (
    _table[~_table['playerkey'].isin(total_unique_discrepancies)]
    for _table in (
        df_all_1min,
        df_all_2min,
        df_all_3min,
        df_all_4min,
        df_all_5min,
        df_all_10min,
        df_all_15min,
    )
)

# Report the number of distinct players left in each table
for _caption, _table in (
    ("Number of unique players in df_all_1min: ", df_all_1min),
    ("Number of unique players in df_all_2min: ", df_all_2min),
    ("Number of unique players in df_all_3min: ", df_all_3min),
    ("Number of unique players in df_all_4min: ", df_all_4min),
    ("Number of unique players in df_all_5min: ", df_all_5min),
    ("Number of unique players in df_all_10min: ", df_all_10min),
    ("Number of unique players in df_all_15min: ", df_all_15min),
):
    print(_caption, _table['playerkey'].nunique())


Number of unique players in df_all_1min:  5310
Number of unique players in df_all_2min:  5310
Number of unique players in df_all_3min:  5310
Number of unique players in df_all_4min:  5310
Number of unique players in df_all_5min:  5310
Number of unique players in df_all_10min:  5310
Number of unique players in df_all_15min:  5310


In [21]:
# Save each window's cleaned feature table to
#   <By month>/<month_file>/Ending Balances/Per_Player/df_<window>_top_vs_ntop_players.parquet
# The output directory is built once (it was previously repeated in all seven calls) and is
# created if it does not exist yet, so a fresh month folder does not make to_parquet fail.
# NOTE(review): absolute, user-specific path — consider a configurable data directory
output_dir = os.path.join(
    '/Users/mau/Library/CloudStorage/Dropbox/Mac/Documents/Dissertation/Chapter 2/Entire_Data/By month/',
    month_file,
    'Ending Balances',
    'Per_Player',
)
os.makedirs(output_dir, exist_ok=True)

# File names and target paths are the same as before; only the way the path is assembled changed
for _window, _table in (
    ('1min', df_all_1min),
    ('2min', df_all_2min),
    ('3min', df_all_3min),
    ('4min', df_all_4min),
    ('5min', df_all_5min),
    ('10min', df_all_10min),
    ('15min', df_all_15min),
):
    _table.to_parquet(os.path.join(output_dir, 'df_' + _window + '_top_vs_ntop_players.parquet'))