## COD API, Data visualization tests

### Sankey / Flow Chart tests

In [None]:
# Raw, untouched view of the match payload (before any column is expanded).
# NOTE(review): `match` is assigned by the offline-loading cell below; run that cell first.
raw = pd.DataFrame(data=match)
raw.head(n=5)

In [None]:
# Load the match payload that was saved offline.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('match.pkl', mode='rb') as fh:
    match = pickle.load(fh)

# One row per entry of the payload; show a sample and the available columns.
df = pd.DataFrame(data=match)
display(df.head(n=2))
print(df.columns)

#### Which columns matter, data exploration

In [None]:
# Build our players kills etc data
def formatToCircle(df):
    """Flatten the nested player data and tag every row with the circle reached.

    The dict-like ``player`` and ``playerStats`` columns are expanded into
    regular columns, each ``objectiveBrDownEnemy*`` column is renamed to
    ``kills_c<N>``, and ``teamSurvivalTime`` (milliseconds) is binned into a
    ``circle`` label (``c1`` .. ``c6``).

    Args:
        df: DataFrame with ``player`` and ``playerStats`` columns holding
            dict-like values, plus a ``duration`` column (either top-level
            or inside one of the expanded dicts).

    Returns:
        DataFrame restricted to username, team, duration, teamSurvivalTime,
        circle, timePlayed, teamPlacement, kills and the ``kills_c<N>``
        columns that exist for this match.
    """
    player_col = df['player'].apply(pd.Series)
    player_stats_col = df['playerStats'].apply(pd.Series)
    df = pd.concat([df, player_col, player_stats_col], axis=1)

    keep_cols = [
        'username',
        'team',
        'duration',
        'teamSurvivalTime',
        'circle',
        'timePlayed',
        'teamPlacement',
        'kills',
    ]

    # Depending on match length and on kills made in the last circles, the
    # number of objectiveBrDownEnemy* columns varies (the API's max is 6).
    cols_objective = sorted(
        col for col in df.columns
        if isinstance(col, str) and col.startswith('objectiveBrDownEnemy')
    )

    # Label each column with the circle number carried by its own name, not
    # with its position in the list: when one circle is absent from the
    # payload, positional numbering would shift every later circle down.
    new_objective_labels = {}
    for position, col in enumerate(cols_objective, start=1):
        digits = col[len(col.rstrip('0123456789')):]
        # Fall back to the position only when the name carries no number.
        circle_no = int(digits) if digits else position
        new_objective_labels[col] = 'kills_c' + str(circle_no)

    # teamSurvivalTime in ms -> circle reached. The last edge lumps everything
    # after circle 5 into c6. Intervals are right-closed, so a value of
    # exactly 0 or above the last edge yields NaN.
    bins = [0, 480000, 790000, 1035000, 1205000, 1335000, 1735000]
    labels = ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']
    df['circle'] = pd.cut(df['teamSurvivalTime'], bins=bins, labels=labels)

    df = df.rename(new_objective_labels, axis=1)
    return df[keep_cols + list(new_objective_labels.values())]

df = formatToCircle(df)

# Three quick looks at the same table: best killers, best placements, longest survivors.
top_killers = df.sort_values(by='kills', ascending=False)
display('sorted by kills', top_killers.head(6))

best_placed = df.sort_values(by='teamPlacement', ascending=True)
display('sorted by team placement', best_placed.head(6))

longest_alive = df.sort_values(by=['teamSurvivalTime', 'timePlayed', 'kills'], ascending=False)
display('sorted by team survival time', longest_alive.head(6))

##### Quick Notes
- overall not sure whether the displayed timings match the API timings, or whether I must include a circle 0 that runs from 0 to the first "gas moving in" countdown
- objectiveBrDown : API max is 6, but in game goes to 8-9. Circle 6 may be aggregation circle 5 or 6, to 9
- game duration : API format is milliseconds, roughly fixed length of 26-27mn (+- 1630000 ms) for each game in BR mode. Game length can be 26:32, 27:17, 27:20, etc.
- timePlayed : API format is seconds. Playtime of a player, BUT this behavior happens (rarely though): the time counter keeps running. E.g. a player can have a 28mn playtime but a placement of 49. Probably the player doesn't leave a certain screen, and/or stays connected to watch other players' cams, or it is an Acti bug
- timePlayed : does it include the pregame lobby, or is gulag time counted twice? Most probably it includes the post-win (credits) time, or my conversion is wrong... I noticed that the timePlayed of a team placed 2nd can be > the timePlayed of the team placed 1st.
- teamSurvivalTime : API format is milliseconds. Effective playtime of a team, i.e. as long as at least one player stays alive.
- what is the max range of objective down enemy? Does it correspond to the timings?

#### Convert time/duration values so we can read them

In [None]:
def _to_minutes_seconds(values, unit):
    """Format a numeric duration column, expressed in `unit`, as 'MM:SS' strings.

    Vectorized: the whole column is converted at once, and missing values
    stay missing instead of raising. '%M:%S' shows minutes within the hour,
    so durations of one hour or more would wrap around.
    """
    return pd.to_datetime(values, unit=unit).dt.strftime('%M:%S')


# Sort on the numeric values first; the columns become strings afterwards.
df_human = df.sort_values(['teamSurvivalTime', 'timePlayed', 'kills'], ascending=False)
df_human['duration'] = _to_minutes_seconds(df_human['duration'], 'ms')  # API duration is in ms
df_human['timePlayed'] = _to_minutes_seconds(df_human['timePlayed'], 's')  # API timePlayed is in seconds
df_human['teamSurvivalTime'] = _to_minutes_seconds(df_human['teamSurvivalTime'], 'ms')  # API teamSurvivalTime is in ms
display('sorted by team survival time', df_human.head(10))
display('least survival time', df_human.tail(5))

In [None]:
# Rows of the two players we want to follow.
watched_players = ['gentil_renard', 'nicoyzovitch']
df_human[df_human['username'].isin(watched_players)]

#### Data Preparation, playtime/duration, to what "circle" is reached

##### In-game circle timings : 
1. circle 1 outlined, 3mn30 countdown, "gas moving in" announcement, 4mn30 countdown
2. circle 2 : 1mn30 countdown, "gas moving in" announcement, 3mn40 countdown
3. circle 3 : 1mn15 countdown, "gas moving in" announcement, 2mn50 countdown
4. circle 4 : 1mn countdown, "gas moving in" announcement, 1mn50 countdown
5. circle 5 : 1 mn countdown, "gas moving in" announcement, 1mn10 countdown
6. circle 6 : 45sec countdown, "gas moving in" announcement, 50sec countdown
7. circle 7 : 30sec countdown, "gas moving in" announcement, 50sec countdown
8. circle 8 : 10sec countdown, "gas moving in" announcement, 30sec countdown

##### Table : cumulative length mn and milliseconds: <br>

circle | timer, mn | timer, ms
:------|:-----------|---------------------------------------------------
circle 1 | : 0 to 8mn  | 0 to 480000
circle 2 | : 8 to 13mn10  | 480000 to 790000
circle 3 | : 13mn10 to 17mn15 | 790000 to 1035000
circle 4 | : 17mn15 to 20mn05 | 1035000 to 1205000
circle 5 | : 20mn05 to 22mn15 | 1205000 to 1335000
circle 6 | : 22mn15 to 23mn50 | 1335000 to 1430000
circle 7 | : 23mn50 to 25mn10 | 1430000 to 1510000
circle 8 | : 25mn10 to 25mn50 | 1510000 to  1550000

In [None]:
# 26 min 30 s converted to milliseconds (evaluates to 1590000)
26*60000+30*1000

##### Survival time to circle reached

In [None]:
# Rebuild from the raw payload, then list the rows with the longest team survival time.
df = formatToCircle(pd.DataFrame(match))
df.sort_values(by='teamSurvivalTime', ascending=False).head(12)

#### Convert into Sankey format

In [None]:
# We could try with players, but we work with teams (+ teamSurvivalTime), which seems more reliable
def groupByTeam(df):
    """Collapse the per-player rows into one row per team.

    Per team, the ``kills_c*`` columns are summed and the first ``circle``
    and ``teamPlacement`` values are kept. The other aggregated columns are
    computed but not returned.

    Returns:
        DataFrame with ``team``, ``circle``, ``teamPlacement`` and the
        ``kills_c*`` columns, sorted by ``teamPlacement``.
    """
    kill_cols = sorted(df.columns[df.columns.str.startswith('kills_c')])

    aggregations = {
        'duration': 'first',
        'username': ', '.join,
        'teamSurvivalTime': 'first',
        'timePlayed': 'max',
        'teamPlacement': 'first',
        'kills': 'sum',
        'circle': 'first',
        **{name: 'sum' for name in kill_cols},
    }
    per_team = df.groupby('team').agg(aggregations).reset_index()

    wanted = ['team', 'circle', 'teamPlacement', *kill_cols]
    return per_team.filter(items=wanted, axis=1).sort_values('teamPlacement')



In [None]:
# Player-level table first, for reference.
df = formatToCircle(pd.DataFrame(match))
display(df.head(2))

# Team-level table: show both ends once ordered by circle reached.
df = groupByTeam(df)
teams_by_circle = df.sort_values('circle')
display(teams_by_circle.head(3))
display(teams_by_circle.tail(3))

In [None]:
# Stack every per-circle alive/dead table into a single frame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and, like append, keeps each frame's original index.
# NOTE(review): the df*_alive / df*_dead / df6_* frames are not defined in the
# visible part of this notebook - confirm they still exist before running.
df_data = pd.concat([
    df1_alive, df1_dead,
    df2_alive, df2_dead,
    df3_alive, df3_dead,
    df4_alive, df4_dead,
    df5_alive, df5_dead,
    df6_top1, df6_top10, df6_rest,
])

##### A function to wrap the above

#### PLOTTTT

In [None]:
# Start again from the raw payload for the plotting section.
df = formatToCircle(pd.DataFrame(match))
display(df.head(n=12))

In [None]:
# One indicator column per circle label (c1..c6): 1 on the circle stored in
# the row's 'circle' value, 0 elsewhere.
# dtype=int is explicit because pandas >= 2.0 returns booleans by default, and
# the labelling below relies on 0/1 values.
# (The former columns=['team'] argument was dropped: it only applies when
# get_dummies receives a DataFrame, not a Series.)
df_extended = pd.get_dummies(df['circle'], drop_first=False, dtype=int)
df = pd.concat([df, df_extended], axis=1)
display(df.head(3))


# + Visualization choices :
# - We use a column named 'start' instead of 'team'. This way we have a single
#   'start' node instead of n nodes for n teams, which lightens the chart.
df['start'] = 'start'
# - Flow (line) weight is the number of kills (could also be the cumulative sum).
#   Rows with 0 kills at a given circle would not render, so every count is
#   shifted by 1 (0 -> 1, k -> k + 1) and missing counts become 1.
obj_cols = sorted(df.columns[df.columns.str.startswith('kills_c')].tolist())
df[obj_cols] += 1
df[obj_cols] = df[obj_cols].fillna(1)


cols = ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']

# Mark the circle where each row was eliminated. The column is converted to
# object dtype so it can hold both the 0 placeholders and the string labels.
for col in cols:
    df[col] = df[col].astype(object).where(df[col] != 1, 'dead_at_' + col)

# A row eliminated at circle N was necessarily alive in circles 1..N-1.
for idx, col in enumerate(cols):
    died_here = df[col] == 'dead_at_' + col
    for previous_col in cols[:idx]:
        df.loc[died_here, previous_col] = 'alive_at_' + previous_col
df.head()

In [None]:
def _circle_links(frame, source, target, value):
    """Return one Sankey level as a frame with source / target / value columns.

    Only rows marked 'alive_at_<source>' in the `source` column are kept,
    except for the synthetic 'start' column, which keeps every row.
    """
    if source != 'start':
        frame = frame[frame[source] == 'alive_at_' + source]
    links = frame[[source, target, value]].copy()
    links.columns = ['source', 'target', 'value']
    return links


# circle 1, 1st level of our Sankey Diagram
# all teams start with circle 1 displayed; at the end of circle 1 (also the
# start of circle 2!) they are either alive or dead
df_circle1 = _circle_links(df, 'start', 'c1', 'kills_c1')
display(df_circle1.head(2))

# circle 2
# all teams "alive at circle 1" move on to circle 2; they are either alive or
# dead at the end of circle 2
df_circle2 = _circle_links(df, 'c1', 'c2', 'kills_c2')
display(df_circle2.head(2))

# circle 3
df_circle3 = _circle_links(df, 'c2', 'c3', 'kills_c3')
display(df_circle3.head(2))

# circle 4
df_circle4 = _circle_links(df, 'c3', 'c4', 'kills_c4')
display(df_circle4.head(2))

# circle 5
df_circle5 = _circle_links(df, 'c4', 'c5', 'kills_c5')
display(df_circle5.head(2))

# circle 6 is the final circle (in the API; in game it can go up to 9). Teams
# are marked "dead" as they all die at level 6 or beyond, except the winner.
df_circle6 = _circle_links(df, 'c5', 'c6', 'kills_c6')
display(df_circle6.head(2))

# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same
# row order and the same (non-reset) index.
df_data = pd.concat([df_circle1, df_circle2, df_circle3, df_circle4, df_circle5, df_circle6])
df_data = df_data.replace(0, 1)
df_data

In [None]:
# Sankey links refer to nodes by integer position, so give every distinct
# source/target label exactly one id, in first-seen order. De-duplicating
# keeps the ids contiguous (so `nodes` can double as the label list) and the
# dict lookup avoids a linear list.index() scan for every row.
nodes = list(dict.fromkeys(df_data['source'].tolist() + df_data['target'].tolist()))
node_ids = {label: position for position, label in enumerate(nodes)}
df_data['sourceId'] = [node_ids[source] for source in df_data['source']]
df_data['targetId'] = [node_ids[target] for target in df_data['target']]
df_data.head(20)

In [None]:
df_data.tail()

In [None]:
# creating the sankey diagram

# Sankey trace without node labels: only the link ids and weights are supplied.
sankey_trace = go.Sankey(
    arrangement="snap",
    node=dict(
        pad=2,  # padding between nodes
        color='yellow',
    ),
    link={
        'source': df_data['sourceId'],  # integer node ids
        'target': df_data['targetId'],
        'value': df_data['value'],
    },
)

fig = go.Figure(data=[sankey_trace])

fig.show()

In [None]:
# creating the sankey diagram

# Same Sankey diagram as before, this time with explicit node labels and
# hand-placed node coordinates.
# NOTE(review): "label" has 2 entries while "x" and "y" have 12 each - confirm
# the intended label/position for every node id used by the links.
fig = go.Figure(data=[go.Sankey(
    arrangement = "snap",
    node = {
        "label": ["START", "CIRCLE 1 ALIVE"],
        "x": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
        "y": [0.1, 0.6, 0.4, 0.5, 0.6, 0.6, 0.8, 0.6, 0.7, 0.7, 0.8, 0.7],
        'pad':10,  # padding between nodes
        'color':'yellow'},
    link = dict(
      source = df_data['sourceId'], # integer node ids of each link's origin
      target = df_data['targetId'],
      value = df_data['value']
  ))])

fig.show()