# Purpose of notebook
This notebook is to continue to explore the data, in particular the time-series and bi-modal distributions from previous version. 

Topics covered:
- data normalization by round
- data cleaning/flattening


In [1]:
import json
import pandas as pd

## Loading the data

In [2]:
# Load JSON data from file
with open('dev_data.json', 'r') as f:
    data = json.load(f)

# Access an element to retrieve the keys
sample_element = data[0]
columns = sample_element.keys()
print("Columns:", columns)

num_rows = len(data)
print("Number of rows:", num_rows)

Columns: dict_keys(['id', 'game_id', 'map_id', 'processed', 'process_date', 'created_at', 'updated_at', 'match_id', 'result', 'score', 'statistics', 'totals', 'ten_minute_averages', 'character_id', 'user_id', 'match_length', 'video_id', 'card_url', 'total_time', 'totals_to_date', 'start_time', 'end_time', 'start_frame', 'end_frame', 'status', 'report', 'analysis_processed', 'analysis_status'])
Number of rows: 534


In [3]:
# Observing a random entry
data[533]

{'id': 1461,
 'game_id': 3,
 'map_id': 134,
 'processed': True,
 'process_date': None,
 'created_at': '2023-02-27T21:05:53.784Z',
 'updated_at': '2023-03-01T16:36:13.672Z',
 'match_id': None,
 'result': 'win',
 'score': '13-7',
 'statistics': {'map': 'Lotus',
  'score': '13-7',
  'allies': ['0', '1', '2', '3', '4'],
  'player': '2',
  'result': 'win',
  'rounds': {'0': {'alive': {'0': [[194, True], [56987, False]],
     '1': [[194, True], [78984, False]],
     '2': [[194, True], [74584, False]],
     '3': [[194, True], [48388, False]],
     '4': [[194, True], [63186, False]],
     '5': [[194, True],
      [56587, False],
      [64386, True],
      [77384, False],
      [79384, True]],
     '6': [[194, True]],
     '7': [[194, True]],
     '8': [[194, True]],
     '9': [[194, True]]},
    'elims': [[48388,
      {'source': '7',
       'target': '3',
       'assisted': [],
       'headshot': False,
       'wallbang': False,
       'elim_number': 1,
       'first_blood': True,
       'tea

In [4]:
# Observing specifically the stats col
data[0]['statistics']

{'map': 'Pearl',
 'score': '13-3',
 'allies': ['0', '1', '2', '3', '4'],
 'player': '1',
 'result': 'win',
 'rounds': {'7': {'alive': {'0': [[0, True], [4800, False]],
    '1': [[0, True], [53800, False]],
    '2': [[0, True], [57000, False]],
    '3': [[0, True], [112400, False]],
    '4': [[0, True], [112400, False], [124400, True]],
    '5': [[0, True], [41600, False], [48600, True], [53800, False]],
    '6': [[0, True], [112400, False]],
    '7': [[0, True], [57600, False]],
    '8': [[0, True], [112400, False]],
    '9': [[0, True], [42400, False]]},
   'elims': [[42400,
     {'source': '4',
      'target': '9',
      'assisted': [],
      'headshot': True,
      'wallbang': False,
      'elim_number': 1,
      'first_blood': True,
      'team_credit': 'ally'}],
    [54000,
     {'source': '6',
      'target': '1',
      'assisted': [],
      'headshot': True,
      'wallbang': False,
      'elim_number': 1,
      'first_blood': False,
      'team_credit': 'opponent'}],
    [57600

## Create Rounds DF
- Create DF
- Go to statistics col

In [5]:
from pandas import json_normalize

# Create df
df = pd.read_json('dev_data.json')
print(df.shape)

# Drop the all cols besides 'statistics' and 'user_id'
df_stats = df[['user_id','statistics']]
print(df_stats.shape)
df_stats

(534, 28)
(534, 2)


Unnamed: 0,user_id,statistics
0,1006,"{'map': 'Pearl', 'score': '13-3', 'allies': ['..."
1,1006,"{'map': 'Haven', 'score': '9-13', 'allies': ['..."
2,1006,"{'map': 'Split', 'score': '13-4', 'allies': ['..."
3,1011,"{'map': 'Fracture', 'score': '14-12', 'allies'..."
4,1011,"{'map': 'Haven', 'score': '13-10', 'allies': [..."
...,...,...
529,1010,"{'map': 'Ascent', 'score': '13-8', 'allies': [..."
530,1011,"{'map': 'Bind', 'score': '14-12', 'allies': ['..."
531,1011,"{'map': 'unknown', 'score': '13-11', 'allies':..."
532,1018,"{'map': 'Split', 'score': '13-4', 'allies': ['..."


### Flattening the stats col by 1 level

In [6]:
flat1_df_stats = pd.json_normalize(df_stats['statistics'], max_level=0)
flat1_df_stats = pd.concat([df_stats['user_id'], flat1_df_stats], axis=1)
print(flat1_df_stats.shape)
flat1_df_stats

(534, 34)


Unnamed: 0,user_id,map,score,allies,player,result,rounds,status,totals,version,...,best_weapon_type,best_weapon_elims,detections_totals,most_used_primary,analysis_processed,opponents_onscreen,most_used_secondary,best_weapon_type_elims,most_used_primary_seconds,most_used_secondary_seconds
0,1006,Pearl,13-3,"[0, 1, 2, 3, 4]",1,win,"{'7': {'alive': {'0': [[0, True], [4800, False...",success: completed - valorant final,"{'elims': 9, 'deaths': 3, 'assists': 2, 'heali...",1.0.0,...,primary,4,"{'7': {'final_time': 19850, 'initial_size': {'...",vandal,True,"{'5250': 0, '19600': 1, '22150': 0, '28700': 1...",sheriff,5,309,109
1,1006,Haven,9-13,"[0, 1, 2, 3, 4]",4,loss,"{'0': {'alive': {'0': [[95000, True], [140000,...",success: completed - valorant final,"{'elims': 21, 'deaths': 17, 'assists': 0, 'hea...",1.0.0,...,primary,9,"{'26': {'final_time': 46900, 'initial_size': {...",vandal,True,"{'14200': 0, '46450': 1, '50800': 0, '60100': ...",ghost,14,298,94
2,1006,Split,13-4,"[0, 1, 2, 3, 4]",3,win,"{'0': {'alive': {'0': [[3600, True], [50600, F...",success: completed - valorant final,"{'elims': 32, 'deaths': 8, 'assists': 3, 'heal...",1.0.0,...,primary,4,"{'55': {'final_time': 78100, 'initial_size': {...",vandal,True,"{'1600': 0, '77900': 1, '78000': 0, '78100': 1...",sheriff,12,192,195
3,1011,Fracture,14-12,"[0, 1, 2, 3, 4]",0,win,"{'0': {'alive': {'0': [[110199, True], [175399...",success: completed - valorant final,"{'elims': 14, 'deaths': 12, 'assists': 12, 'he...",1.0.0,...,primary,6,"{'51': {'final_time': 117249, 'initial_size': ...",vandal,True,"{'33799': 0, '116999': 1, '117299': 0, '125449...",frenzy,10,1455,137
4,1011,Haven,13-10,"[0, 1, 2, 3, 4]",2,win,"{'0': {'alive': {'0': [[73399, True], [153799,...",success: completed - valorant final,"{'elims': 15, 'deaths': 16, 'assists': 6, 'hea...",1.0.0,...,primary,8,"{'33': {'final_time': 91399, 'initial_size': {...",vandal,True,"{'6199': 0, '91199': 1, '92299': 0, '92349': 1...",frenzy,9,1131,291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
529,1010,Ascent,13-8,"[0, 1, 2, 3, 4]",0,win,"{'0': {'alive': {'0': [[49789, True], [71389, ...",success: completed - valorant postprocess,"{'elims': 12, 'deaths': 11, 'assists': 9, 'hea...",,...,primary,4,,vandal,,,ghost,7,507,157
530,1011,Bind,14-12,"[0, 1, 2, 3, 4]",0,win,"{'0': {'alive': {'0': [[44988, True], [88188, ...",success: completed - valorant postprocess,"{'elims': 24, 'deaths': 19, 'assists': 8, 'hea...",,...,primary,8,,phantom,,,frenzy,12,1268,105
531,1011,unknown,13-11,"[0, 1, 2, 3, 4]",0,win,"{'0': {'alive': {'0': [[9, True], [113209, Fal...",success: completed - valorant postprocess,"{'elims': 23, 'deaths': 16, 'assists': 6, 'hea...",,...,primary,8,,vandal,,,ghost,12,664,139
532,1018,Split,13-4,"[0, 1, 2, 3, 4]",3,win,"{'0': {'alive': {'0': [[199, True], [168177, F...",success: completed - valorant postprocess,"{'elims': 11, 'deaths': 14, 'assists': 6, 'hea...",,...,primary,3,,phantom,,,sheriff,6,294,39


In [7]:
flat1_df_stats.columns

Index(['user_id', 'map', 'score', 'allies', 'player', 'result', 'rounds',
       'status', 'totals', 'version', 'end_time', 'gametype', 'opponents',
       'processed', 'ally_score', 'player_ids', 'round_info', 'start_time',
       'best_weapon', 'round_totals', 'player_totals', 'opponent_score',
       'allies_onscreen', 'analysis_status', 'best_weapon_type',
       'best_weapon_elims', 'detections_totals', 'most_used_primary',
       'analysis_processed', 'opponents_onscreen', 'most_used_secondary',
       'best_weapon_type_elims', 'most_used_primary_seconds',
       'most_used_secondary_seconds'],
      dtype='object')

In [8]:
flat1_df_stats['best_weapon_type_elims']

0       5
1      14
2      12
3      10
4       9
       ..
529     7
530    12
531    12
532     6
533     9
Name: best_weapon_type_elims, Length: 534, dtype: int64

### Removing some cols we don't care about from the ROUND perspective
- Keep in mind the 'totals', 'result' are from the MATCH perspective
- even though the map is from match perspective I need it so I am adding it in (similar to userID)
- weapon stuff is all from match perspective, so I am dropping (some exists in rounds data anyway)
- I think its possible to do something with 'allies_onscreen', 'opponents_onscreen', and 'detections_totals' but I am dropping for now


In [9]:
flat1_df_stats = flat1_df_stats.drop(['totals','score', 'allies','result', 'status','gametype','version','end_time','opponents','processed','ally_score','start_time','opponent_score','detections_totals','best_weapon_elims','most_used_primary_seconds','analysis_processed','most_used_secondary_seconds','best_weapon_type_elims','opponents_onscreen','allies_onscreen','analysis_status','player_totals','best_weapon_type','best_weapon','most_used_secondary','most_used_primary','best_weapon_type_elims'], axis=1)
print(flat1_df_stats.shape)
flat1_df_stats

(534, 7)


Unnamed: 0,user_id,map,player,rounds,player_ids,round_info,round_totals
0,1006,Pearl,1,"{'7': {'alive': {'0': [[0, True], [4800, False...","{'0': {'role': 'ally', 'character': 'phoenix'}...","{'7': {'score': '4-3', 'ult_used': False, 'all...","{'7': {'elims': 0, 'deaths': 1, 'assists': 0, ..."
1,1006,Haven,4,"{'0': {'alive': {'0': [[95000, True], [140000,...","{'0': {'role': 'ally', 'character': 'kay/o'}, ...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ..."
2,1006,Split,3,"{'0': {'alive': {'0': [[3600, True], [50600, F...","{'0': {'role': 'ally', 'character': 'skye'}, '...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 1, 'deaths': 0, 'assists': 0, ..."
3,1011,Fracture,0,"{'0': {'alive': {'0': [[110199, True], [175399...","{'0': {'role': 'self', 'character': 'breach'},...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ..."
4,1011,Haven,2,"{'0': {'alive': {'0': [[73399, True], [153799,...","{'0': {'role': 'ally', 'character': 'omen'}, '...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ..."
...,...,...,...,...,...,...,...
529,1010,Ascent,0,"{'0': {'alive': {'0': [[49789, True], [71389, ...","{'0': {'role': 'self', 'character': 'astra'}, ...","{'0': {'score': '0-0', 'ally_side': 'attacker'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ..."
530,1011,Bind,0,"{'0': {'alive': {'0': [[44988, True], [88188, ...","{'0': {'role': 'self', 'character': 'viper'}, ...","{'0': {'score': '0-0', 'ally_side': 'defender'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ..."
531,1011,unknown,0,"{'0': {'alive': {'0': [[9, True], [113209, Fal...","{'0': {'role': 'self', 'character': 'viper'}, ...","{'0': {'score': '0-0', 'ally_side': 'unknown',...","{'0': {'elims': 1, 'deaths': 1, 'assists': 0, ..."
532,1018,Split,3,"{'0': {'alive': {'0': [[199, True], [168177, F...","{'0': {'role': 'ally', 'character': 'yoru'}, '...","{'0': {'score': '0-0', 'ally_side': 'defender'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ..."


In [10]:
flat1_df_stats['player_ids'][0]

{'0': {'role': 'ally', 'character': 'phoenix'},
 '1': {'role': 'self', 'character': 'jett'},
 '2': {'role': 'ally', 'character': 'gekko'},
 '3': {'role': 'ally', 'character': 'cypher'},
 '4': {'role': 'ally', 'character': 'astra'},
 '5': {'role': 'opponent', 'character': 'harbor'},
 '6': {'role': 'opponent', 'character': 'astra'},
 '7': {'role': 'opponent', 'character': 'jett'},
 '8': {'role': 'opponent', 'character': 'phoenix'},
 '9': {'role': 'opponent', 'character': 'killjoy'}}

### Flattening the player_ids col by 1 level
- this is getting all the teammates and opponents, as well as mapping the 'player' to an ally

In [11]:
# Create an empty list to hold the expanded player ids
expanded_player_ids_list = []

for i, player_ids in enumerate(flat1_df_stats['player_ids']):
    mapping = {}
    for player_id, player_info in player_ids.items():
        # If the role is 'self', map it to 'ally'
        role = 'ally' if player_info['role'] == 'self' else player_info['role']
        column_name = f"{role}{player_id}_character"
        mapping[column_name] = player_info['character']
    
    # If mapping is not empty
    if mapping:
        expanded_player_ids_list.append(mapping)
        
# Create the expanded_player_ids DataFrame
expanded_player_ids = pd.DataFrame(expanded_player_ids_list)

# Concatenate the original DataFrame with the expanded columns
flat2_df_stats_chars = pd.concat([flat1_df_stats.drop('player_ids', axis=1), expanded_player_ids], axis=1)

print(flat2_df_stats_chars.shape)
flat2_df_stats_chars

(534, 16)


Unnamed: 0,user_id,map,player,rounds,round_info,round_totals,ally0_character,ally1_character,ally2_character,ally3_character,ally4_character,opponent5_character,opponent6_character,opponent7_character,opponent8_character,opponent9_character
0,1006,Pearl,1,"{'7': {'alive': {'0': [[0, True], [4800, False...","{'7': {'score': '4-3', 'ult_used': False, 'all...","{'7': {'elims': 0, 'deaths': 1, 'assists': 0, ...",phoenix,jett,gekko,cypher,astra,harbor,astra,jett,phoenix,killjoy
1,1006,Haven,4,"{'0': {'alive': {'0': [[95000, True], [140000,...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ...",kay/o,yoru,jett,chamber,gekko,brimstone,killjoy,gekko,sova,jett
2,1006,Split,3,"{'0': {'alive': {'0': [[3600, True], [50600, F...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 1, 'deaths': 0, 'assists': 0, ...",skye,omen,killjoy,jett,breach,sage,raze,skye,omen,cypher
3,1011,Fracture,0,"{'0': {'alive': {'0': [[110199, True], [175399...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ...",breach,brimstone,cypher,raze,jett,raze,brimstone,cypher,harbor,breach
4,1011,Haven,2,"{'0': {'alive': {'0': [[73399, True], [153799,...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ...",omen,killjoy,fade,jett,skye,killjoy,astra,sova,jett,reyna
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
529,1010,Ascent,0,"{'0': {'alive': {'0': [[49789, True], [71389, ...","{'0': {'score': '0-0', 'ally_side': 'attacker'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ...",astra,kay/o,sova,killjoy,jett,omen,killjoy,reyna,kay/o,jett
530,1011,Bind,0,"{'0': {'alive': {'0': [[44988, True], [88188, ...","{'0': {'score': '0-0', 'ally_side': 'defender'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ...",viper,raze,skye,chamber,astra,raze,brimstone,viper,chamber,skye
531,1011,unknown,0,"{'0': {'alive': {'0': [[9, True], [113209, Fal...","{'0': {'score': '0-0', 'ally_side': 'unknown',...","{'0': {'elims': 1, 'deaths': 1, 'assists': 0, ...",viper,sova,chamber,jett,kay/o,reyna,brimstone,jett,sova,chamber
532,1018,Split,3,"{'0': {'alive': {'0': [[199, True], [168177, F...","{'0': {'score': '0-0', 'ally_side': 'defender'...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ...",yoru,sage,raze,brimstone,skye,jett,skye,cypher,reyna,omen


In [12]:
flat2_df_stats_chars.columns


Index(['user_id', 'map', 'player', 'rounds', 'round_info', 'round_totals',
       'ally0_character', 'ally1_character', 'ally2_character',
       'ally3_character', 'ally4_character', 'opponent5_character',
       'opponent6_character', 'opponent7_character', 'opponent8_character',
       'opponent9_character'],
      dtype='object')

In [13]:
flat2_df_stats_chars['rounds'][0]

{'7': {'alive': {'0': [[0, True], [4800, False]],
   '1': [[0, True], [53800, False]],
   '2': [[0, True], [57000, False]],
   '3': [[0, True], [112400, False]],
   '4': [[0, True], [112400, False], [124400, True]],
   '5': [[0, True], [41600, False], [48600, True], [53800, False]],
   '6': [[0, True], [112400, False]],
   '7': [[0, True], [57600, False]],
   '8': [[0, True], [112400, False]],
   '9': [[0, True], [42400, False]]},
  'elims': [[42400,
    {'source': '4',
     'target': '9',
     'assisted': [],
     'headshot': True,
     'wallbang': False,
     'elim_number': 1,
     'first_blood': True,
     'team_credit': 'ally'}],
   [54000,
    {'source': '6',
     'target': '1',
     'assisted': [],
     'headshot': True,
     'wallbang': False,
     'elim_number': 1,
     'first_blood': False,
     'team_credit': 'opponent'}],
   [57600,
    {'source': '3',
     'target': '7',
     'assisted': [],
     'headshot': True,
     'wallbang': False,
     'elim_number': 1,
     'first_b

In [14]:
flat2_df_stats_chars.shape

(534, 16)

### Flattening the round_info/round_totals cols by 1 level
- This is where the DF expands into rounds being the rows instead of matches being the rows
- Have to parse the round info/round totals at the same time

In [15]:
# Initialize an empty dictionary to store map names and unique areas
map_areas = {}
for index, row in flat2_df_stats_chars.iterrows():
    # for all the rounds in each match
    for round_number, round_data in row['round_info'].items():
        # get the current/original row.
        new_row = row.to_dict()
        # exclude the original complex columns that we're flattening
        new_row.pop('rounds', None)
        new_row.pop('round_info', None)
        
        # get current map
        cur_map = new_row['map']

        # Initialize a new set for this map if it doesn't exist in map_areas
        if cur_map not in map_areas:
            map_areas[cur_map] = set()
            
        # Add round number to the new row
        new_row['round_number'] = round_number

        # Flatten rounds data into the new row
        for key, value in row['rounds'][round_number].items():
            if key == 'map_region':
                # value should be a list of lists where each sublist's second element is an area
                for sublist in value:
                    # Add the area to the set corresponding to the current map
                    map_areas[cur_map].add(sublist[1])

map_areas

{'Pearl': {'A Art',
  'A Dugout',
  'A Flowers',
  'A Link',
  'A Main',
  'A Restaurant',
  'A Secret',
  'A Site',
  'Attacker Side Spawn',
  'B Club',
  'B Hall',
  'B Link',
  'B Main',
  'B Ramp',
  'B Screen',
  'B Site',
  'B Tower',
  'B Tunnel',
  'Defender Side Records',
  'Defender Side Spawn',
  'Defender Side Water',
  'Mid Connector',
  'Mid Doors',
  'Mid Plaza',
  'Mid Shops',
  'Mid Top'},
 'Haven': {'A Garden',
  'A Link',
  'A Lobby',
  'A Long',
  'A Short',
  'A Site',
  'A Tower',
  'Attacker Side Spawn',
  'B Back',
  'B Site',
  'C Cubby',
  'C Link',
  'C Lobby',
  'C Long',
  'C Site',
  'C Window',
  'Defender Side Spawn',
  'Mid Courtyard',
  'Mid Doors',
  'Mid Window'},
 'Split': {'A Back',
  'A Lobby',
  'A Main',
  'A Rafters',
  'A Ramps',
  'A Screens',
  'A Sewer',
  'A Site',
  'A Tower',
  'Attacker Side Spawn',
  'B Alley',
  'B Back',
  'B Link',
  'B Lobby',
  'B Main',
  'B Rafters',
  'B Site',
  'B Stairs',
  'B Tower',
  'Defender Side Spawn'

In [16]:
flat2_df_stats_chars.shape

(534, 16)

In [17]:
flat2_df_stats_chars.head(10).shape

(10, 16)

In [18]:
flat2_df_stats_chars.iloc[10:20]

Unnamed: 0,user_id,map,player,rounds,round_info,round_totals,ally0_character,ally1_character,ally2_character,ally3_character,ally4_character,opponent5_character,opponent6_character,opponent7_character,opponent8_character,opponent9_character
10,1011,Split,1,"{'0': {'alive': {'0': [[110599, True], [166999...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 1, ...",raze,killjoy,astra,sage,skye,raze,sage,astra,cypher,skye
11,1011,Lotus,3,"{'0': {'alive': {'0': [[107199, True], [189399...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ...",yoru,raze,breach,killjoy,omen,omen,killjoy,jett,breach,yoru
12,1011,Icebox,3,"{'0': {'alive': {'0': [[0, True], [48399, Fals...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 0, 'deaths': 1, 'assists': 0, ...",sage,jett,sova,killjoy,viper,harbor,sova,viper,killjoy,jett
13,1011,Haven,3,"{'0': {'alive': {'0': [[107199, True], [153799...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 2, 'deaths': 1, 'assists': 0, ...",kay/o,omen,jett,breach,cypher,jett,cypher,fade,breach,omen
14,1011,Pearl,2,"{'0': {'alive': {'0': [[112799, True], [170999...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 1, 'deaths': 1, 'assists': 0, ...",skye,sova,viper,phoenix,sage,astra,fade,cypher,neon,jett
15,1011,Icebox,3,"{'0': {'alive': {'0': [[482199, True], [527199...","{'0': {'score': '0-0', 'ult_used': False, 'all...","{'0': {'elims': 1, 'deaths': 1, 'assists': 0, ...",skye,viper,jett,killjoy,sova,jett,viper,reyna,sova,killjoy
16,1013,unknown,0,"{'21': {'alive': {'0': [[199200, True], [22920...","{'21': {'score': '11-10', 'ult_used': False, '...","{'21': {'elims': 0, 'deaths': 0, 'assists': 0,...",raze,jett,cypher,sova,kay/o,raze,kay/o,cypher,jett,viper
17,1013,unknown,2,"{'12': {'alive': {'0': [[57400, True], [102400...","{'12': {'score': '6-6', 'ult_used': False, 'al...","{'12': {'elims': 2, 'deaths': 1, 'assists': 0,...",jett,kay/o,brimstone,raze,viper,jett,kay/o,skye,raze,viper
18,1013,unknown,1,"{'7': {'alive': {'0': [[67600, True]], '1': [[...","{'7': {'score': '2-5', 'ult_used': False, 'all...","{'7': {'elims': 0, 'deaths': 0, 'assists': 0, ...",jett,raze,kay/o,brimstone,viper,jett,raze,skye,kay/o,viper
19,1013,unknown,1,{},{},{},jett,raze,kay/o,skye,brimstone,jett,raze,kay/o,cypher,skye


In [None]:
def split_list_based_on_time(lst, spike_time):
    before_spike = [entry for entry in lst if entry[0] <= spike_time]
    after_spike = [entry for entry in lst if entry[0] > spike_time]
    return before_spike, after_spike

In [57]:
new_rows = []

# for all the matches in flat2_df_stats_chars...
for index, row in flat2_df_stats_chars.iterrows():
# for i in range(1):
    # for all the rounds in each match
    for round_number, round_data in row['round_info'].items():
        # get the current/original row.
        new_row = row.to_dict()
        
        # get current map
        cur_map = new_row['map']
        
        # get current player
        cur_player = new_row['player']
        
        # Add round number to the new row
        new_row['round_number'] = round_number
        
        # Flatten round_info data into the new row
        for key, value in round_data.items():
            new_row[f'round_info_{key}'] = value
            
        # get side
        side = new_row['round_info_ally_side']
        
        # get won 
        won = new_row['round_info_round_won']
        
        # Flatten rounds data into the new row
        for key, value in row['rounds'][round_number].items():
            
            ## UNIQUE ##
            if key == 'spike_planted':
                rounds_spike_planted = value
                print(value)

                # Initialize default values
                spike_yn = False
                time = value[0][0]

                # Iterate over the list to find the first instance of spike planted
                for item in value:
                    if item[1]:  # Check if spike was planted
                        spike_yn = item[1]
                        time = item[0]
                        break  # Stop iterating after finding the first instance

                new_row['spike_planted'] = spike_yn
                new_row['spike_time'] = time                

            if key == 'map_region':
                rounds_map_region = value
                
                # % map covered
                avalible_places = map_areas[cur_map]
                visited_places = set()

                for entry in rounds_map_region:
                    place = entry[1]
                    if place in avalible_places:
                        visited_places.add(place)

                metric = len(visited_places) / len(avalible_places)
                
                new_row[f'ally{cur_player}_prec_map_covered'] = metric
                
                # 'movement' metric (based on transisions between areas)
                transitions = set()
                previous_place = None
                movement_metric = 0

                for entry in rounds_map_region:
                    place = entry[1]
                    if previous_place is not None:
                        transition = (previous_place, place)
                        transitions.add(transition)
                    previous_place = place

                movement_metric = len(transitions)
                f'ally{cur_player}_movement_metric'
                new_row[f'ally{cur_player}_movement_metric'] = movement_metric
            
            ## MOST TIME/MAX ##
            if key == 'inv_primary':
                rounds_inv_primary = value
                
                current_gun = None
                longest_duration = 0
                start_time = rounds_inv_primary[0][0]
                end_time = rounds_inv_primary[-1][0]

                longest_gun = None

                for entry in rounds_inv_primary:
                    timestamp = entry[0]
                    gun = entry[1]

                    if current_gun is None:
                        current_gun = gun
                    elif gun != current_gun:
                        duration = timestamp - start_time
                        if duration > longest_duration:
                            longest_duration = duration
                            longest_gun = current_gun
                            gun_time = (longest_gun, longest_duration)

                        current_gun = gun
                        start_time = timestamp
                  
                new_row[f'ally{cur_player}_longest_gun_primary'] = longest_gun
                
            if key == 'inv_secondary':
                rounds_inv_secondary = value
                
                current_gun = None
                longest_duration = 0
                start_time = rounds_inv_secondary[0][0]
                end_time = rounds_inv_secondary[-1][0]

                longest_gun = None

                for entry in rounds_inv_secondary:
                    timestamp = entry[0]
                    gun = entry[1]

                    if current_gun is None:
                        current_gun = gun
                    elif gun != current_gun:
                        duration = timestamp - start_time
                        if duration > longest_duration:
                            longest_duration = duration
                            longest_gun = current_gun
                            gun_time = (longest_gun, longest_duration)

                        current_gun = gun
                        start_time = timestamp
                   
                new_row[f'ally{cur_player}_longest_gun_secondary'] = longest_gun
                
            if key == 'inv_state':
                rounds_inv_state = value

                current_inv_state = None
                longest_duration = 0
                longest_inv_state = None

                if rounds_inv_state:  # Check if the list is not empty
                    start_time = rounds_inv_state[0][0]
                    end_time = rounds_inv_state[-1][0]

                    for entry in rounds_inv_state:
                        timestamp = entry[0]
                        inv_state = entry[1]

                        if current_inv_state is None:
                            current_inv_state = inv_state
                        elif inv_state != current_inv_state:
                            duration = timestamp - start_time
                            if duration > longest_duration:
                                longest_duration = duration
                                longest_inv_state = current_inv_state

                            current_inv_state = inv_state
                            start_time = timestamp

                new_row[f'ally{cur_player}_longest_inv_state'] = longest_inv_state

            
            ## COUNT ##
            if key == 'ult_state':
                rounds_ult_state = value
                
                for player, ultimate_data in rounds_ult_state.items():
                    ultimate_usage = 0
                    previous_state = None
                    for entry in ultimate_data:
                        ultimate_state = entry[1]
                        if previous_state is not None and previous_state is True and ultimate_state is False:
                            ultimate_usage += 1
                        previous_state = ultimate_state
                    if int(player) < 5:
                        new_row[f'ally{player}_ultimate_usage'] = ultimate_usage
                    else:
                        new_row[f'opponent{int(player)-5}_ultimate_usage'] = ultimate_usage
            
            ## COUNT ##
            if key == 'elims':
                rounds_elims = value
                # Initialize the stats for all players
                player_stats = {
                    str(player): {"elims": 0, "deaths": 0, "assists": 0, "headshots": 0, "wallbangs": 0, "first_bloods": 0} 
                    for player in range(10)}
                
                ally_deaths = 0
                opponent_deaths = 0
                
                # Then, for each elimination:
                for elimination in rounds_elims:
                    elim_data = elimination[1]
                    source = elim_data['source']
                    target = elim_data['target']
                    assisted = elim_data['assisted']

                    # Increase elim count for the source player
                    player_stats[source]['elims'] += 1

                    # Increase death count for the target player
                    player_stats[target]['deaths'] += 1
                    
                    if int(target) < 5:  # If the target is an ally
                        ally_deaths += 1
                    else:  # If the target is an opponent
                        opponent_deaths += 1

                    # Increase assist count for all assisted players
                    for assist_player in assisted:
                        player_stats[assist_player]['assists'] += 1

                    # If the elimination was a headshot, increase headshot count for the source player
                    if elim_data['headshot']:
                        player_stats[source]['headshots'] += 1

                    # If the elimination was a wallbang, increase wallbang count for the source player
                    if elim_data['wallbang']:
                        player_stats[source]['wallbangs'] += 1

                    # If the elimination was the first blood, increase first_blood count for the source player
                    if elim_data['first_blood']:
                        player_stats[source]['first_bloods'] += 1

                    # Add these stats to the new_row:
                    for player, stats in player_stats.items():
                        prefix = 'ally' if int(player) < 5 else 'opponent'
                        player_num = player if int(player) < 5 else str(int(player) - 5)
                        for stat, count in stats.items():
                            new_row[f'{prefix}{player_num}_{stat}'] = count
                    
                    new_row['all_ally_dead'] = ally_deaths >= 5  # All allies are dead if there are 5 or more ally deaths
                    new_row['all_opponent_dead'] = opponent_deaths >= 5  # All opponents are dead if there are 5 or more opponent deaths
                    
                    def get_win_condition(row):
                        if not row['spike_planted']:
                            return "elimination"
                        elif row['side'] == 'attacker':
                            return "elimination" if row['all_opponent_dead'] else "spike"
                        else:  # row['side'] == 'defender'
                            return "spike" if not row['all_ally_dead'] else "elimination"

            # Then, after you have parsed and set all the other data in new_row, you would add:

            new_row['win_condition'] = get_win_condition(new_row)
            
            ## AVERAGES ## 
            if key == 'ammo_reserve':
                rounds_ammo_reserve = value
                sum_ammo_reserve = 0
                num_entries = len(rounds_ammo_reserve)

                if num_entries > 0:
                    for entry in rounds_ammo_reserve:
                        ammo_reserve_value = entry[1]
                        sum_ammo_reserve += ammo_reserve_value

                    average_ammo_reserve = sum_ammo_reserve / num_entries

                else:
                    average_ammo_reserve = 0
                    
            
                new_row[f'ally{cur_player}_avg_ammo_reserve'] = average_ammo_mag
                
            if key == 'ammo_mag':
                rounds_ammo_mag = value
                sum_ammo_mag = 0
                num_entries = len(rounds_ammo_mag)
                
                if num_entries > 0:
                    for entry in rounds_ammo_mag:
                        ammo_ammo_mag = entry[1]
                        sum_ammo_mag += ammo_ammo_mag

                    average_ammo_mag = sum_ammo_mag/ num_entries
                else:
                    average_ammo_mag = 0
                
                new_row[f'ally{cur_player}_avg_ammo_mag'] = average_ammo_mag
                
            if key == 'credits':
                rounds_credits = value
                sum_credits = 0
                num_entries = len(rounds_credits)
                
                if num_entries > 0:
                    for entry in rounds_credits:
                        credits_value = entry[1]
                        sum_credits += credits_value

                    average_credits = sum_credits / num_entries
                else:
                    average_credits = 0
                
                new_row[f'ally{cur_player}_avg_credits'] = average_credits
                
            if key == 'shield':
                rounds_shield = value
                sum_shield = 0
                num_entries = len(rounds_shield)
                
                if num_entries > 0:
                    for entry in rounds_shield:
                        shield_value = entry[1]
                        sum_shield += shield_value

                    average_shield = sum_shield / num_entries
                else:
                    average_shield = 0
                
                new_row[f'ally{cur_player}_avg_shield'] = average_shield
            
            if key == 'health':
                rounds_health = value
                for player, health_data in rounds_health.items():
                    total_health = 0
                    num_entries = len(health_data)
                    if num_entries > 0:
                        for entry in health_data:
                            health_value = entry[1]
                            total_health += health_value
                        average_health = total_health / num_entries
                    else:
                        average_health = 0
                    new_row[f'ally{player}_avg_health'] = average_health
            
            ## MAXS ## 
            if key == 'loadout_value':
                rounds_loadout_value = value
                max_loadout_value = max(rounds_loadout_value, key=lambda x: x[1])
                max_loadout_value = max_loadout_value[1]
                
                new_row[f'ally{cur_player}_max_loadout_value'] = max_loadout_value
            
            ## TOTALS ##         
            if key == 'ability_charges_1':
                rounds_ability_charges_1 = value
                total_ability_usage_1 = 0

                for i in range(len(rounds_ability_charges_1) - 1):
                    if rounds_ability_charges_1[i + 1][1] == 1:
                        total_ability_usage_1 += 1
                
                new_row[f'ally{cur_player}_total_ability_usage_1'] = total_ability_usage_1
                
            if key == 'ability_charges_2':
                rounds_ability_charges_2 = value
                total_ability_usage_2 = 0

                for i in range(len(rounds_ability_charges_2) - 1):
                    if rounds_ability_charges_2[i + 1][1] == 1:
                        total_ability_usage_2 += 1
                
                new_row[f'ally{cur_player}_total_ability_usage_2'] = total_ability_usage_2
                
            if key == 'ability_charges_3':
                rounds_ability_charges_3 = value
                total_ability_usage_3 = 0

                for i in range(len(rounds_ability_charges_3) - 1):
                    if rounds_ability_charges_3[i + 1][1] == 1:
                        total_ability_usage_3 += 1
                
                new_row[f'ally{cur_player}_total_ability_usage_3'] = total_ability_usage_3
                
            if key == 'ability_charges_4':
                rounds_ability_charges_4 = value
                total_ability_usage_4 = 0

                for i in range(len(rounds_ability_charges_4) - 1):
                    if rounds_ability_charges_4[i + 1][1] == 1:
                        total_ability_usage_4 += 1
                
                new_row[f'ally{cur_player}_total_ability_usage_4'] = total_ability_usage_4
                
            if key == 'firing_state':
                firing_state = value

                total_fire_time = 0
                start_fire = 0

                for i in range(len(firing_state) - 1):
                    if firing_state[i + 1][1] == True:
                        start_fire = firing_state[i + 1][0]
                    elif firing_state[i + 1][1] == False:
                        end_fire = firing_state[i + 1][0]
                        firing = end_fire - start_fire
                        total_fire_time += firing
                
                new_row[f'ally{cur_player}_total_firing_time'] = total_fire_time
            else:
                new_row[f'rounds_{key}'] = value


        # Getting rid of cols that we just parsed or dont need 
        new_row.pop('rounds_firing_state', None)
        new_row.pop('rounds_ability_charges_1', None)
        new_row.pop('rounds_ability_charges_2', None)
        new_row.pop('rounds_ability_charges_3', None)
        new_row.pop('rounds_ability_charges_4', None)
        new_row.pop('rounds_assisted_count', None)
        new_row.pop('rounds_assists_count', None)
        new_row.pop('rounds_loadout_value', None)
        new_row.pop('rounds_ammo_reserve', None)
        new_row.pop('rounds_map_region', None)
        new_row.pop('rounds_phases', None)
        new_row.pop('rounds_ammo_mag', None)
        new_row.pop('rounds_credits', None)
        new_row.pop('rounds_shield', None)
        new_row.pop('rounds_credits', None)
        new_row.pop('rounds_inv_primary', None)
        new_row.pop('rounds_inv_secondary', None)
        new_row.pop('rounds_inv_state', None)
        new_row.pop('rounds_health', None)
        new_row.pop('rounds_ult_state', None)
        new_row.pop('round_totals', None)
        new_row.pop('rounds_alive', None)
        new_row.pop('round_info_ally_score', None)
        new_row.pop('round_info_opponent_score', None)
        new_row.pop('rounds_elims', None)
        new_row.pop('rounds_elims_count', None)
        new_row.pop('rounds_spike_planted', None)
        # exclude the original complex columns that we're flattening
        new_row.pop('rounds', None)
        new_row.pop('round_info', None)
        # Getting rid of cols that we dont want/need    
        new_row.pop('round_info_score', None)
        new_row.pop('round_info_round_scored', None)
        new_row.pop('round_info_buy_start', None)
        new_row.pop('round_info_ult_used', None)
        new_row.pop('round_info_spike_planted', None)
    
        # fixing the naming of the players
        def adjust_keys(new_row, cur_player):
            adjusted_row = {}
            cur_player = int(cur_player)

            allies_count = 5  # define total number of allies, including 'self'

            for key, value in new_row.items():
                if key.startswith(f'ally{cur_player}'):
                    adjusted_key = key.replace(f'ally{cur_player}', 'self')
                elif key.startswith('ally'):
                    # correctly parse the ally number as an integer and remaining part of the key
                    ally_number = int(key[4:].split('_')[0])
                    ally_remaining = '_'.join(key.split('_')[1:])

                    # calculate new ally number
                    new_ally_number = (ally_number - cur_player) % allies_count

                    # construct new key with updated ally number
                    adjusted_key = 'ally' + str(new_ally_number) + '_' + ally_remaining
                else:
                    adjusted_key = key

                adjusted_row[adjusted_key] = value

            return adjusted_row

        new_row = adjust_keys(new_row, cur_player)
        
        # Append this new row to the list
        new_rows.append(new_row)

# Convert the list of new rows into a dataframe
new_df = pd.DataFrame(new_rows)

KeyError: 'spike_planted'

### Data cleaning
- I noticed some NaNs. This happens when there is missing information
- self_longest_gun_primary, self_longest_inv_state, self_longest_gun_secondaryis coming up as None but i think it should be 'none', so i am replacing with that
- After investigating it seems like the elims data isnt always captured, its only 123 rows so im going to drop it

In [40]:
new_df['self_longest_gun_primary'] = new_df['self_longest_gun_primary'].fillna('none')
new_df['self_longest_gun_secondary'] = new_df['self_longest_gun_secondary'].fillna('none')
new_df['self_longest_inv_state'] = new_df['self_longest_inv_state'].fillna('none')

In [41]:
cols_with_nans = new_df.columns[new_df.isnull().any()].tolist()

# Print the list of columns with NaN values
print(cols_with_nans)

['round_info_round_won', 'ally4_elims', 'ally4_deaths', 'ally4_assists', 'ally4_headshots', 'ally4_wallbangs', 'ally4_first_bloods', 'self_elims', 'self_deaths', 'self_assists', 'self_headshots', 'self_wallbangs', 'self_first_bloods', 'ally1_elims', 'ally1_deaths', 'ally1_assists', 'ally1_headshots', 'ally1_wallbangs', 'ally1_first_bloods', 'ally2_elims', 'ally2_deaths', 'ally2_assists', 'ally2_headshots', 'ally2_wallbangs', 'ally2_first_bloods', 'ally3_elims', 'ally3_deaths', 'ally3_assists', 'ally3_headshots', 'ally3_wallbangs', 'ally3_first_bloods', 'opponent0_elims', 'opponent0_deaths', 'opponent0_assists', 'opponent0_headshots', 'opponent0_wallbangs', 'opponent0_first_bloods', 'opponent1_elims', 'opponent1_deaths', 'opponent1_assists', 'opponent1_headshots', 'opponent1_wallbangs', 'opponent1_first_bloods', 'opponent2_elims', 'opponent2_deaths', 'opponent2_assists', 'opponent2_headshots', 'opponent2_wallbangs', 'opponent2_first_bloods', 'opponent3_elims', 'opponent3_deaths', 'oppon

In [42]:
# Find rows with NaN values
nan_rows = new_df[new_df.isnull().any(axis=1)]

# Print the rows with NaN values
nan_rows

Unnamed: 0,user_id,map,player,ally4_character,self_character,ally1_character,ally2_character,ally3_character,opponent5_character,opponent6_character,...,self_avg_ammo_reserve,self_total_firing_time,self_longest_gun_secondary,self_max_loadout_value,spike_planted,spike_time,self_total_ability_usage_1,self_total_ability_usage_2,self_total_ability_usage_3,self_total_ability_usage_4
235,1011,Lotus,3,breach,killjoy,omen,yoru,raze,omen,killjoy,...,15.785714,8400,none,2600.0,False,281999,0,0,0,0
238,1011,Lotus,3,breach,killjoy,omen,yoru,raze,omen,killjoy,...,25.857143,5200,none,4100.0,False,559599,0,0,0,1
240,1011,Lotus,3,breach,killjoy,omen,yoru,raze,omen,killjoy,...,21.157895,9400,none,3900.0,False,760999,0,0,0,0
244,1011,Lotus,3,breach,killjoy,omen,yoru,raze,omen,killjoy,...,24.500000,2200,none,4500.0,False,1058399,0,1,0,0
357,1013,unknown,2,kay/o,brimstone,raze,viper,jett,jett,kay/o,...,12.500000,0,classic,928.0,False,310600,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6359,1012,Fracture,0,brimstone,killjoy,skye,sage,jett,killjoy,breach,...,0.000000,0,none,0.0,False,13,0,0,0,0
6958,998,Ascent,1,sage,brimstone,fade,chamber,raze,reyna,fade,...,20.666667,0,none,4650.0,False,537007,0,0,0,0
7663,1001,Icebox,4,harbor,sova,jett,killjoy,viper,killjoy,jett,...,21.500000,0,none,4200.0,False,103357,0,0,0,0
7670,1001,Icebox,4,harbor,sova,jett,killjoy,viper,killjoy,jett,...,25.000000,0,none,4600.0,False,333358,0,0,0,0


In [43]:
# nan_rows = new_df[new_df['self_longest_inv_state'].isnull()]
# nan_rows['self_longest_inv_state']

In [44]:
clean_df = new_df.dropna()
clean_df = clean_df.reset_index(drop=True)

In [45]:
# Find rows with NaN values
nan_rows = clean_df[clean_df.isnull().any(axis=1)]

# Print the rows with NaN values
nan_rows

Unnamed: 0,user_id,map,player,ally4_character,self_character,ally1_character,ally2_character,ally3_character,opponent5_character,opponent6_character,...,self_avg_ammo_reserve,self_total_firing_time,self_longest_gun_secondary,self_max_loadout_value,spike_planted,spike_time,self_total_ability_usage_1,self_total_ability_usage_2,self_total_ability_usage_3,self_total_ability_usage_4


In [46]:
clean_df.shape

(7923, 110)

In [49]:
new_df['spike_planted']

<bound method Series.unique of 0       False
1       False
2       False
3       False
4       False
        ...  
8041    False
8042    False
8043    False
8044    False
8045    False
Name: spike_planted, Length: 8046, dtype: bool>

In [37]:
# Parsing initial DataFrame
for index, row in clean_df.iterrows():
    if row['spike_planted']:
        print('h')
#         # Spike was planted, so we split the data
#         pre_spike_end = row['spike_time'] 
#         df_pre_spike = clean_df[(clean_dfdf['round_info_round_start'] <= pre_spike_end)]
#         df_post_spike = clean_df[(clean_df['round_info_round_end'] > pre_spike_end)]
        
#         df_pre_spike['phase'] = 'pre_spike'
#         df_post_spike['phase'] = 'post_spike'

#     else:
#         # Spike was not planted, so all data belongs to pre-spike phase
#         df_pre_spike = clean_df.copy()
#         df_pre_spike['phase'] = 'pre_spike'

# # Combining dataframes
# df_spike = pd.concat([df_pre_spike, df_post_spike], ignore_index=True)


In [None]:
df_spike

In [34]:
df_pre_spike

Unnamed: 0,user_id,map,player,ally4_character,self_character,ally1_character,ally2_character,ally3_character,opponent5_character,opponent6_character,...,self_total_firing_time,self_longest_gun_secondary,self_max_loadout_value,spike_planted,spike_time,self_total_ability_usage_1,self_total_ability_usage_2,self_total_ability_usage_3,self_total_ability_usage_4,phase
0,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,2200,none,3434.0,False,0,0,1,1,0,pre_spike
1,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,8600,classic,5100.0,False,125000,1,0,1,1,pre_spike
2,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,9800,classic,5100.0,False,204600,1,1,1,0,pre_spike
3,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,7400,classic,5100.0,False,304600,1,0,1,0,pre_spike
4,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,4400,classic,6750.0,False,369400,0,1,1,0,pre_spike
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7918,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,4999,none,4300.0,False,1283821,1,4,3,0,pre_spike
7919,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,7798,none,4500.0,False,1375208,2,2,2,0,pre_spike
7920,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,10199,none,4700.0,False,1439799,2,1,1,0,pre_spike
7921,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,10399,none,4700.0,False,1504791,2,1,2,0,pre_spike


In [35]:
df_post_spike

NameError: name 'df_post_spike' is not defined

In [373]:
clean_df.to_csv('clean_data_norm_round_total.csv', index=False)

In [374]:
df1 = pd.read_csv('clean_data_norm_round_total.csv')
df1

Unnamed: 0,user_id,map,player,ally4_character,self_character,ally1_character,ally2_character,ally3_character,opponent5_character,opponent6_character,...,self_longest_gun_primary,self_avg_ammo_reserve,self_total_firing_time,self_longest_gun_secondary,self_max_loadout_value,spike_planted,self_total_ability_usage_1,self_total_ability_usage_2,self_total_ability_usage_3,self_total_ability_usage_4
0,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,vandal,24.000000,2200,none,3434.0,False,0,1,1,0
1,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,vandal,19.400000,8600,classic,5100.0,False,1,0,1,1
2,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,vandal,18.222222,9800,classic,5100.0,False,1,1,1,0
3,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,none,16.800000,7400,classic,5100.0,False,1,0,1,0
4,1006,Pearl,1,phoenix,jett,gekko,cypher,astra,harbor,astra,...,vandal,14.142857,4400,classic,6750.0,False,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7918,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,none,21.000000,4999,none,4300.0,False,1,4,3,0
7919,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,vandal,17.352941,7798,none,4500.0,False,2,2,2,0
7920,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,none,17.785714,10199,none,4700.0,False,2,1,1,0
7921,1018,Lotus,2,killjoy,sage,viper,breach,yoru,fade,harbor,...,vandal,16.117647,10399,none,4700.0,False,2,1,2,0
