In [1]:
# Import Splinter and BeautifulSoup
import os
import pandas as pd
import json
import time
import re
from datetime import datetime

### list all files

In [2]:
def get_file_dates(a):
    strs = re.findall('(\d+)', a)
    if len(strs) >= 1:
        return int(strs[0])
    else:
        return 0

In [121]:
all_files = os.listdir()
filenames = []
for file in all_files:
    if 'player_mapstat' in file:
        filenames.append(file)
filenames = sorted(filenames, key=get_file_dates, reverse=True)
filenames

['player_mapstat_20171231_20171215.csv',
 'player_mapstat_20171215_20171201.csv',
 'player_mapstat_20171201_20171115.csv',
 'player_mapstat_20171115_20171101.csv',
 'player_mapstat_20171101_20171015.csv',
 'player_mapstat_20171015_20171001.csv',
 'player_mapstat_20171001_20170915.csv',
 'player_mapstat_20170915_20170901.csv',
 'player_mapstat_20170901_20170815.csv',
 'player_mapstat_20170815_20170801.csv',
 'player_mapstat_20170731_20170715.csv',
 'player_mapstat_20170715_20170701.csv',
 'player_mapstat_20170701_20170615.csv',
 'player_mapstat_20170615_20170601.csv',
 'player_mapstat_20170601_20170515.csv',
 'player_mapstat_20170515_20170501.csv',
 'player_mapstat_20170501_20170415.csv',
 'player_mapstat_20170415_20170401.csv',
 'player_mapstat_20170401_20170315.csv',
 'player_mapstat_20170315_20170301.csv',
 'player_mapstat_20170301_20170215.csv',
 'player_mapstat_20170215_20170201.csv',
 'player_mapstat_20170201_20170115.csv',
 'player_mapstat_20170115_20170105.csv',
 'player_mapstat

In [4]:
winner_filename = 'winners.csv'
winner_columns = ['winner', 'winner_id']
all_team_filename = 'all_teams.csv'
team_columns = ['teamname']

In [28]:
def append_info_(df, filename, col):
    # append winners to 'winners.csv'
    if os.path.exists(filename):
        ddf = pd.read_csv(filename)
    else:
        ddf = pd.DataFrame(columns=col)
        
    ddf = pd.concat([ddf, df[col]])
    ddf = ddf.drop_duplicates()
    ddf.to_csv(filename, index=False)
#    df = df[col].drop_duplicates()
#    df.to_csv(filename, mode='a', header=False)

# delete/create csv files 
def create_team_map():
    # 
    df = pd.DataFrame(columns=winner_columns)
    df.to_csv(winner_filename, index=False)
    #
    df = pd.DataFrame(columns=team_columns)
    df.to_csv(all_team_filename, index=False)

# append all winners to 'winners.csv' and 'all_teams.csv'
# append all teams to 'winners.csv' and 'all_teams.csv'
def append_team_map(df):
    # append winners to 'winners.csv'
    append_info_(df, winner_filename, winner_columns)
    # append all teams to 'all_team.csv'
    append_info_(df, all_team_filename, team_columns)

# return teams NOT in winers.
def get_missing_teams():
    # all teams
    if not os.path.exists(all_team_filename):
        return pd.DataFrame(columns=team_columns)
    all_team_df = pd.read_csv(all_team_filename)
    all_team_df = all_team_df.drop_duplicates(ignore_index=True)

    # winners:
    if not os.path.exists(winner_filename):
        return all_team_df[team_columns] 
    winner_df = pd.read_csv(winner_filename)
    winner_df = winner_df.drop_duplicates(ignore_index=True)

    winners = winner_df['winner'].tolist()
    b = all_team_df['teamname'].isin(winners)
    missing_df = all_team_df[b == False]
    return missing_df

## all winners must be in teamname
def get_ghost_winners(df):
    all_teams = set(df['teamname'].tolist())
    b = df['winner'].isin(all_teams)
    ghost_winners_df = df.loc[b == False]
    return ghost_winners_df

# 
def input_player_mapstat(filename):
    df = pd.read_csv(filename)
    df['winner'] = list(map(lambda x: x.replace('%20', ' '), df['winner']))
#    ghost_df = get_ghost_winners(df)
#    if ghost_df.shape[0]:
#        print(ghost_df)
#    else:
#    append_team_map(df)
    return df

## work bench
#### assemble files

In [107]:
input_files = [
 'player_mapstat_20161229_20161201.csv',
 'player_mapstat_20161130_20161101.csv',
 'player_mapstat_20161031_20161001.csv',
 'player_mapstat_20161001_20160901.csv',
 'player_mapstat_20160913_20160901.csv',
 'player_mapstat_20160907_20160901.csv',
 'player_mapstat_20160904_20160901.csv',
 'player_mapstat_20160901_20160801.csv',
 'player_mapstat_20160731_20160701.csv',
 'player_mapstat_20160701_20160601.csv',
 'player_mapstat_20160601_20160501.csv',
 'player_mapstat_20160501_20160401.csv',
 'player_mapstat_20160401_20160301.csv',
 'player_mapstat_20160301_20160201.csv',
 'player_mapstat_20160201_20160103.csv'
]

In [108]:
first = True
ddf = None
start_date = None
for file in input_files:
    df = input_player_mapstat(file)
    size = df.shape[0]
    start = df[['date']].iloc[0]
    if first:
        start_date = start
    end = df[['date']].iloc[size -1]
    s_df = df.drop_duplicates()
    print(f'{start[0]} {end[0]}  {df.shape[0]} {s_df.shape[0]} {file}')
    if first:
        ddf = s_df
        first = False
    else:
        ddf = pd.concat([ddf, s_df])
ddf = ddf.drop_duplicates()

2016-12-29 2016-12-01  5242 5242 player_mapstat_20161229_20161201.csv
2016-11-30 2016-11-01  11480 11480 player_mapstat_20161130_20161101.csv
2016-10-31 2016-10-01  15525 15515 player_mapstat_20161031_20161001.csv
2016-10-01 2016-09-11  8787 8787 player_mapstat_20161001_20160901.csv
2016-09-13 2016-09-01  4796 4796 player_mapstat_20160913_20160901.csv
2016-09-07 2016-09-07  50 50 player_mapstat_20160907_20160901.csv
2016-09-04 2016-09-04  200 200 player_mapstat_20160904_20160901.csv
2016-09-01 2016-08-01  10747 10747 player_mapstat_20160901_20160801.csv
2016-07-31 2016-07-01  8093 8093 player_mapstat_20160731_20160701.csv
2016-07-01 2016-06-01  7023 7023 player_mapstat_20160701_20160601.csv
2016-06-01 2016-05-01  8628 8628 player_mapstat_20160601_20160501.csv
2016-05-01 2016-04-01  8806 8806 player_mapstat_20160501_20160401.csv
2016-04-01 2016-03-01  10507 10507 player_mapstat_20160401_20160301.csv
2016-03-01 2016-02-01  8918 8918 player_mapstat_20160301_20160201.csv
2016-02-01 2016-01

In [12]:
output_filename = 'player_mapstat_2016.csv'
output_filename

'player_mapstat_2016.csv'

In [13]:
ddf.to_csv(output_filename, index=False)

In [110]:
new_ddf = ddf.copy()

### winner name fix
#### unify the teamname and winner

In [57]:
def get_condidate(winner, ghost_df):
    team_df = ghost_df.loc[ghost_df['winner'] == winner]
    teams = list(set(team_df['teamname'].tolist()))

    g_strs = re.split('%..', winner)

    candidate = ""
    m = -1
    for team in teams:
        n = 0
        for g_str in g_strs:
            if len(g_str) == 0:
                continue
            if g_str in team:
                n += len(g_str)
        if n > m:
            m = n
            candidate = team
    condidate = {'winner' : winner, 'team' : candidate, 'confidence' : m}
    return condidate

In [105]:
def check_ghosts(df):
    ghost_df = get_ghost_winners(df)
    ghost_df = ghost_df[['teamname', 'winner']]
    ghost_df = ghost_df.drop_duplicates()
    
    ghosts = list(set(ghost_df['winner'].tolist()))

    condidates = []
    for winner in ghosts:
        condidate = get_condidate(winner, ghost_df)
        condidates.append(condidate)

    for condidate in condidates:
        w = condidate['winner']
        name = condidate['team']
        conf = condidate['confidence']
        print(f"{conf}: '{w}', '{name}'")
    return condidates

In [87]:
def fix_winners(condidates, ddf):
    winners = ddf['winner'].tolist()
    for condidate in condidates:
        winners = [condidate['team'] if e == condidate['winner'] else e for e in winners]
    ddf['winner'] = winners
    return ddf

In [156]:
filename = 'player_mapstat_2018.csv'

In [157]:
new_ddf = pd.read_csv(filename)
new_ddf.tail(3)

Unnamed: 0,player_id,player,match_id,date,kills,hs,assists,flash_assists,kdratio,deaths,...,adr,fkdiff,rating,teamname,winner,winner_id,total_rounds,mapstatid,win_rounds,map
180157,11358,0i,2318425,2018-01-04,10,7,4,1,4,18,...,59,-2,0.68,New4,ViCi,7606,22.0,59512,16,Inferno
180158,10722,gas,2318425,2018-01-04,10,5,1,0,1,20,...,57,-2,0.63,New4,ViCi,7606,22.0,59512,16,Inferno
180159,11574,xiaosaGe,2318425,2018-01-04,10,5,2,0,2,20,...,69,-4,0.53,New4,ViCi,7606,22.0,59512,16,Inferno


In [147]:
c = check_ghosts(new_ddf)
c

[]

In [140]:
new_ddf = fix_winners(c, new_ddf)
new_ddf.head(3)

Unnamed: 0,player_id,player,match_id,date,kills,hs,assists,flash_assists,kdratio,deaths,...,adr,fkdiff,rating,teamname,winner,winner_id,total_rounds,mapstatid,win_rounds,map
0,7026,DD,2318399,2017-12-31,18,10,4,1,4,10,...,82,3,1.53,TYLOO,TYLOO,4863,23.0,59511,16,Overpass
1,7131,BnTeT,2318399,2017-12-31,19,9,7,2,7,16,...,103,3,1.43,TYLOO,TYLOO,4863,23.0,59511,16,Overpass
2,8605,somebody,2318399,2017-12-31,21,11,0,0,0,13,...,80,2,1.42,TYLOO,TYLOO,4863,23.0,59511,16,Overpass


In [141]:
new_ddf.to_csv(filename, index=False)

# check the difference between two versions

In [158]:
filename = 'player_mapstat_2018.csv.zip'

In [159]:
new_df = pd.read_csv(filename)
new_df.shape

(180160, 21)

In [160]:
old_df = pd.read_csv('../Resources/' + filename)
old_df.shape

(180160, 21)

In [164]:
l = []
for i in range(old_df.shape[0] - 1):
    p1 = old_df.iloc[i]
    b = not p1.equals(new_df.iloc[i])
    l.append(b)
    if (b):
        print(f'idx={i}, is_equal={not b}')

idx=58617, is_equal=False
idx=58618, is_equal=False
idx=58619, is_equal=False
idx=58620, is_equal=False
idx=58621, is_equal=False
idx=58622, is_equal=False
idx=58623, is_equal=False
idx=58624, is_equal=False
idx=58625, is_equal=False
idx=58626, is_equal=False
idx=58637, is_equal=False
idx=58638, is_equal=False
idx=58639, is_equal=False
idx=58640, is_equal=False
idx=58641, is_equal=False
idx=58642, is_equal=False
idx=58643, is_equal=False
idx=58644, is_equal=False
idx=58645, is_equal=False
idx=58646, is_equal=False
idx=59288, is_equal=False
idx=59289, is_equal=False
idx=59290, is_equal=False
idx=59291, is_equal=False
idx=59292, is_equal=False
idx=59293, is_equal=False
idx=59294, is_equal=False
idx=59295, is_equal=False
idx=59296, is_equal=False
idx=59297, is_equal=False
idx=87614, is_equal=False
idx=87615, is_equal=False
idx=87616, is_equal=False
idx=87617, is_equal=False
idx=87618, is_equal=False
idx=87619, is_equal=False
idx=87620, is_equal=False
idx=87621, is_equal=False
idx=87622, i

In [179]:
old_ddf = old_df.loc[l]
old_ddf.shape

(121, 21)

In [169]:
new_ddf = new_df.loc[l]
new_ddf.shape

(121, 21)

In [170]:
old_ddf.head(3)

Unnamed: 0,player_id,player,match_id,date,kills,hs,assists,flash_assists,kdratio,deaths,...,adr,fkdiff,rating,teamname,winner,winner_id,total_rounds,mapstatid,win_rounds,map
58617,11733,BRNZ1K,2326217,2018-09-05,16,8,1,0,1,16,...,68,-1,0.96,ex-Wild,N%C3%A3o Tem Biscoito,9415,21.0,73323,16,Overpass
58618,10832,ossada,2326217,2018-09-05,10,3,3,0,3,18,...,65,0,0.7,ex-Wild,N%C3%A3o Tem Biscoito,9415,21.0,73323,16,Overpass
58619,2514,landic,2326217,2018-09-05,10,4,4,0,4,19,...,57,-2,0.64,ex-Wild,N%C3%A3o Tem Biscoito,9415,21.0,73323,16,Overpass


In [171]:
new_ddf.head(3)

Unnamed: 0,player_id,player,match_id,date,kills,hs,assists,flash_assists,kdratio,deaths,...,adr,fkdiff,rating,teamname,winner,winner_id,total_rounds,mapstatid,win_rounds,map
58617,11733,BRNZ1K,2326217,2018-09-05,16,8,1,0,1,16,...,68,-1,0.96,ex-Wild,Não Tem Biscoito,9415,21.0,73323,16,Overpass
58618,10832,ossada,2326217,2018-09-05,10,3,3,0,3,18,...,65,0,0.7,ex-Wild,Não Tem Biscoito,9415,21.0,73323,16,Overpass
58619,2514,landic,2326217,2018-09-05,10,4,4,0,4,19,...,57,-2,0.64,ex-Wild,Não Tem Biscoito,9415,21.0,73323,16,Overpass


In [180]:
old_dddf = old_ddf.drop(columns=['winner'])

In [176]:
new_dddf = new_ddf.drop(columns=['winner'])

In [181]:
new_dddf.equals(old_dddf)

True

### teamname and team_id

In [282]:
filenames = [
    'player_mapstat_2016.csv.zip',
    'player_mapstat_2017.csv.zip',
    'player_mapstat_2018.csv.zip',
    'player_mapstat_2019.csv.zip',
    'player_mapstat_2020.csv.zip'
]

In [283]:
def team_groupby_count(player_df):
    df = player_df.groupby(['winner', 'winner_id'])['winner_id'].count().to_frame()
    df.columns = ['count']
    df = df.reset_index()
    return df

def team_groupby_sum(player_df):
    df = player_df.groupby(['winner', 'winner_id'])['count'].sum().to_frame()
    df.columns = ['count']
    df = df.reset_index()
    return df

In [284]:
winner_columns = ['winner', 'winner_id', 'count']
team_columns = ['team', 'team_id', 'count']

In [285]:
team_df = pd.DataFrame(columns=winner_columns)

In [286]:
team_df.columns = winner_columns
acc = 0
for filename in filenames:
    input_df = pd.read_csv(filename)
    df = team_groupby_count(input_df)

    df = pd.concat([team_df, df])
    team_df = team_groupby_sum(df)

    acc += input_df.shape[0]
    sum_ = team_df['count'].sum()
    print(f'sum={sum_} {acc}')

sum=113381 113381
sum=282340 282340
sum=462500 462500
sum=613042 613042
sum=673181 673181


In [287]:
team_df

Unnamed: 0,winner,winner_id,count
0,!nsurgents,6831,10
1,#FREEIBP,7952,60
2,#SKAM,7676,30
3,$HAMELE$$,7545,10
4,.Norway,7243,80
...,...,...,...
2862,yzn,7845,10
2863,zARLANS,7182,360
2864,zdarova,10468,30
2865,zxcv,8323,30


In [288]:
team_df.to_csv('teams.csv', index=False)

### check uniqueness

In [260]:
## duplicated team names
b = team_df.duplicated(['winner'], keep=False)
duplicate_df = team_df.loc[b]
duplicate_df

Unnamed: 0,winner,winner_id,count
28,2K,8781,30
29,2K,10518,40
132,Adaptation,5873,150
133,Adaptation,8342,50
146,Akatsuki,10467,150
...,...,...,...
2633,ezkatka,7487,10
2690,k1ck,4469,770
2691,k1ck,6222,320
2738,nGse,7116,30


In [271]:
## teams duplicated team_id count
did_df = team_df.groupby(['winner'])['winner_id'].count().to_frame()
did_df = did_df.reset_index()
did_df.columns = ['winner', 'winner_id_count']
did_df = did_df.loc[did_df['winner_id_count'] > 1]
did_df.sort_values(by=['winner_id_count'], ascending=False, inplace=True)
did_df 

Unnamed: 0,winner,winner_id_count
1599,Phoenix,6
583,Dynasty,3
1042,Invictus,3
1029,Infinity,3
645,Empire,3
...,...,...
1018,Imperium,2
1028,Infamous,2
1035,Inside,2
1039,Instinct,2


In [272]:
did_df.loc[did_df['winner_id_count'] > 2]

Unnamed: 0,winner,winner_id_count
1599,Phoenix,6
583,Dynasty,3
1042,Invictus,3
1029,Infinity,3
645,Empire,3


In [253]:
## duplicated team_id
b = team_df.duplicated(['winner_id'], keep=False)
duplicate_df = team_df.loc[b]
duplicate_df

Unnamed: 0,winner,winner_id,count


## check all teams

In [297]:
def team_groupby_count_(player_df):
    df = player_df.groupby(['teamname'])['teamname'].count().to_frame()
    df.columns = ['count']
    df = df.reset_index()
    return df

def team_groupby_sum_(player_df):
    df = player_df.groupby(['teamname'])['count'].sum().to_frame()
    df.columns = ['count']
    df = df.reset_index()
    return df

In [298]:
team_df = pd.read_csv('teams.csv')
winners = list(set(team_df['winner'].tolist()))
team_df

Unnamed: 0,winner,winner_id,count
0,!nsurgents,6831,10
1,#FREEIBP,7952,60
2,#SKAM,7676,30
3,$HAMELE$$,7545,10
4,.Norway,7243,80
...,...,...,...
2862,yzn,7845,10
2863,zARLANS,7182,360
2864,zdarova,10468,30
2865,zxcv,8323,30


In [299]:
all_teams_df = pd.DataFrame(columns=['teamname', 'count'])
for filename in filenames:
    input_df = pd.read_csv(filename)
    df = team_groupby_count_(input_df)

    df = pd.concat([all_teams_df, df])
    all_teams_df = team_groupby_sum_(df)
    
print(f'total {len(all_teams_df)} teams.')

total 4083 teams.


In [295]:
unknown = []
teams = all_teams_df['teamname'].tolist()
b = teams 
for team in list(all_teams):
    if team in winners:
        continue
    unknown_teams.append(team)
len(unknown_teams)

1290

In [296]:
unknown_teams[0:10]

['.4sv',
 'HASZTASZ',
 'Nova Academy',
 'burds treadmill',
 'ClickHost',
 'DWFN',
 'ClownS',
 'Rising Note',
 'CYBERIA',
 'EXTREMEFAST']

In [177]:
d1 = ddf.iloc[0:120]
d2 = d1.drop_duplicates()
print(f'{d1.shape[0]}, {d2.shape[0]}')

120, 100


In [69]:
b = ddf['winner'].isin(all_teams)
b

16597    False
9572     False
14131    False
14109    False
16649    False
         ...  
2553     False
3669     False
8242     False
15204    False
8323     False
Name: winner, Length: 17158, dtype: bool

In [72]:
ghost_winners_df = ddf[winner_columns].loc[b == False]
if ghost_winners_df.shape[0]:
    print(ghost_winners_df)

                        winner  winner_id
16597  blood, sweat, and tears      10692
9572   blood, sweat, and tears      10692
14131  blood, sweat, and tears      10692
14109  blood, sweat, and tears      10692
16649  blood, sweat, and tears      10692
...                        ...        ...
2553                 Illuminar       8813
3669                 Illuminar       8813
8242                 Illuminar       8813
15204                Illuminar       8813
8323                 Illuminar       8813

[17158 rows x 2 columns]


In [71]:
ddf[winner_columns]

Unnamed: 0,winner,winner_id
16597,"blood, sweat, and tears",10692
9572,"blood, sweat, and tears",10692
14131,"blood, sweat, and tears",10692
14109,"blood, sweat, and tears",10692
16649,"blood, sweat, and tears",10692
...,...,...
2553,Illuminar,8813
3669,Illuminar,8813
8242,Illuminar,8813
15204,Illuminar,8813


In [63]:
ghost_winner_df = check_winner(ddf)
ghost_winner_df

In [28]:
# check team without id:
winner_df = pd.read_csv(winner_filename)
winner_df = winner_df.drop_duplicates(ignore_index=True)
winner_df

Unnamed: 0,winner,winner_id
0,"blood, sweat, and tears",10692
1,Keep the Comms Up,10691
2,TeamOne,6947
3,AA,10676
4,Polar Ace,9497
...,...,...
136,BIG Academy,10254
137,Tenerife Titans,10162
138,9INE,10278
139,Warriors International,10719


In [40]:
all_team_df = pd.read_csv(all_team_filename)
all_team_df = all_team_df.drop_duplicates(ignore_index=True)
all_team_df

Unnamed: 0,teamname
0,"blood, sweat, and tears"
1,Levitate
2,Keep the Comms Up
3,Infinity
4,TeamOne
...,...
169,Morning Light
170,Under 21
171,Big Frames
172,Generation of Miracles


In [54]:
winners = winner_df['winner'].tolist()
b = all_team_df['teamname'].isin(winners)
missing_df = all_team_df[b == False]
missing_df

Unnamed: 0,teamname
20,pro100
64,Aalborg Rebels
73,Vintage
96,Rhein Neckar
99,Izako Boars
105,WiLD
106,ajuri
107,ECLOT
108,SJ
114,RED Canids


In [19]:
all_teams = ddf[['teamname']].drop_duplicates()
all_teams

Unnamed: 0,teamname
16597,"blood, sweat, and tears"
16647,Levitate
14613,Keep the Comms Up
11524,Infinity
17585,TeamOne
...,...
9884,Morning Light
16902,Under 21
11948,Big Frames
18032,Generation of Miracles


In [15]:
all_teams_df = pd.DataFrame(data={"teamname": list(all_teams)})
all_teams_df

Unnamed: 0,teamname
0,GODSENT
1,Baecon
2,HOLLYWOOD
3,Morning Light
4,CLEANTmix
...,...
169,Oceanus
170,Heretics
171,Aalborg Rebels
172,Heroic


In [16]:
all_teams_df.to_csv("all_teams.csv", index=False)

In [17]:
all_teams_df = pd.read_csv('all_teams.csv')
all_teams_df

Unnamed: 0,teamname
0,GODSENT
1,Baecon
2,HOLLYWOOD
3,Morning Light
4,CLEANTmix
...,...
169,Oceanus
170,Heretics
171,Aalborg Rebels
172,Heroic


In [25]:
def clean_file(df_input):
    df = df_input.copy()
    df['winner'] = list(map(lambda x: x.replace('%20', ' '), df['winner']))

16597    blood, sweat, and tears
9572     blood, sweat, and tears
14131    blood, sweat, and tears
14109    blood, sweat, and tears
16649    blood, sweat, and tears
                  ...           
2553                   Illuminar
3669                   Illuminar
8242                   Illuminar
15204                  Illuminar
8323                   Illuminar
Name: winner, Length: 17158, dtype: object

In [None]:
for idx
    if '%20' in winner:
        # replace(old, new, count)
        new_str = winner.replace('%20', ' ')
        print(f'{winner} => {new_str}')


In [35]:
teamnames = ddf['teamname']

In [36]:
for winner in df['winner']:
    if winner not in ddf['teamname']:
        print(f"winner '{winner}'")


winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'blood, sweat, and tears'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'Keep the Comms Up'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'TeamOne'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'AA'
winner 'Polar Ace'
winner 'Polar Ace'
winner 'Polar Ace'
winner 'Polar Ace'
winner 'Polar Ace'
winner 'Polar A

winner 'Apeks'
winner 'Apeks'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Apeks'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Unicorns of Love'
winner 'Apeks'
winner 'Apeks'
winner 'Ap

winner 'PACT'
winner 'PACT'
winner 'PACT'
winner 'PACT'
winner 'PACT'
winner 'PACT'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'HAVU'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winner 'Ninjas To Be'
winn

winner 'mens'
winner 'mens'
winner 'mens'
winner 'mens'
winner 'mens'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'forZe'
winner 'Sp

winner 'Paradox'
winner 'Paradox'
winner 'Paradox'
winner 'Paradox'
winner 'Paradox'
winner 'Paradox'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'SKADE'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX'
winner 'ALTERNATE aTTaX

winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'TYLOO'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winner 'ViCi'
winne

winner 'forZe'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'ENCE'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'HellRaisers'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Sprout'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 'Heretics'
winner 

winner 'CR4ZY'
winner 'CR4ZY'
winner 'CR4ZY'
winner 'CR4ZY'
winner 'CR4ZY'
winner 'CR4ZY'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'AVEZ'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground Zero'
winner 'Ground

In [40]:
teamname = teamnames.iloc[0]
teamname

'blood, sweat, and tears'

In [42]:
if teamname not in teamnames:
    print(f"teamname '{teamname}'")

teamname 'blood, sweat, and tears'


In [43]:
ddf.shape[0]

17158

In [44]:
winners = df['winner'].count()

In [45]:
winners

17158

In [68]:
team_name_id_map = {}
for i in range(ddf.shape[0]):
    name = df['winner'].iloc[i]
    team_name_id_map[name] = int(df['winner_id'].iloc[i])
for i in range(ddf.shape[0]):
    name = df['winner'].iloc[i]
    id_ = df['winner_id'].iloc[i]
    if team_name_id_map[name] != id_:
        print(f'index={i}, name={name}, id={id_}')

In [51]:
losers = []
for i in range(ddf.shape[0]):
    name = df['teamname'].iloc[i]
    if name not in team_name_id_map.keys():
        if name not in losers:
            losers.append(name)
            print(f'index={i}, name={name}')
losers

index=320, name=pro100
index=1960, name=Aalborg Rebels
index=2360, name=Vintage
index=4617, name=Rhein Neckar
index=4652, name=Izako Boars
index=4977, name=WiLD
index=5232, name=ajuri
index=5277, name=ECLOT
index=5482, name=SJ
index=6707, name=RED Canids
index=6797, name=Sinisters
index=7952, name=PC419
index=8177, name=Forbidden
index=8182, name=Control
index=8272, name=really weird
index=8327, name=Buffdaddy's Paypal
index=8517, name=Hawks
index=8567, name=Baecon
index=9867, name=Quantum Bellator Fire
index=10567, name=ETHEREAL
index=10652, name=ztrewq
index=10662, name=Unique
index=10677, name=FTW
index=12206, name=Asterius
index=12651, name=RAMS
index=13511, name=Redemption POA
index=13696, name=CLEANTmix
index=13981, name=Lucid Dream
index=14926, name=Elites
index=15671, name=Morning Light
index=16423, name=Big Frames
index=17038, name=Generation of Miracles
index=17083, name=Juggernauts


['pro100',
 'Aalborg Rebels',
 'Vintage',
 'Rhein Neckar',
 'Izako Boars',
 'WiLD',
 'ajuri',
 'ECLOT',
 'SJ',
 'RED Canids',
 'Sinisters',
 'PC419',
 'Forbidden',
 'Control',
 'really weird',
 "Buffdaddy's Paypal",
 'Hawks',
 'Baecon',
 'Quantum Bellator Fire',
 'ETHEREAL',
 'ztrewq',
 'Unique',
 'FTW',
 'Asterius',
 'RAMS',
 'Redemption POA',
 'CLEANTmix',
 'Lucid Dream',
 'Elites',
 'Morning Light',
 'Big Frames',
 'Generation of Miracles',
 'Juggernauts']

In [53]:
team_name_id_map

{'blood, sweat, and tears': 10692,
 'Keep the Comms Up': 10691,
 'TeamOne': 6947,
 'AA': 10676,
 'Polar Ace': 9497,
 'MIBR': 9215,
 'Syman': 8772,
 'CR4ZY': 10150,
 'Ambush': 7674,
 'Lyngby Vikings': 8963,
 'Nordavind': 8769,
 'sAw': 10567,
 'Endpoint': 7234,
 'Complexity': 5005,
 'OG': 10503,
 'FATE': 9863,
 'Singularity': 6978,
 'forZe': 8135,
 'HellRaisers': 5310,
 'Virtus.pro': 5378,
 'GODSENT': 6902,
 'AGO': 8068,
 'AGF': 8704,
 'GamerLegion': 9928,
 'Sprout': 8637,
 'Natus Vincere': 4608,
 'Espada': 8669,
 'Spirit': 7020,
 'SKADE': 10386,
 'Gambit Youngsters': 9976,
 'ORDER': 8668,
 'Hard4U': 10573,
 'Renegades': 6211,
 'Nemiga': 7969,
 'Moscow Five Academy': 10627,
 'Thunder Logic': 9615,
 'New England Whalers': 9888,
 'Heroic': 7175,
 'HAVU': 7865,
 'North': 7533,
 'Sigma': 10372,
 'Ignite': 8706,
 'FaZe': 6667,
 'ENCE': 4869,
 'BIG': 7532,
 'Vitality': 9565,
 'Unicorns of Love': 9812,
 'Apeks': 9806,
 'PACT': 8248,
 'AVANT': 5293,
 'Chiefs': 6010,
 'NiP': 4411,
 'MAD Lions': 8

In [89]:
team_df = pd.read_json(json.dumps(team_name_id_map), orient='index')
team_df

Unnamed: 0,0
"blood, sweat, and tears",10692
Keep the Comms Up,10691
TeamOne,6947
AA,10676
Polar Ace,9497
...,...
BIG Academy,10254
Tenerife Titans,10162
9INE,10278
Warriors International,10719


In [91]:
team_df = team_df.reset_index()
team_df

Unnamed: 0,index,0
0,"blood, sweat, and tears",10692
1,Keep the Comms Up,10691
2,TeamOne,6947
3,AA,10676
4,Polar Ace,9497
...,...,...
136,BIG Academy,10254
137,Tenerife Titans,10162
138,9INE,10278
139,Warriors International,10719


In [92]:
team_df.columns = ['team_name', 'team_id']
team_df

Unnamed: 0,team_name,team_id
0,"blood, sweat, and tears",10692
1,Keep the Comms Up,10691
2,TeamOne,6947
3,AA,10676
4,Polar Ace,9497
...,...,...
136,BIG Academy,10254
137,Tenerife Titans,10162
138,9INE,10278
139,Warriors International,10719


In [93]:
team_filename = 'team_id_name.csv'
team_df.to_csv(team_filename, index=False)

In [94]:
team_ddf = pd.read_csv(team_filename)
team_ddf

Unnamed: 0,team_name,team_id
0,"blood, sweat, and tears",10692
1,Keep the Comms Up,10691
2,TeamOne,6947
3,AA,10676
4,Polar Ace,9497
...,...,...
136,BIG Academy,10254
137,Tenerife Titans,10162
138,9INE,10278
139,Warriors International,10719


In [85]:
a = 'someone' in team_ddf['team_name'].values
print(a)

False


In [84]:
a = 'blood, sweat, and tears' in team_ddf['team_name'].values
print(a)

True


In [None]:
df.append(df2, ignore_index=True)