In [96]:
import cspython.scraper
import pandas as pd
import numpy as np

In [None]:
# Scrape the series data used by the rest of the notebook.
# NOTE(review): the arguments look like (team name, start date, end date) --
# confirm against cspython.scraper.scrape_series_data.
series = cspython.scraper.scrape_series_data('BIG', '2017-01-01', '2017-02-01', verbose=False)

Each element of the list returned by scrape_series_data is a dictionary containing the data for one series played by the team. The series are in reverse chronological order. The elements of each series dictionary are:

    url:        the url on hltv of the Overview of the entire series
    demo_url:   the url of the demo, hosted on hltv later
    stats_url:  the url of a page with more detailed statistics about the series
                (the data scraped from this page is contained in stats_data)
    teams:      a dataframe containing the overall stats for each team in the series
    vetos:      the vetos in chronological order
    match_info: score and map name for each match
    team_a_b:   this provides order to the teams and match_info scores 
    

In [97]:
def match_score_dataframe(series):
    """Tabulate the result of every map played in one series.

    Parameters
    ----------
    series : dict
        One series dictionary. ``series['team_a_b']`` supplies the two team
        names (in score order) and ``series['match_info']`` is a sequence of
        dicts with a ``'map_name'`` and a two-element ``'scores'`` entry.

    Returns
    -------
    pandas.DataFrame
        One row per map with the columns ``map_name``, ``winner`` and one
        score column per team. ``winner`` is a team name, or ``'draw'`` when
        both scores are equal.
    """
    first_team = series['team_a_b'][0]
    second_team = series['team_a_b'][1]
    matches = series['match_info']
    row_labels = range(len(matches))

    rows = []
    for match in matches:
        played_map = match['map_name']
        # Scores arrive as strings, so convert before comparing.
        first_score = int(match['scores'][0])
        second_score = int(match['scores'][1])
        if first_score == second_score:
            victor = 'draw'
        elif first_score > second_score:
            victor = first_team
        else:
            victor = second_team
        rows.append([played_map, victor, first_score, second_score])

    # dtype=object keeps the mixed string/int cells exactly as they were given.
    return pd.DataFrame(rows,
                        columns=['map_name', 'winner', first_team, second_team],
                        index=row_labels,
                        dtype=object)

In [193]:
# Show the per-map scores ('match_info') of the series at index 2.
series[2]['match_info']

[{'map_name': 'Cache', 'scores': [u'16', u'11']},
 {'map_name': 'Train', 'scores': [u'16', u'11']}]

In [201]:
def round_by_round_dataframe(match_stats_data, map_name):
    """Build a round-by-round table for a single map.

    Parameters
    ----------
    match_stats_data : dict
        Stats for one map; its ``'team_scores'`` entry is passed to
        ``create_winner_column`` and its ``'team_endings'`` entry to
        ``create_team_ending_df``.
    map_name : str
        Value written into the ``map`` column of every row.

    Returns
    -------
    pandas.DataFrame
        The columns ``map``, ``round_num`` and ``half`` followed by the
        columns of the endings table and of the winners table, indexed by
        round.
    """
    # Rounds 1..rounds_per_half are the first half; every later round is
    # labelled as the second half (any extra rounds included).
    rounds_per_half = 15

    winners = create_winner_column(match_stats_data['team_scores'])
    team_ending_df = create_team_ending_df(match_stats_data['team_endings'], winners)
    raw = pd.concat([team_ending_df, winners], axis=1)

    final = pd.DataFrame(columns=['map', 'round_num', 'half'], index=raw.index)
    final.loc[:, 'map'] = map_name
    final.loc[:, 'round_num'] = raw.index

    # `.ix` no longer exists in pandas, and the old pair of label slices
    # (`:15` then `15:`) both contained round 15, so that round ended up in
    # half 2. A boolean mask puts each round in exactly one half.
    first_half = raw.index <= rounds_per_half
    final.loc[first_half, 'half'] = 1
    final.loc[~first_half, 'half'] = 2
    return pd.concat([final, raw], axis=1)

In [202]:
def create_winner_column(team_scores):
    """Derive the round winner and the running win totals of both teams.

    Parameters
    ----------
    team_scores : dict
        ``'team_a'`` and ``'team_b'`` each hold a sequence of cells; only the
        first item of every cell is used. After rows that are empty (``''``)
        for both teams are discarded, the first remaining row supplies the
        two team names and every later row is one round, with an empty
        string in the first column meaning that the second team won it.

    Returns
    -------
    pandas.DataFrame
        Columns ``winner``, ``<team_a>_wins`` and ``<team_b>_wins``, indexed
        by the original position of each round row.
    """
    first_cells = pd.Series(team_scores['team_a']).apply(lambda cell: cell[0])
    second_cells = pd.Series(team_scores['team_b']).apply(lambda cell: cell[0])
    board = pd.concat([first_cells, second_cells], axis=1)

    # Keep only rows in which at least one of the two teams has an entry.
    has_entry = (board.loc[:, 0] != '') | (board.loc[:, 1] != '')
    board = board.loc[has_entry, :]

    # The leading row carries the team names; the remainder are the rounds.
    name_a = board.iloc[0, 0]
    name_b = board.iloc[0, 1]
    rounds = board.iloc[1:, :]

    labels = ['winner', name_a + '_wins', name_b + '_wins']
    tally_a = 0
    tally_b = 0
    records = []
    for first_cell in rounds.iloc[:, 0]:
        if first_cell == '':
            tally_b += 1
            records.append([name_b, tally_a, tally_b])
        else:
            tally_a += 1
            records.append([name_a, tally_a, tally_b])

    return pd.DataFrame(records, columns=labels, index=rounds.index, dtype=object)

def create_team_ending_df(team_endings, winners):
    """Describe how every round ended and which team played which side.

    Parameters
    ----------
    team_endings : dict
        ``'team_a'`` and ``'team_b'`` each hold a sequence of cells; only the
        first item of every cell is used. Rows that are ``'emptyHistory'``
        for both teams are discarded, the first remaining row is treated as
        a header and dropped, and every later row is one round.
    winners : pandas.DataFrame
        Table with a ``winner`` column followed by ``<team_a>_wins`` and
        ``<team_b>_wins``; the team names are recovered from those two
        column labels. It must contain every round label of the endings.

    Returns
    -------
    pandas.DataFrame
        Columns ``ending``, ``CT``, ``T`` and ``side_winner``, indexed by
        round.

    Notes
    -----
    Rounds 1..15 are the first half and all later rounds receive the
    swapped sides, so rounds beyond a second block of 15 are still labelled
    with the second-half sides.
    """
    rounds_per_half = 15
    # Ending labels that identify the side of the team that won the round.
    ct_endings = ['ct_win', 'bomb_defused']
    t_endings = ['t_win', 'bomb_exploded']

    # Strip the trailing '_wins' (5 characters) to get the team names back.
    team_a_name, team_b_name = winners.columns[1][:-5], winners.columns[2][:-5]

    team_a = pd.Series(team_endings['team_a']).apply(lambda x: x[0])
    team_b = pd.Series(team_endings['team_b']).apply(lambda x: x[0])
    raw_endings = pd.concat([team_a, team_b], axis=1)
    played = (raw_endings.loc[:, 0] != 'emptyHistory') | (raw_endings.loc[:, 1] != 'emptyHistory')
    raw_endings = raw_endings.loc[played, :].iloc[1:, :]

    round_labels = np.asarray(raw_endings.index)
    first_half = round_labels <= rounds_per_half
    second_half = (round_labels > rounds_per_half) & (round_labels <= 2 * rounds_per_half)

    a_col = raw_endings.loc[:, 0]
    b_col = raw_endings.loc[:, 1]

    # The previous check used `'t_win' in some_series`, which looks at the
    # index labels rather than the values and was therefore always False.
    # Test the values instead: team_a started as CT if it won a first-half
    # round with a CT ending or a second-half round with a T ending, or if
    # team_b did the opposite.
    team_a_started_ct = bool(
        a_col[first_half].isin(ct_endings).any()
        or b_col[first_half].isin(t_endings).any()
        or a_col[second_half].isin(t_endings).any()
        or b_col[second_half].isin(ct_endings).any()
    )
    if team_a_started_ct:
        first_ct, first_t = team_a_name, team_b_name
    else:
        # Also the fallback when the endings give no evidence either way.
        first_ct, first_t = team_b_name, team_a_name

    # One mask decides the side for every round, so round 15 stays in the
    # first half instead of being overwritten by the second-half assignment.
    ct_side = np.where(first_half, first_ct, first_t).astype(object)
    t_side = np.where(first_half, first_t, first_ct).astype(object)

    # Exactly one column holds the real ending; the other is 'emptyHistory'.
    ending = a_col.where(a_col != 'emptyHistory', b_col)

    round_winner = np.asarray(winners.loc[raw_endings.index, 'winner'], dtype=object)
    side_winner = np.where(t_side == round_winner, 'T', 'CT')

    return pd.DataFrame(
        {'ending': ending, 'CT': ct_side, 'T': t_side, 'side_winner': side_winner},
        index=raw_endings.index,
        columns=['ending', 'CT', 'T', 'side_winner']
    )

In [203]:
# Round-by-round table for the first stats_data entry of the series at index 2,
# labelled with the map name 'Cache'.
round_by_round_dataframe(series[2]['stats_data'][0], 'Cache')

Unnamed: 0,map,round_num,half,ending,CT,T,side_winner,winner,FlipSid3_wins,BIG_wins
1,Cache,1,1,bomb_defused,BIG,FlipSid3,CT,BIG,0,1
2,Cache,2,1,ct_win,BIG,FlipSid3,CT,BIG,0,2
3,Cache,3,1,bomb_exploded,BIG,FlipSid3,T,FlipSid3,1,2
4,Cache,4,1,bomb_defused,BIG,FlipSid3,CT,BIG,1,3
5,Cache,5,1,t_win,BIG,FlipSid3,T,FlipSid3,2,3
6,Cache,6,1,t_win,BIG,FlipSid3,T,FlipSid3,3,3
7,Cache,7,1,t_win,BIG,FlipSid3,T,FlipSid3,4,3
8,Cache,8,1,ct_win,BIG,FlipSid3,CT,BIG,4,4
9,Cache,9,1,ct_win,BIG,FlipSid3,CT,BIG,4,5
10,Cache,10,1,t_win,BIG,FlipSid3,T,FlipSid3,5,5


In [204]:
# List the keys available in the first series dictionary.
series[0].keys()

['url',
 'demo_url',
 'teams',
 'vetos',
 'match_data',
 'stats_url',
 'team_a_b',
 'match_info',
 'stats_data']

In [199]:
class Series(object):
    """Object wrapper around one scraped series dictionary."""

    def __init__(self, scrapped_data):
        """Keep the scraped series dictionary on the instance.

        Parameters
        ----------
        scrapped_data : dict
            One element of the list returned by the scraper.
        """
        self.scrapped_data = scrapped_data

    def round_by_round_dataframe(self, match_stats_data, map_name):
        """Return the round-by-round table for one map of the series.

        The previous definition (``def self.round_by_round_dataframe(...)``
        with an unindented body) was not valid Python. The method now
        forwards to the module-level ``round_by_round_dataframe`` function
        so the logic lives in one place.

        Parameters
        ----------
        match_stats_data : dict
            Stats for one map, passed through unchanged.
        map_name : str
            Map name, passed through unchanged.
        """
        # Inside a method body this name resolves to the module-level
        # function, not to this method.
        return round_by_round_dataframe(match_stats_data, map_name)

'Cache'