In [1]:
from csgo.parser import DemoParser
import pandas as pd
import operator
# Lift the column cap so wide DataFrames are displayed without truncated columns.
pd.options.display.max_columns = None

In [2]:
# Parse the Gambit vs Natus Vincere (Dust2) demo file.
# NOTE: the demo path is relative, so it depends on the directory the notebook is run from.
demo_parser = DemoParser(
    demofile="../../../CSGO_Demofiles/gambit-vs-natus-vincere-m1-dust2.dem",
    demo_id="GA-NaVi-BLAST2021",
    parse_rate=128,
)
# return_type="df" requests the parsed data as pandas DataFrames; the individual
# tables are looked up by name further down (e.g. data_df["Kills"]).
data_df = demo_parser.parse(return_type="df")

02:13:43 [INFO] Go version>=1.14.0
02:13:43 [INFO] Initialized CSGODemoParser with demofile C:\Users\aagrawal-22\CSGO_Demofiles\gambit-vs-natus-vincere-m1-dust2.dem
02:13:43 [INFO] Setting demo id to GA-NaVi-BLAST2021
02:13:43 [INFO] Setting parse rate to 128
02:13:43 [INFO] Running Golang parser from C:\Users\aagrawal-22\Anaconda3\lib\site-packages\csgo-0.1-py3.8.egg\csgo\parser\
02:13:43 [INFO] Looking for file at C:\Users\aagrawal-22\CSGO_Demofiles\gambit-vs-natus-vincere-m1-dust2.dem
02:14:12 [INFO] Wrote demo parse output to GA-NaVi-BLAST2021.json
02:14:12 [INFO] Reading in JSON from GA-NaVi-BLAST2021.json
02:14:13 [INFO] JSON data loaded, available in the `json` attribute to parser
02:14:13 [INFO] Successfully parsed JSON output
02:14:13 [INFO] Successfully returned JSON output
02:14:13 [INFO] Parsed rounds to Pandas DataFrame
02:14:13 [INFO] Parsed kills to Pandas DataFrame
02:14:13 [INFO] Parsed damages to Pandas DataFrame
02:14:13 [INFO] Parsed grenades to Pandas DataFrame
02:

In [3]:
def extract_numeric_filters(filters,key):
    """Split each numeric filter string for ``key`` into its operator and value.

    Every condition is a string made of a comparison operator followed by a
    number, e.g. ``">=10"``, ``"<-2.5"`` or ``"== 3"``.  The operator is matched
    as a prefix of the condition, so negative numbers, numbers that start with
    a decimal point and surrounding whitespace are all accepted.

    param filters: a dictionary where the keys are the columns to filter the
        dataframe by and the values are lists of filter conditions
    param key: the key in ``filters`` whose conditions are parsed
    raises ValueError: if a condition is not a string, if it does not start
        with one of ``==``, ``!=``, ``<=``, ``>=``, ``<``, ``>``, or if the text
        after the operator cannot be converted to ``float``
        (``ValueError`` is a subclass of ``Exception``, so callers that catch
        ``Exception`` keep working)
    returns: ``(sign_list, val_list)`` - the operators and the float values,
        in the same order as the conditions
    """
    # Two-character operators are listed first so "<=" is never read as "<".
    operators=('==','!=','<=','>=','<','>')
    sign_list,val_list=[],[]
    for condition in filters[key]:
        if not isinstance(condition,str):
            raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type string")
        stripped=condition.strip()
        sign=next((op for op in operators if stripped.startswith(op)),None)
        if sign is None:
            raise ValueError("Invalid logical operator in filters for "+f'"{key}"'+" column")
        try:
            # float() tolerates whitespace between the operator and the number
            value=float(stripped[len(sign):])
        except ValueError as ve:
            raise ValueError("Invalid numerical value in filters for "+f'"{key}"'+" column") from ve
        sign_list.append(sign)
        val_list.append(value)
    return sign_list,val_list

In [4]:
def check_filters(df,filters):
    """Validate ``filters`` against the columns of ``df`` before any filtering happens.

    Columns of object dtype must be filtered by strings and columns of bool
    dtype by booleans.  Every other column is treated as numeric and its
    conditions are validated by ``extract_numeric_filters``.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the
        dataframe by and the values are lists of filters
    raises KeyError: if a key of ``filters`` is not a column of ``df``
    raises TypeError: if the filters of a column are given as a bare string
        instead of a list (a bare string would otherwise be checked character
        by character and wrongly pass the string check)
    raises ValueError: if a filter for an object column is not a string or a
        filter for a bool column is not a boolean
    raises: whatever ``extract_numeric_filters`` raises for the other columns
    """
    for key in filters:
        if key not in df.columns:
            raise KeyError(f'Column "{key}" in filters is not a column of the dataframe')
        conditions=filters[key]
        if isinstance(conditions,str):
            raise TypeError("Filter(s) for column "+f'"{key}"'+" must be given in a list")
        col_dtype=df.dtypes[key]
        if col_dtype=="O":
            if not all(isinstance(condition,str) for condition in conditions):
                raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type string")
        elif col_dtype=="bool":
            if not all(isinstance(condition,bool) for condition in conditions):
                raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type boolean")
        else:
            # numeric column: parsing the conditions is the validation
            extract_numeric_filters(filters,key)

In [5]:
def logical_operation(df,col,sign,val):
    """Keep the rows of ``df`` whose ``col`` value satisfies ``<value> <sign> val``.

    param df: the dataframe to be filtered
    param col: the column of the dataframe to be filtered
    param sign: the logical operator, one of ``==``, ``!=``, ``<=``, ``>=``, ``<``, ``>``
    param val: the value to compare the column against
    raises KeyError: if ``sign`` is not one of the supported operators
    returns: a new dataframe holding only the matching rows
    """
    # Map each operator to its function so only the requested comparison runs.
    # Evaluating all six up front is wasteful, and it breaks "=="/"!=" whenever
    # an ordering comparison is unsupported for this column/value pair.
    operations={"==":operator.eq,"!=":operator.ne,
                "<=":operator.le,">=":operator.ge,
                "<":operator.lt,">":operator.gt}
    mask=operations[sign](df[col],val)
    return df.loc[mask]

In [6]:
def filter_dataframe(df,filters):
    """Return a filtered copy of ``df``; the input dataframe is left untouched.

    For object and bool columns a row is kept when its value is one of the
    listed filters.  For every other column each filter is a condition such as
    ``">=10"`` and the conditions are applied one after another, so a row must
    satisfy all of them.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the
        dataframe by and the values are lists of filters
    raises: whatever ``check_filters`` raises when the filters are invalid
    returns: a filtered copy of the dataframe
    """
    df_copy=df.copy()
    check_filters(df_copy,filters)
    for key in filters:
        if df_copy.dtypes[key]=='O' or df_copy.dtypes[key]=='bool':
            df_copy=df_copy.loc[df_copy[key].isin(filters[key])]
        else:
            # Parse the conditions once per column instead of re-parsing the
            # whole list for every single condition.
            sign_list,val_list=extract_numeric_filters(filters,key)
            for sign,val in zip(sign_list,val_list):
                df_copy=logical_operation(df_copy,key,sign,val)
    return df_copy

In [7]:
def calculate_statistics(df,filters,col_to_groupby,col_to_agg,agg,new_col_names):
    """Filter, group and aggregate a copy of ``df`` and rename the resulting columns.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the
        dataframe by and the values are the filters
        (keys must be of type string and values must be of type list)
    param col_to_groupby: a list of the column(s) of the dataframe to group by
        (column names must be of type string)
    param col_to_agg: a list of the column(s) of the dataframe to aggregate
        (column names must be of type string)
    param agg: a list of the function(s) to aggregate the columns by; each
        entry is itself a list of function names and is paired by position
        with ``col_to_agg``, example: [["count"],["max","min"]]
    param new_col_names: a list of the new column names, assigned to the
        columns of the result after the index has been reset
        (new column names must be of type string)
    raises: Exceptions through the function ``filter_dataframe``
    returns: a filtered, grouped, and aggregated copy of the given dataframe
    """
    filtered=filter_dataframe(df,filters)
    # pair every column to aggregate with its list of aggregate functions
    aggregations={column:functions for column,functions in zip(col_to_agg,agg)}
    grouped=filtered.groupby(col_to_groupby)
    summary=grouped.agg(aggregations).reset_index()
    summary.columns=new_col_names
    return summary

In [8]:
# Example: kills per Natus Vincere player.
# Take the "Kills" dataframe, keep the rows where "AttackerTeam" is "Natus Vincere",
# group by "AttackerName", count "AttackerName" within each group, and name the
# resulting columns "PlayerName" and "Kills".
calculate_statistics(
    df=data_df["Kills"],
    filters={"AttackerTeam":["Natus Vincere"]},
    col_to_groupby=["AttackerName"],
    col_to_agg=["AttackerName"],
    agg=[["count"]],
    new_col_names=["PlayerName","Kills"],
)

Unnamed: 0,PlayerName,Kills
0,Boombl4,12
1,Perfecto,10
2,b1t,24
3,electronic,18
4,s1mple,22
