In [1]:
from csgo.parser import DemoParser
import pandas as pd
import operator
pd.set_option('display.max_columns', None)

In [2]:
demo_parser = DemoParser(demofile = "../../../CSGO_Demofiles/gambit-vs-natus-vincere-m1-dust2.dem", demo_id = "GA-NaVi-BLAST2021", parse_rate=128)
data = demo_parser.parse()
data_df = demo_parser.parse(return_type="df")

02:47:02 [INFO] Go version>=1.14.0
02:47:02 [INFO] Initialized CSGODemoParser with demofile C:\Users\aagrawal-22\CSGO_Demofiles\gambit-vs-natus-vincere-m1-dust2.dem
02:47:02 [INFO] Setting demo id to GA-NaVi-BLAST2021
02:47:02 [INFO] Setting parse rate to 128
02:47:02 [INFO] Running Golang parser from C:\Users\aagrawal-22\Anaconda3\lib\site-packages\csgo-0.1-py3.8.egg\csgo\parser\
02:47:02 [INFO] Looking for file at C:\Users\aagrawal-22\CSGO_Demofiles\gambit-vs-natus-vincere-m1-dust2.dem
02:47:30 [INFO] Wrote demo parse output to GA-NaVi-BLAST2021.json
02:47:30 [INFO] Reading in JSON from GA-NaVi-BLAST2021.json
02:47:30 [INFO] JSON data loaded, available in the `json` attribute to parser
02:47:30 [INFO] Successfully parsed JSON output
02:47:30 [INFO] Successfully returned JSON output
02:47:30 [INFO] Running Golang parser from C:\Users\aagrawal-22\Anaconda3\lib\site-packages\csgo-0.1-py3.8.egg\csgo\parser\
02:47:30 [INFO] Looking for file at C:\Users\aagrawal-22\CSGO_Demofiles\gambit-vs

In [3]:
def extract_numeric_filters(filters,key): 
# function that extracts the logical operators and numerical values from each value of the specified key
    # param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
    # param key: the key in param filters to check the values for
# raises an Exception if there is an invalid logic operator or numerical value in a value of the specified key 
# returns a list with all of the logical operators and a list with all of the numerical values found in the key's values
    sign_list,val_list=[],[] 
    for index in filters[key]:
        if not isinstance(index,str):
            raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type string")
        i,sign=0,""
        while i<len(index) and not index[i].isdecimal(): 
            sign+=index[i] 
            end_index=i 
            i+=1
        if sign not in ('==','!=','<=','>=','<','>'): 
            raise Exception("Invalid logical operator in filters for "+f'"{key}"'+" column") 
        sign_list.append(sign) 
        try:
            val_list.append(float(index[end_index+1:])) 
        except ValueError as ve:
            raise Exception("Invalid numerical value in filters for "+f'"{key}"'+" column") from ve    
    return sign_list,val_list 

In [4]:
def check_filters(df,filters):
# function that checks if the filters are valid by iterating over the values for each key and checking their type for columns of object and boolean types and calling the function extract_numeric_filters for columns of integer and float types
    # param df: the dataframe to be filtered
    # param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
# raises Exceptions if there is a type mismatch for string and boolean columns or through the function extract_numeric_filters 
    for key in filters:
        if df.dtypes[key]=="O":
            for index in filters[key]: 
                if not isinstance(index,str): 
                    raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type string")
        elif df.dtypes[key]=="bool":
            for index in filters[key]: 
                if not isinstance(index,bool): 
                    raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type boolean")
        else:
            extract_numeric_filters(filters,key)  

In [5]:
def logical_operation(df,col,sign,val):
# function that filters the given dataframe given a logical operator and a numerical value
    # param df: the dataframe to be filtered
    # param col: the column of the dataframe to be filtered 
    # param sign: the logical operator 
    # param val: the numerical value to filter the column by 
# returns a filtered copy of the dataframe
    operations={"==":operator.eq(df[col],val),"!=":operator.ne(df[col],val),
                "<=":operator.le(df[col],val),">=":operator.ge(df[col],val),
                "<":operator.lt(df[col],val),">":operator.gt(df[col],val)}
    filtered_dataframe=df.loc[operations[sign]]
    return filtered_dataframe

In [6]:
def filter_dataframe(df,filters): 
# function that filters the given dataframe given filters
    # param df: the dataframe to be filtered
    # param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
# returns a filtered copy of the dataframe
    df_copy=df.copy() 
    check_filters(df_copy,filters) 
    for key in filters:
        if df_copy.dtypes[key]=='O' or df_copy.dtypes[key]=='bool': 
            df_copy=df_copy.loc[df_copy[key].isin(filters[key])]
        else:
            i=0
            for sign in extract_numeric_filters(filters,key)[0]:
                val=extract_numeric_filters(filters,key)[1][i]
                df_copy=logical_operation(df_copy,key,extract_numeric_filters(filters,key)[0][i],val)
                i+=1
    return df_copy   

In [7]:
def calculate_statistics(df,filters,col_to_groupby,col_to_agg,agg,new_col_names): 
# function that filters, groups, and aggregates the data in a copy of the given dataframe
    # param df: the dataframe to be filtered
    # param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
        # keys must be of type string and values must be of type list
    # param col_to_groupby: a list of the column(s) of the dataframe to group by
        # column names must be of type string
    # param col_to_agg: a list of the column(s) of the dataframe to aggregate 
        # column names must be of type string
    # param agg: a list of the the function(s) to aggregate the columns by 
        # functions must be of type string and in a list within the larger list, example: [["count"],["max","min"]]
    # param new_col_names: a list of the new column names 
        # new column names must be of type string
# raises Exceptions through the function filter_dataframe
# returns a filtered, grouped, and aggregated copy of the given dataframe 
    df_copy=filter_dataframe(df,filters)
    agg_dict=dict(zip(col_to_agg,agg))
    df_copy=df_copy.groupby(col_to_groupby).agg(agg_dict).reset_index()
    df_copy.columns=new_col_names
    return df_copy

In [8]:
# example: from "Kills" dataframe, filter rows where "AttackerTeam" is "Natus Vincere", groupby "AttackerName",
# aggregate "AttackerName", use the aggregate function "size", and change the column names to "PlayerName" and "Kills" 
calculate_statistics(data_df["Kills"],{"AttackerTeam":["Natus Vincere"]},["AttackerName"],["AttackerName"],[["size"]],["PlayerName","Kills"])

Unnamed: 0,PlayerName,Kills
0,Boombl4,12
1,Perfecto,10
2,b1t,24
3,electronic,18
4,s1mple,22


In [9]:
def kdr(kills_df,kills_filters,deaths_filters):
# function that creates a KDR dataframe given a kills dataframe and filters
    # param kills_df: the dataframe with the kills data
    # param kills_filters: a dictionary where the keys are the columns to filter the kills dataframe by and the values are the filters
    # param deaths_filters: a dictionary where the keys are the columns to filter the kills dataframe by and the values are the filters
    # need two sets of filters for the case of filtering by team: kills_filters={"AttackerTeam":["Team"]}, deaths_filters={"VictimTeam":["Team"]}
# returns a dataframe
    total_kills=calculate_statistics(kills_df,kills_filters,["AttackerName"],["AttackerName"],[["size"]],["PlayerName","Kills"])
    total_deaths=calculate_statistics(kills_df,deaths_filters,["VictimName"],["VictimName"],[["size"]],["PlayerName","Deaths"])
    kdr=pd.merge(total_kills,total_deaths)
    kdr["KDR"]=kdr["Kills"]/kdr["Deaths"]
    kdr.sort_values(by="KDR",ascending=False,inplace=True)
    return kdr

In [10]:
kdr(data_df["Kills"],{},{})

02:48:07 [INFO] NumExpr defaulting to 8 threads.


Unnamed: 0,PlayerName,Kills,Deaths,KDR
8,s1mple,22,14,1.571429
4,b1t,24,18,1.333333
2,Hobbit,24,19,1.263158
0,Ax1Le,18,16,1.125
5,electronic,18,18,1.0
9,sh1ro,14,14,1.0
6,interz,14,15,0.933333
1,Boombl4,12,16,0.75
3,Perfecto,10,16,0.625
7,nafany,11,21,0.52381


In [11]:
def adr(rounds_df,damages_df,damages_filters):
# function that creates an ADR dataframe given a rounds dataframe, damages dataframe and filters
    # param rounds_df: the dataframe with the rounds data
    # param damages_df: the dataframe with the damages data 
    # param damages_filters: a dictionary where the keys are the columns to filter the damages dataframe by and the values are the filters
# returns a dataframe
    total_rounds=len(rounds_df)
    damages_copy=damages_df.copy()
    damages_copy["RawDamage"]=damages_copy["HpDamage"]+damages_copy["ArmorDamage"]
    damages_copy["NormDamage"]=damages_copy["HpDamageTaken"]+damages_copy["ArmorDamage"]
    adr=calculate_statistics(damages_copy,damages_filters,["AttackerName"],["RawDamage", "NormDamage"],[["sum"],["sum"]],["PlayerName","RawADR","NormADR"])
    adr["RawADR"]=adr["RawADR"]/total_rounds
    adr["NormADR"]=adr["NormADR"]/total_rounds
    adr.sort_values(by="RawADR",ascending=False,inplace=True)
    return adr

In [12]:
adr(data_df["Rounds"],data_df["Damages"],{})

Unnamed: 0,PlayerName,RawADR,NormADR
4,b1t,144.6,78.366667
2,Hobbit,132.966667,101.633333
8,s1mple,119.5,96.366667
5,electronic,94.733333,79.0
0,Ax1Le,83.433333,67.5
9,sh1ro,81.533333,62.033333
6,interz,74.766667,65.066667
1,Boombl4,73.533333,66.3
7,nafany,70.2,62.166667
3,Perfecto,51.6,37.7


In [13]:
def headshot_percentage(kills_df,kills_filters):
# function that creates a headshot percentage dataframe given a kills dataframe and filters
    # param kills_df: the dataframe with the kills data
    # param kills_filters: a dictionary where the keys are the columns to filter the kills dataframe by and the values are the filters
# returns a dataframe
    headshot_percentage=calculate_statistics(kills_df,kills_filters,["AttackerName"],["IsHeadshot"],[["mean"]],["PlayerName","HeadshotPct"])
    headshot_percentage.sort_values(by="HeadshotPct",ascending=False,inplace=True)
    return headshot_percentage

In [14]:
headshot_percentage(data_df["Kills"],{})

Unnamed: 0,PlayerName,HeadshotPct
3,Perfecto,0.7
2,Hobbit,0.666667
0,Ax1Le,0.555556
5,electronic,0.5
6,interz,0.5
4,b1t,0.458333
7,nafany,0.454545
8,s1mple,0.272727
1,Boombl4,0.25
9,sh1ro,0.142857


In [15]:
def utility_damage(damages_df,grenades_df,damages_filters,grenades_filters):
# function that creates a utility damage dataframe given a damages dataframe, grenades dataframe, and filters
    # param damages_df: the dataframe with the damages data
    # param grenades_df: the dataframe with the grenades data
    # param damages_filters: a dictionary where the keys are the columns to filter the damages dataframe by and the values are the filters
    # param grenades_filters: a dictionary where the keys are the columns to filter the grenades dataframe by and the values are the filters
# returns a dataframe
    damages_copy=damages_df.loc[damages_df["Weapon"].isin(["HE Grenade","Incendiary Grendade","Molotov"])]
    damages_copy["RawDamage"]=damages_copy["HpDamage"]+damages_copy["ArmorDamage"]
    utility_damage=calculate_statistics(damages_copy,damages_filters,["AttackerName"],["RawDamage"],[["sum"]],["PlayerName","UtilityDamage"])
    grenades_copy=grenades_df.loc[grenades_df["GrenadeType"].isin(["HE Grenade","Incendiary Grendade","Molotov"])]
    nades_thrown=calculate_statistics(grenades_copy,grenades_filters,["PlayerName"],["PlayerName"],[["size"]],["PlayerName","NadesThrown"])
    utility_damage_statistics=pd.merge(utility_damage,nades_thrown)
    utility_damage_statistics["DmgPerNade"]=utility_damage_statistics["UtilityDamage"]/utility_damage_statistics["NadesThrown"]
    utility_damage_statistics.sort_values(by="UtilityDamage",ascending=False,inplace=True)
    return utility_damage_statistics

In [16]:
utility_damage(data_df["Damages"],data_df["Grenades"],{},{})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  damages_copy["RawDamage"]=damages_copy["HpDamage"]+damages_copy["ArmorDamage"]


Unnamed: 0,PlayerName,UtilityDamage,NadesThrown,DmgPerNade
1,Boombl4,301,23,13.086957
2,Hobbit,221,19,11.631579
7,nafany,165,17,9.705882
5,electronic,115,17,6.764706
9,sh1ro,95,9,10.555556
8,s1mple,81,7,11.571429
3,Perfecto,57,22,2.590909
0,Ax1Le,52,15,3.466667
6,interz,37,20,1.85
4,b1t,24,14,1.714286


In [17]:
def weapon_type(weapon):
# function that returns the weapon type of a weapon
    # param weapon: a name of a weapon in string format
# returns a string
    if weapon in ["Knife"]:
        return "Melee Kills"
    elif weapon in ["CZ-75 Auto","Desert Eagle","Dual Berettas","Five-SeveN","Glock-18","P2000","P250","R8 Revolver","Tec-9","USP-S"]:
        return "Pistol Kills"
    elif weapon in ["MAG-7","Nova","Sawed-Off","XM1014"]:
        return "Shotgun"
    elif weapon in ["MAC-10","MP5-SD","MP7","MP9","P90","PP-Bizon","UMP-45"]:
        return "SMG Kills"
    elif weapon in ["AK-47","AUG","FAMAS","Galil AR","M4A1-S","M4A4","SG 553"]:
        return "Assault Rifle Kills"
    elif weapon in ["M249","Negev"]:
        return "Machine Gun Kills"
    elif weapon in ["AWP","G3SG1","SCAR-20","SSG 08"]:
        return "Sniper Rifle Kills"
    else:
        return "Utility Kills"

In [18]:
def kills_by_weapon_type(kills_df,kills_filters): # incomplete function
# function that creates a dataframe with kill data by weapon type 
    # param kills_df: the dataframe with the kills data
    # param kills_filters: a dictionary where the keys are the columns to filter the kills dataframe by and the values are the filters
# returns a dataframe
    kills_by_weapon_type=kills_df.copy()
    kills_by_weapon_type["Kills Type"]=kills_by_weapon_type.apply(lambda row:weapon_type(row["Weapon"]),axis=1)
    kills_by_weapon_type=calculate_statistics(kills_by_weapon_type,kills_filters,["AttackerName","Kills Type"],["AttackerName"],[["size"]],["PlayerName","Kills Type","Kills"])
    kills_by_weapon_type=kills_by_weapon_type.pivot(index="PlayerName",columns="Kills Type",values="Kills")
    i=0
    for col in ["Melee Kills","Pistol Kills","Shotgun Kills","SMG Kills","Assault Rifle Kills","Machine Gun Kills","Sniper Rifle Kills","Utility Kills"]:
        if not col in kills_by_weapon_type.columns:
            kills_by_weapon_type.insert(i,col,0)
        kills_by_weapon_type[col]=kills_by_weapon_type[col].fillna(0)
        kills_by_weapon_type[col]=kills_by_weapon_type[col].astype(int)
        i+=1
    kills_by_weapon_type["Total Kills"]=kills_by_weapon_type.iloc[0:kills_by_weapon_type.shape[0]].sum(axis=1)
    kills_by_weapon_type.reset_index(inplace=True)
    kills_by_weapon_type=kills_by_weapon_type.rename_axis(None, axis=1)
    return kills_by_weapon_type

In [19]:
kills_by_weapon_type(data_df["Kills"],{})

Unnamed: 0,PlayerName,Melee Kills,Assault Rifle Kills,Shotgun Kills,Pistol Kills,SMG Kills,Machine Gun Kills,Sniper Rifle Kills,Utility Kills,Total Kills
0,Ax1Le,0,13,0,4,1,0,0,0,18
1,Boombl4,0,10,0,0,0,0,2,0,12
2,Hobbit,0,19,0,4,1,0,0,0,24
3,Perfecto,0,8,0,2,0,0,0,0,10
4,b1t,0,8,0,3,1,0,12,0,24
5,electronic,0,14,0,4,0,0,0,0,18
6,interz,0,12,0,0,2,0,0,0,14
7,nafany,0,7,0,4,0,0,0,0,11
8,s1mple,0,5,0,3,0,0,13,1,22
9,sh1ro,0,0,0,1,0,0,11,2,14
