In [1]:
import pandas as pd
import pytest
import operator
#from csgo.analytics.statistics import (extract_numeric_filters,check_filters,logical_operation,filter_dataframe,calculate_statistics)

In [2]:
def extract_numeric_filters(filters,key):
    """Split every numeric filter of one column into a logical operator and a number.

    Each filter must be a string made of one logical operator (==, !=, <=, >=, <, >)
    followed by a numerical value, for example ">=10", "<-2.5" or "==.5".
    Whitespace around the operator and around the value is ignored, and the value
    is converted with float().

    param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
    param key: the key in param filters to check the values for
    raises ValueError if a filter for the specified key is not a string
    raises Exception if a filter does not start with a valid logical operator or
        if the text after the operator is not a valid numerical value
    returns a list with all of the logical operators and a list with all of the
        numerical values found in the key's values, in the order of the filters
    """
    sign_list,val_list=[],[]
    for index in filters[key]:
        if not isinstance(index,str):
            raise ValueError("Filter(s) for column "+f'"{key}"'+" must be of type string")
        text=index.strip()
        # Two-character operators are tried first so that "<=" is never read as "<".
        # Matching the operator as a prefix (instead of scanning up to the first digit)
        # lets values start with a sign, a decimal point or a space.
        sign=next((op for op in ('==','!=','<=','>=','<','>') if text.startswith(op)),None)
        if sign is None:
            raise Exception("Invalid logical operator in filters for "+f'"{key}"'+" column")
        try:
            val=float(text[len(sign):])
        except ValueError as ve:
            raise Exception("Invalid numerical value in filters for "+f'"{key}"'+" column") from ve
        sign_list.append(sign)
        val_list.append(val)
    return sign_list,val_list

In [3]:
def check_filters(df,filters):
    """Validate the filters given for every column before the dataframe is filtered.

    Columns of object dtype need string filters and columns of bool dtype need
    boolean filters. Filters for every other column are handed to
    extract_numeric_filters, which validates them as numeric filters.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
    raises ValueError if a filter for an object or bool column has the wrong type
    raises Exceptions through the function extract_numeric_filters for all other columns
    """
    for column in filters:
        column_dtype=df.dtypes[column]
        if column_dtype=="O":
            if any(not isinstance(entry,str) for entry in filters[column]):
                raise ValueError("Filter(s) for column "+f'"{column}"'+" must be of type string")
        elif column_dtype=="bool":
            if any(not isinstance(entry,bool) for entry in filters[column]):
                raise ValueError("Filter(s) for column "+f'"{column}"'+" must be of type boolean")
        else:
            # neither text nor boolean: the filters must be numeric comparisons
            extract_numeric_filters(filters,column)

In [4]:
def logical_operation(df,col,sign,val):
    """Keep the rows of a dataframe whose column value satisfies one comparison.

    param df: the dataframe to be filtered
    param col: the column of the dataframe to be filtered
    param sign: the logical operator, one of "==", "!=", "<=", ">=", "<", ">"
    param val: the numerical value to filter the column by
    raises KeyError if sign is not one of the supported logical operators
    returns a filtered copy of the dataframe
    """
    # Map each operator to its function and call only the requested one.
    # Evaluating all six comparisons up front did six times the work and could
    # raise for a comparison that was never asked for.
    operations={"==":operator.eq,"!=":operator.ne,
                "<=":operator.le,">=":operator.ge,
                "<":operator.lt,">":operator.gt}
    mask=operations[sign](df[col],val)
    filtered_dataframe=df.loc[mask]
    return filtered_dataframe

In [5]:
def filter_dataframe(df,filters):
    """Filter a copy of the given dataframe by the given filters.

    Columns of object or bool dtype keep the rows whose value is one of the
    column's filters. Every other column is treated as numeric and each of its
    filters (an operator followed by a number) is applied in turn.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
    raises Exceptions through the functions check_filters and extract_numeric_filters
    returns a filtered copy of the dataframe; the rows keep their original index labels
    """
    df_copy=df.copy()
    check_filters(df_copy,filters)
    for key in filters:
        if df_copy.dtypes[key]=='O' or df_copy.dtypes[key]=='bool':
            df_copy=df_copy.loc[df_copy[key].isin(filters[key])]
        else:
            # Parse the column's filters once instead of re-parsing them three
            # times on every loop iteration.
            sign_list,val_list=extract_numeric_filters(filters,key)
            for sign,val in zip(sign_list,val_list):
                df_copy=logical_operation(df_copy,key,sign,val)
    return df_copy

In [6]:
def calculate_statistics(df,filters,col_to_groupby,col_to_agg,agg,new_col_names):
    """Filter, group and aggregate the data in a copy of the given dataframe.

    param df: the dataframe to be filtered
    param filters: a dictionary where the keys are the columns to filter the dataframe by and the values are the filters
        keys must be of type string and values must be of type list
    param col_to_groupby: a list of the column(s) of the dataframe to group by
        column names must be of type string
    param col_to_agg: a list of the column(s) of the dataframe to aggregate
        column names must be of type string
    param agg: a list of the function(s) to aggregate the columns by
        functions must be of type string and in a list within the larger list, example: [["count"],["max","min"]]
    param new_col_names: a list of the new column names
        new column names must be of type string
    raises Exceptions through the function filter_dataframe
    returns a filtered, grouped, and aggregated copy of the given dataframe
    """
    filtered=filter_dataframe(df,filters)
    # pair every column to aggregate with its list of aggregation functions
    aggregations={column:functions for column,functions in zip(col_to_agg,agg)}
    grouped=filtered.groupby(col_to_groupby)
    summary=grouped.agg(aggregations).reset_index()
    summary.columns=new_col_names
    return summary

In [7]:
class TestStatistics:
    """Class to test the five statistics functions"""
    
    def setup_class(self):
        """Setup class by defining filters and dataframes"""
        
        self.df=pd.DataFrame({"PlayerName":["Player1","Player2","Player3","Player4","Player5"],
                              "Kills":[5,10,15,20,25],
                              "Defused Bomb":[True,False,True,False,False]})
        self.filters={"PlayerName":["Player1","Player2","Player3"],"Kills":[">=10","<=20"],"Defused Bomb":[True]}
        self.filtered_df=pd.DataFrame({"PlayerName":["Player3"],"Kills":["==15"],"Defused Bomb":[True]})        
        self.invalid_numeric_filters={"Kills":[10]}
        self.invalid_logical_operator={"Kills":["=invalid=10"]}
        self.invalid_numeric_value={"Kills":["==1invalid0"]}
        self.invalid_str_filters={"PlayerName":[1]}
        self.invalid_bool_filters={"Defused Bomb":["True"]}
        self.statistics_df=pd.DataFrame({"AttackerName":["Player1","Player1","Player1","Player2","Player2","Player2"],
                                         "AttackerAreaId":[1,1,1,2,1,1],
                                         "Weapon":["Pistol","Melee","Pistol","Pistol","Pistol","Pistol"],
                                         "IsHeadshot":[True,False,True,False,False,True]})
        self.statistics_filters={"AttackerAreaId":["==1"],"Weapon":["Pistol"],"IsHeadshot":[True]}
        self.calculated_df=pd.DataFrame({"PlayerName":["Player1","Player2"],
                                         "Area 1 Pistol Headshot Kills":[2,1]})

    def test_extract_numeric_filters(self):
        """Test extract_numeric_filters function"""
        assert extract_numeric_filters({"Kills":["==15"]},"Kills")==(["=="],[15.0])
        assert extract_numeric_filters({"Kills":["!=15"]},"Kills")==(["!="],[15.0])
        assert extract_numeric_filters({"Kills":["<=15"]},"Kills")==(["<="],[15.0])
        assert extract_numeric_filters({"Kills":[">=15"]},"Kills")==([">="],[15.0])
        assert extract_numeric_filters({"Kills":["<15"]},"Kills")==(["<"],[15.0])
        assert extract_numeric_filters({"Kills":[">15"]},"Kills")==([">"],[15.0])
        assert extract_numeric_filters({"Kills":[">10","<20"]},"Kills")==([">","<"],[10.0,20.0])
        
    def test_extract_numeric_filters_invalid_type(self):
        """Test extract_numeric_filters function with invalid numeric filters"""
        with pytest.raises(ValueError):
            extract_numeric_filters(self.invalid_numeric_filters,"Kills")
            
    def test_extract_numeric_filters_invalid_operator(self):
        """Test extract_numeric_filters function with an invalid logical operator in the numeric filters"""
        with pytest.raises(Exception):
            extract_numeric_filters(self.invalid_logical_operator,"Kills")
            
    def test_extract_numeric_filters_invalid_numeric_value(self):
        """Test extract_numeric_filters function with an invalid numeric value in the the numerical filters"""
        with pytest.raises(Exception):
            extract_numeric_filters(self.invalid_numeric_value,"Kills")
   
    def test_check_filters_invalid_str_filters(self):
        """Test check_filters function with invalid string filters"""
        with pytest.raises(ValueError):
            check_filters(self.df,self.invalid_str_filters)
            
    def test_check_filters_invalid_bool_filters(self):
        """Test check_filters function with invalid boolean filters"""
        with pytest.raises(ValueError):
            check_filters(self.df,self.invalid_bool_filters)
            
    def test_logical_operation(self):
        """Test logical_operation function"""
        assert logical_operation(self.df,"Kills","==",15.0)==self.df.loc[self.df["Kills"]==15]
        assert logical_operation(self.df,"Kills","!=",15.0)==self.df.loc[self.df["Kills"]!=15]
        assert logical_operation(self.df,"Kills","<=",15.0)==self.df.loc[self.df["Kills"]<=15]
        assert logical_operation(self.df,"Kills",">=",15.0)==self.df.loc[self.df["Kills"]>=15]
        assert logical_operation(self.df,"Kills","<",15.0)==self.df.loc[self.df["Kills"]<15]
        assert logical_operation(self.df,"Kills",">",15.0)==self.df.loc[self.df["Kills"]>15]
        
    def test_filter_dataframe(self):
        """Test filter_dataframe function"""
        assert filter_dataframe(self.df,self.filters).equals(self.filtered_df)
        
    def test_calculate_statistics(self):
        """Test calculate_statistics function"""
        assert calculate_statistics(self.statistics_df,self.statistics_filters,["AttackerName"],["AttackerName"],[["count"]],["PlayerName","Area 1 Pistol Headshot Kills"]).equals(self.calculated_df)
        