In [1]:
import pandas as pd

In [2]:
def normalize(data_point: float, data_list: list) -> float:
    ''' Normalize the data to be within the rang of 0.0 -- 1.0
        Round answer to 2 digits
    '''

    if not isinstance(data_point, (int, float)):
        raise TypeError(f"Function '{normalize.__name__}': data_point object is not an int or float.")
    elif not isinstance(data_list, (list)):
        raise TypeError(f"Function '{normalize.__name__}': data_list object is not alist.")
    else:
        return format((data_point-min(data_list)) / (max(data_list)-min(data_list)), '0.3f')

In [3]:
def unique_topcs_methods(methods_list: list, topics_list: list):
    
    ### Try to collect all unique entries for methods and topics from reviewers
    #print(f'All possible topics: {set(reviewer_topic_list)}')
    
    methods = []
    #methods = set(methods_list)
    uniq_methods = ', '.join(set(methods_list)).split(', ') 
    #print(f'All possible methods: {set(uniq_methods)}\n')

    topics = []
    #topics = set(topics_list)
    uniq_topics = ', '.join(set(topics_list)).split(', ') 
    #print(f'All possible topics: {set(uniq_topics)}\n')

    counter=0
    #print(topics_list)
    for topic in uniq_topics:
        for t in topics_list:
            if topic == t:
                counter += 1
        #print(f'{topic}: {counter}')
    revi_methods = pd.DataFrame(data={'Methods': sorted(list(set(uniq_methods)))})
    revi_methods.to_csv('reviewer_methods.csv', sep=';', index=False)
    revi_topics = pd.DataFrame(data={'Methods': sorted(list(set(uniq_topics)))})
    revi_topics.to_csv('reviewer_topics.csv', sep=';', index=False)

In [4]:
def filter_top(entrants: pd.DataFrame, matching_df: pd.DataFrame) -> pd.DataFrame:
    ''' Filter the top 3 matchings for each entrant and reviewer, return the results.
    
    '''

    if not isinstance(entrants, pd.core.frame.DataFrame):
        raise TypeError(f"Function '{filter_top.__name__}': entrants object is not a Pandas' dataframe.")
    elif not isinstance(matching_df, pd.core.frame.DataFrame):
        raise TypeError(f"Function '{filter_top.__name__}': matching_df object is not a Pandas' dataframe.")

    reviewer_collector = {}
    entrant_collector = {}
    perfect_score_list = []

    ## TODO: check to see if I can remove the second condition below (and...)

    ## len(entrants) is the same as entrants.shape[0]

    ## assign starting value of zero
    for entrant in entrants.itertuples():
        if not entrant[1] in entrant_collector and len(entrant_collector) < len(entrants):
            entrant_collector[entrant[1]] = 0
            #print(entrant_collector)

    # key part: must operate on matching_df that has been sorted by total_score, and then limit it
    for reviewer in matching_df.itertuples():
        if not reviewer[1] in reviewer_collector and len(reviewer_collector) < len(entrants):
            reviewer_collector[reviewer[1]] = 0
            #print(reviewer_collector)

    for index, matching_pair in matching_df.iterrows():
        revi = matching_pair['reviewer']
        revi_email = matching_pair['reviewer_email']
        revi_topics = matching_pair['reviewer_topics']
        revi_methods = matching_pair['reviewer_methods']
        candi = matching_pair['entrant']
        candi_email = matching_pair['entrant_email']
        candi_topics = matching_pair['entrant_topics']
        candi_methods = matching_pair['entrant_methods']
        itemized_score = matching_pair['itemized_score']
        total_score = matching_pair['total_score']
        
        if revi in reviewer_collector and candi in entrant_collector \
                                      and reviewer_collector[revi] != 3 \
                                      and entrant_collector[candi] != 3:
            perfect_score_list.append((revi, candi, itemized_score, total_score))
            reviewer_collector[revi] += 1
            entrant_collector[candi] += 1

    final_df = pd.DataFrame(data={'Reviewer Name': [i[0] for i in perfect_score_list],
                                  'entrant Name': [i[1] for i in perfect_score_list],
                                  "itemized score [topics, methods, reviewer's history]": [i[2] for i in perfect_score_list],
                                  'total_score': [i[3] for i in perfect_score_list]})

    # Could sort by "Reviewer Name" or "entrant Name"
    final_df = final_df.sort_values(by=['entrant Name', 'total_score'], ascending=[True, False])
    #final_df = final_df.sort_values(by=['total_score', 'entrant Name'], ascending=[False, False])

    #print(perfect_score_list)
    return final_df

In [6]:
def scoring(reviewer: pd.Series, entrant: pd.Series, use_reviewer_hist: bool) -> int:
    ''' Score the matching between reviewer and entrant based on:
        1) topics,
        2) methods and
        3) history of recent reviews done by reviewer if specified as 'yes'.
        
        Input:
            reviewer: topics, methods, and history or performed reviews
            entrant: topics, methods
            use_reviewer_hist: use the history of performed reviews in the matching process
        Return:
            
    '''

    if not isinstance(reviewer, pd.core.series.Series):
        raise TypeError(f"Function '{scoring.__name__}': reviewer object is not a Pandas' dataframe.")
    elif not isinstance(entrant, pd.core.series.Series):
        raise TypeError(f"Function '{scoring.__name__}': entrant object is not a Pandas' dataframe.")

    score_topics = 0
    score_methods = 0
    score_total = 0

    reviewer_topics = reviewer['Topics'].split(sep=', ')
    reviewer_methods = reviewer['Methods'].split(sep=', ')
        
    entrant_topics = entrant['Topics'].split(sep=', ')    
    entrant_methods = entrant['Methods'].split(sep=', ')
    
    for topic in reviewer_topics:
        if topic.lower() in entrant_topics:
            score_topics += 1

    for methodology in reviewer_methods:
        if methodology in entrant_methods:
            score_methods += 1

    if use_reviewer_hist == True:
        score_history = 0
        reviewer_history = reviewer.filter(regex='Round').values.tolist()

        # original index position and reversed list values
        for orig_index, session in reversed(list(enumerate(reviewer_history))):    
            factor = (orig_index+1)/len(reviewer_history) ## scale factor for reducing history score

            if session.lower() != 'yes':
                 score_history += (1 - factor)
    else:
        score_history = 0

    return score_topics, score_methods, score_history

In [5]:
def perform_matching(reviewers: pd.DataFrame, entrants: pd.DataFrame,
                     use_reviewer_hist: bool=False) -> pd.DataFrame:
    '''
    1. Creates lists from dataframes
    2. Calls scoring function
    3. Sorts the top 3 scoreed matches for each pairing
    '''

    if not isinstance(reviewers, pd.core.frame.DataFrame):
        raise TypeError(f"Function '{perform_matching.__name__}': reviewers object is not a Pandas' dataframe.")
    elif not isinstance(entrants, pd.core.frame.DataFrame):
        raise TypeError(f"Function '{perform_matching.__name__}': entrants object is not a Pandas' dataframe.")

    reviewer_name_list = []
    reviewer_email_list = []
    reviewer_topic_list = []
    reviewer_method_list = []

    entrant_name_list = []
    entrant_email_list = []
    entrant_topic_list = []
    entrant_method_list = []
        
    score_all_list =[]
    score_total_list =[]

    # Basic idea here is that each X reviewer is listed with each Y entrant, and then scored

    for i in range(len(reviewers)):  # number_reviewers_rows; reviewers.shape[0]
        for j in range(len(entrants)):  # number_entrants_rows; entrants.shape[0]
            reviewer_name_list.append(reviewers.loc[i].Name)
            reviewer_email_list.append(reviewers.loc[i].Email)
            reviewer_topic_list.append(reviewers.loc[i].Topics)
            reviewer_method_list.append(reviewers.loc[i].Methods)

            entrant_name_list.append(entrants.loc[j].Name)
            entrant_email_list.append(entrants.loc[j].Email)
            entrant_topic_list.append(entrants.loc[j].Topics)
            entrant_method_list.append(entrants.loc[j].Methods)

            score_topics, score_methods, score_history = scoring(reviewer=reviewers.loc[i],
                                                                 entrant=entrants.loc[j],
                                                                 use_reviewer_hist=use_reviewer_hist)

            score_all = [score_topics, score_methods, score_history]
            score_all_list.append(score_all)

            score_total = score_topics + score_methods + score_history
            score_total_list.append(score_total)

    topic_score_all = []
    method_score_all = []
    history_score_all = []

    for sublist in (score_all_list):
        topic_score_all.append(sublist[0])
        method_score_all.append(sublist[1])
        history_score_all.append(sublist[2])

    score_topic_normalized = [normalize(x, topic_score_all) for x in topic_score_all]
    score_method_normalized = [normalize(x, method_score_all) for x in method_score_all]
    
    for item in history_score_all:
        if (item > 0):
            score_history_normalized = [normalize(x, history_score_all) for x in history_score_all]
            break
        else:
            score_history_normalized = history_score_all

    score_itemize_list = []
    score_itemize_list.extend([list(a) for a in zip(score_topic_normalized,
                                                score_method_normalized,
                                                score_history_normalized)])

    score_final_list = [float(a)+float(b)+float(c) for a,b,c in zip(score_topic_normalized,
                                                                   score_method_normalized,
                                                                   score_history_normalized)]

    results = {'reviewer': reviewer_name_list,
               'reviewer_email': reviewer_email_list,
               'reviewer_topics': reviewer_topic_list,
               'reviewer_methods': reviewer_method_list,
               'entrant': entrant_name_list,
               'entrant_email': entrant_email_list,
               'entrant_topics': entrant_topic_list,
               'entrant_methods': entrant_method_list,
               'itemized_score': score_itemize_list,
               'total_score': score_final_list}

    matching_df = pd.DataFrame(data=results)

    return matching_df

In [7]:
if __name__ == "__main__":
    ''' This programs optimizes the matching between reviewers and entrants.

        Input:
            1) reviewers CSV file (; seperated)
            2) entrants CSV file (; seperated)
            3) use_history = 'yes' or 'no'
        Output:
            1) CSV formatted file of matchings (; seperated), including itemized
            and total matching scores.
            2) prints to screen the suggested best matchings

        Structure of CSV files:
            entrants:
                Name;Email;Methods;Topics
            reviewers:
                Name;Email;Methods;Topics;Round 2019S;Round 2019F; ...

        Methods and Topics can be a several entries seperated by a comma.

        For the reviewers, it is assumed that the last 4 columns are the histories of their reviews.
            If they have reviewed, then the word 'yes' or 'no' must be provided in a column whose
            header contains 'Round'.

        Limitations:
            1. Each entrant is assigned 3 reviewers.
            2. Each reviewer is assigned 3 entrants.
            3. Only the last four history entries of the reviewer are considered in the score

        Contact:
            Daniel Jiang, Robert Bitterling and Karl N. Kirschner*
            University of Applied Sciences Bonn-Rhein-Sieg
            Grantham-Allee 20
            53757 Sankt Augustin - Germany

            Email: k.n.kirschner@gmail.com
            
        Contribution:
            Concept: Kirschner
            Initial coding and structure: Jiang and Bitterling
            Final coding: Kirschner
    '''

    use_history = True

    while True:
        try:
            reviewers_data = pd.read_csv('reviewers.csv', sep=';')
            entrants_data = pd.read_csv('entrants.csv', sep=';')
        except FileNotFoundError:
            print('Input files for reviewer or entrants was not found.')
        else:
            print("""Computing best entrant-reviewer matching based on 1) topics, 2) methodology 
                  and 3) the recent review history of the possible reviewers.

                  NOTE: To include the history of the reviewer from previous rounds,
                  the reviewer's CSV file should contains columns at the far-most-right
                  postion whose header includes the word 'Round' to allow a filtering to be done
                  (e.g., Round 2019S; Round 2019F; Round 2020S; Round 2020F).\n""")

            print(f'Total reviewers: {len(reviewers_data)}; Total entrants: {len(entrants_data)}')

            matchings = perform_matching(reviewers=reviewers_data,
                                         entrants=entrants_data,
                                         use_reviewer_hist=use_history)

            top_matches = filter_top(entrants=entrants_data, matching_df=matchings)
            display(top_matches)

            matchings.to_csv('matching_results.csv', sep=';', index=False)
            break

Computing best entrant-reviewer matching based on 1) topics, 2) methodology 
                  and 3) the recent review history of the possible reviewers.

                  NOTE: To include the history of the reviewer from previous rounds,
                  the reviewer's CSV file should contains columns at the far-most-right
                  postion whose header includes the word 'Round' to allow a filtering to be done
                  (e.g., Round 2019S; Round 2019F; Round 2020S; Round 2020F).

Total reviewers: 50; Total entrants: 10


Unnamed: 0,Reviewer Name,entrant Name,"itemized score [topics, methods, reviewer's history]",total_score
20,Hicktu Sockface,Bugby Doodoohill,"[0.000, 0.000, 1.000]",1.0
26,Sniffeenie Chewgold,Bugby Doodoohill,"[0.000, 1.000, 0.000]",1.0
23,Chewberry Mudman,Bugby Doodoohill,"[0.000, 0.500, 0.000]",0.5
2,Peafy Doodoofish,Buritt Noseface,"[0.500, 0.500, 1.000]",2.0
8,Chewlu Boogerbrain,Buritt Noseface,"[0.000, 0.000, 1.000]",1.0
5,Peawee Pimplehair,Buritt Noseface,"[0.500, 0.000, 0.000]",0.5
17,Stinkroid Noodleshine,Flapberry Fudgewhistle,"[1.000, 0.000, 1.000]",2.0
11,Eggster HoboSmittens,Flapberry Fudgewhistle,"[0.500, 0.000, 0.000]",0.5
14,Bushspitz Wigglebottom,Flapberry Fudgewhistle,"[0.500, 0.000, 0.000]",0.5
1,Peafy Doodoofish,Flufffy Gloomkins,"[0.000, 0.000, 1.000]",1.0
