### Modeller
Baselines vi skal have med:
- Random choose candidates from df
- Mergesort choose candidates from df
- Mergesort hardcoded to account for gender
- Plain RankNet

Fair model:
- RankNet account for diversity 

In [1]:
#Load packages
import pandas as pd
pd.set_option("display.max_columns", None)
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib as mpl

In [2]:
#plots setup
def setup_mpl():
    mpl.rcParams["font.family"] = "Helvetica Neue"
    mpl.rcParams["font.size"] = 11
    mpl.rcParams["figure.figsize"] = (7,2.5)
    mpl.rcParams["figure.dpi"] = 200
    # mpl.rcParams["lines.linewidth"] = 1
setup_mpl()

### DF

In [3]:
#Start here and load the dataframe
df = pd.read_csv("df_sum_score.csv")
df = df[df.STRATUM != "0"] #0 corresponds to nan, i.e. removing nans

#### Average score generation

In [4]:
df["HI_GRADE_AVG"] = df[["CR_S11", "CC_S11", "ENG_S11"]].mean(axis=1)
df["COL_GRADE_AVG"] = df[["CR_PRO", "CC_PRO", "ENG_PRO"]].mean(axis=1)

#### Mergesort

In [5]:
df_sort = df.sort_values(by="HI_GRADE_AVG", ascending=False, kind="mergesort")

In [6]:
#Top 100
df_sort[:100]

Unnamed: 0.1,Unnamed: 0,GENDER,STRATUM,CR_S11,CC_S11,ENG_S11,CR_PRO,CC_PRO,ENG_PRO,COL_GRADE_AVG,HI_GRADE_AVG
5129,5129,F,Stratum 4,100,100,100,99,97,100,98.666667,100.000000
11864,11864,F,Stratum 5,100,93,100,99,100,97,98.666667,97.666667
4874,4874,M,Stratum 3,100,90,100,98,95,99,97.333333,96.666667
7340,7340,M,Stratum 5,95,100,95,100,89,100,96.333333,96.666667
1089,1089,M,Stratum 3,100,90,95,96,96,99,97.000000,95.000000
...,...,...,...,...,...,...,...,...,...,...,...
6145,6145,M,Stratum 4,71,91,95,100,100,99,99.666667,85.666667
6269,6269,M,Stratum 1,100,67,90,100,96,99,98.333333,85.666667
8041,8041,M,Stratum 3,82,90,85,100,99,96,98.333333,85.666667
8352,8352,M,Stratum 6,75,82,100,42,2,94,46.000000,85.666667


#### Random Sort

In [7]:
df_random = df.sample(n = 100, random_state=2)

In [8]:
df_random

Unnamed: 0.1,Unnamed: 0,GENDER,STRATUM,CR_S11,CC_S11,ENG_S11,CR_PRO,CC_PRO,ENG_PRO,COL_GRADE_AVG,HI_GRADE_AVG
10027,10027,M,Stratum 4,55,60,50,47,37,17,33.666667,55.000000
11999,11999,M,Stratum 5,63,62,75,72,53,91,72.000000,66.666667
3874,3874,M,Stratum 2,49,62,48,27,44,43,38.000000,53.000000
3456,3456,M,Stratum 2,53,64,50,53,58,27,46.000000,55.666667
10147,10147,M,Stratum 4,74,77,90,84,76,96,85.333333,80.333333
...,...,...,...,...,...,...,...,...,...,...,...
1558,1558,F,Stratum 2,55,51,38,23,57,66,48.666667,48.000000
1554,1554,M,Stratum 2,44,45,57,61,41,63,55.000000,48.666667
2773,2773,F,Stratum 4,64,69,55,98,92,89,93.000000,62.666667
6341,6341,M,Stratum 2,43,49,46,27,29,50,35.333333,46.000000


#### Merge sort for Female df and Male df and append and sort

In [40]:
def gender_mergesort(df):
    df_temp = df.copy()
    df_M = df_temp[df_temp.GENDER == "M"]
    df_F = df_temp[df_temp.GENDER == "F"]
    
    df_sort_M = df_M.sort_values(by="HI_GRADE_AVG", ascending=False, kind="mergesort")[:50].reset_index()
    df_sort_F = df_F.sort_values(by="HI_GRADE_AVG", ascending=False, kind="mergesort")[:50].reset_index()

    if df_F.HI_GRADE_AVG.max() > df_M.HI_GRADE_AVG.max():
        sorted_with_gender = pd.concat([df_sort_F,df_sort_M], sort=False).sort_index()
    else: 
        sorted_with_gender = pd.concat([df_sort_M,df_sort_F], sort=False).sort_index()
    
    return sorted_with_gender.reset_index()
        
gender_mergesort(df)




Unnamed: 0.1,level_0,index,Unnamed: 0,GENDER,STRATUM,CR_S11,CC_S11,ENG_S11,CR_PRO,CC_PRO,ENG_PRO,COL_GRADE_AVG,HI_GRADE_AVG
0,0,5129,5129,F,Stratum 4,100,100,100,99,97,100,98.666667,100.000000
1,0,4874,4874,M,Stratum 3,100,90,100,98,95,99,97.333333,96.666667
2,1,11864,11864,F,Stratum 5,100,93,100,99,100,97,98.666667,97.666667
3,1,7340,7340,M,Stratum 5,95,100,95,100,89,100,96.333333,96.666667
4,2,5673,5673,F,Stratum 5,100,82,100,98,87,98,94.333333,94.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,47,5198,5198,F,Stratum 2,87,73,90,96,84,92,90.666667,83.333333
96,48,12204,12204,M,Stratum 5,84,77,100,100,98,92,96.666667,87.000000
97,48,5877,5877,F,Stratum 2,74,91,85,98,93,3,64.666667,83.333333
98,49,8104,8104,F,Stratum 2,86,82,82,87,96,93,92.000000,83.333333
