# Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Import Data

In [None]:
# All prediction files sit in the same folder of the project repository,
# so the shared prefix is spelled out once and each file name is appended to it.
PREDICTIONS_URL = 'https://raw.githubusercontent.com/msperand/Machine_Learning_Project/main/Data/Predictions_for_mastermind/'

# One frame per model; the numeric tag in each name is reused later as the column suffix.
df_630 = pd.read_csv(PREDICTIONS_URL + 'DA_full_FlauBERT_large_vs0_2_decay0_1_6epochs_rs42_dacs_1.csv')
df_628 = pd.read_csv(PREDICTIONS_URL + 'DA_full_FlauBERT_large_vs0_2_decay0_05_6epochs_rs0_dacs_1.csv')
df_611 = pd.read_csv(PREDICTIONS_URL + 'DA_FlauBERT_large_vs0_4_decay0_05_6epochs_rs11_dacs_0_5%202.csv')
df_609 = pd.read_csv(PREDICTIONS_URL + 'DA_FlauBERT_large_vs0_4_4epochs_rs42_dacs_1.csv')
df_612 = pd.read_csv(PREDICTIONS_URL + 'DA_full_FlauBERT_large_vs0_4_decay0_05_6epochs_rs42_dacs_1.csv')
df_630_2 = pd.read_csv(PREDICTIONS_URL + 'DA_full_FlauBERT_large_vs0_2_decay0_05_4epochs_rs42_dacs_1.csv')
df_635 = pd.read_csv(PREDICTIONS_URL + 'DA_full_FlauBERT_large_vs0_2_bs16_decay0_05_rs42_dacs_1.csv')

# Define Functions

In [None]:
# This function calculates the weighted majority prediction amongst the predictions of various models
def megamind_weighted(df, accuracies):
    """Return the accuracy-weighted majority prediction for every id.

    Each prediction column casts a vote for the difficulty level it contains,
    weighted by the matching entry of ``accuracies``. The level with the
    largest total weight wins; ties go to the level that appears in the
    earliest column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'id' column. Every column after the first one is
        treated as one model's predictions. The frame is not modified.
    accuracies : sequence of numbers
        One voting weight per prediction column, in column order.

    Returns
    -------
    pandas.DataFrame
        Columns 'id' and 'difficulty', indexed like ``df``.

    Raises
    ------
    ValueError
        If there is no prediction column, or if the number of weights differs
        from the number of prediction columns (previously the surplus was
        silently ignored).
    """
    weights = list(accuracies)
    n_prediction_columns = len(df.columns) - 1

    if n_prediction_columns < 1:
        raise ValueError("df needs at least one prediction column after the first column")
    if len(weights) != n_prediction_columns:
        raise ValueError(
            f"expected {n_prediction_columns} accuracies (one per prediction column), "
            f"got {len(weights)}"
        )

    def _weighted_vote(levels):
        # dicts keep insertion order and max() returns the first maximum,
        # so a tie is resolved in favour of the earliest column's level.
        voting_counts = {}
        for level, weight in zip(levels, weights):
            voting_counts[level] = voting_counts.get(level, 0) + weight
        return max(voting_counts, key=voting_counts.get)

    # When an id is repeated, its first row decides the prediction for all of its rows.
    first_rows = df.drop_duplicates(subset='id', keep='first')

    # Single pass over the rows instead of re-filtering the whole frame for every id.
    votes = first_rows.iloc[:, 1:].itertuples(index=False, name=None)
    prediction_by_id = dict(zip(first_rows['id'], map(_weighted_vote, votes)))

    # Build a fresh frame so the caller's frame does not gain a 'difficulty' column.
    return pd.DataFrame({'id': df['id'], 'difficulty': df['id'].map(prediction_by_id)})

In [None]:
# This function integrates the predictions of several models into the predictions of a given model
# in cases where all other models agree on the difficulty level.
def megamind_of_megamind_outliers(accuracies, df, df_best_accuracy):
    """Override the baseline prediction wherever the other models are unanimous.

    ``df`` and ``df_best_accuracy`` are inner-joined on 'id'. Every merged
    column other than 'id' and 'difficulty' is treated as a voter. When all
    voters hold the same value for a row, that value replaces the baseline
    'difficulty'; otherwise the baseline is kept. With a single voter, that
    voter always overrides the baseline. With no voters, the baseline is
    returned unchanged.

    Parameters
    ----------
    accuracies : any
        Unused; kept so existing call sites keep working.
    df : pandas.DataFrame
        'id' column plus one column per voting model.
    df_best_accuracy : pandas.DataFrame
        'id' column plus the baseline 'difficulty' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'id' and 'difficulty' for the ids present in both inputs.
    """
    merged_df = pd.merge(df, df_best_accuracy, on='id')

    others = [col for col in merged_df.columns if col not in ['id', 'difficulty']]

    if not others:
        # Nothing can overrule the baseline, so it is passed through as is.
        corrected = merged_df['difficulty']
    else:
        reference = merged_df[others[0]]

        # Column-wise comparison replaces the row-wise apply, which was slow and
        # could not handle an empty merge result.
        unanimous = pd.Series(True, index=merged_df.index)
        for col in others[1:]:
            unanimous = unanimous & merged_df[col].eq(reference)

        corrected = reference.where(unanimous, merged_df['difficulty'])

    # A fresh frame avoids renaming columns in place on a slice of merged_df.
    return pd.DataFrame({'id': merged_df['id'], 'difficulty': corrected})

# Merge the various dataframes

In [None]:
def merge_on_id(left, right, suffix):
    """Inner-join two frames on 'id'.

    Columns whose names clash keep their name on the left side and receive
    ``_<suffix>`` on the right side.
    """
    right_tag = f'_{suffix}'
    return pd.merge(left=left, right=right, how='inner', on='id', suffixes=('', right_tag))

# Pair every prediction frame with the tag that will label its column.
dataframes_with_suffixes = list(zip(
    [df_630, df_628, df_630_2, df_635, df_611, df_609, df_612],
    ['630', '628', '630_2', '635', '611', '609', '612'],
))

# Fold the frames together on 'id': the first frame seeds the result and each
# later frame contributes one 'difficulty_<suffix>' column.
merged_df = None
for df, suffix in dataframes_with_suffixes:
    merged_df = df if merged_df is None else merge_on_id(merged_df, df, suffix)

# The seed frame's column never clashed on the left side, so it still carries
# the bare name; label it like the others.
merged_df = merged_df.rename(columns={'difficulty': 'difficulty_630'})

# Build the ensemble predictions

In [None]:
# This combination gives the best result of 65.2%

# Stage 1: weighted vote among the first four models; the weights line up
# one-to-one with the prediction columns selected below.
accuracies = [630, 628, 630.1, 635]
df_int = merged_df.loc[:, ['id'] + [f'difficulty_{tag}' for tag in ('630', '628', '630_2', '635')]]
df_meta = megamind_weighted(df_int, accuracies)

# Stage 2: the remaining three models overrule the stage-1 prediction
# wherever all three of them agree with each other.
accuracies = [611, 609, 612]
df_int = merged_df.loc[:, ['id'] + [f'difficulty_{tag}' for tag in ('611', '609', '612')]]
df_meta = megamind_of_megamind_outliers(accuracies, df_int, df_meta)

df_meta

Unnamed: 0,id,difficulty
0,0,C2
1,1,B1
2,2,B1
3,3,B1
4,4,C2
...,...,...
1195,1195,B1
1196,1196,A2
1197,1197,C2
1198,1198,B2


In [None]:
# Show how the final predictions are distributed across the difficulty levels.
df_meta['difficulty'].value_counts()

difficulty
B2    214
A1    207
C1    204
A2    204
C2    186
B1    185
Name: count, dtype: int64

# Save predictions to CSV

In [None]:
# Write the final predictions to disk; the row index is left out of the file.
df_meta.to_csv(path_or_buf='Megamind_Final.csv', index=False)