In [4]:
import pandas as pd

# Load the SNLI test data (including true labels)
snli_test_path = "/kaggle/input/stanford-natural-language-inference-corpus/snli_1.0_test.csv"
snli_test_df = pd.read_csv(snli_test_path)

# Define file paths for SNLI prediction files
snli_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_snli_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_snli_predictions.csv",
    "albert": "/kaggle/input/albert/albert_snli_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_snli = "/kaggle/working/combined_snli_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_snli_df = pd.DataFrame(columns=columns)

label_mapping = {'entailment': 0, 'neutral': 1, 'contradiction': 2}

# Load and merge the predictions
for model, path in snli_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_snli_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_snli_df['True_Label'] = snli_test_df['gold_label'].map(label_mapping)

# Convert True_Label to integer type
combined_snli_df['True_Label'] = combined_snli_df['True_Label'].astype('Int64')

# Save the combined DataFrame to CSV
combined_snli_df.to_csv(output_csv_path_snli, index=False)

print(f"Combined SNLI predictions with true labels saved to {output_csv_path_snli}")


Combined SNLI predictions with true labels saved to /kaggle/working/combined_snli_df


In [5]:
combined_snli_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.034767,0.962592,0.002641,0.012451,0.927093,0.060457,0.008653,0.947434,0.043913,1
1,0.001921,0.319032,0.679047,0.752766,0.242251,0.004983,0.740332,0.256434,0.003235,0
2,0.998783,0.000764,0.000453,0.000254,0.004494,0.995253,0.004677,0.060481,0.934843,2
3,0.001001,0.997708,0.001291,0.005844,0.990736,0.003419,0.034056,0.956687,0.009257,1
4,0.00108,0.301363,0.697557,0.278348,0.718575,0.003076,0.498761,0.49927,0.001969,0


In [6]:
# Load the ANLI Round 1 test data (including true labels)
mnli_matched_test_path = "/kaggle/input/nli-dataset-for-sentence-understanding/mnli_validation_matched.csv"
mnli_matched_test_df = pd.read_csv(mnli_matched_test_path)

# Define file paths for ANLI Round 1 prediction files
mnli_matched_predictions_paths = {
    "deberta": "/kaggle/input/validation/deberta_mnli_matched_val_predictions.csv",
    "roberta": "/kaggle/input/validation/roberta_mnli_matched_val_predictions.csv",
    "albert": "/kaggle/input/validation/albert_mnli_matched_val_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_mnli_matched = "/kaggle/working/combined_mnli_matched_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_mnli_matched_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in mnli_matched_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_mnli_matched_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_mnli_matched_df['True_Label'] = mnli_matched_test_df['label']

# Save the combined DataFrame to CSV
combined_mnli_matched_df.to_csv(output_csv_path_mnli_matched, index=False)

print(f"Combined MNLI-matched predictions with true labels saved to {output_csv_path_mnli_matched}")


Combined MNLI-matched predictions with true labels saved to /kaggle/working/combined_mnli_matched_df


In [7]:
combined_mnli_matched_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.005164,0.993364,0.001472,0.017844,0.950246,0.031909,0.010844,0.983012,0.006144,1
1,0.999153,0.000526,0.000321,0.001413,0.00203,0.996557,0.005388,0.007536,0.987076,2
2,0.000989,0.044792,0.954219,0.954781,0.042249,0.00297,0.853862,0.143483,0.002655,0
3,0.994965,0.004808,0.000228,0.000343,0.003511,0.996146,0.004128,0.070757,0.925115,2
4,0.999657,0.00022,0.000123,7.9e-05,0.000496,0.999425,0.003864,0.029262,0.966875,2


In [8]:
# Load the ANLI Round 1 test data (including true labels)
mnli_mismatched_test_path = "/kaggle/input/nli-dataset-for-sentence-understanding/mnli_validation_mismatched.csv"
mnli_mismatched_test_df = pd.read_csv(mnli_mismatched_test_path)

# Define file paths for ANLI Round 1 prediction files
mnli_mismatched_predictions_paths = {
    "deberta": "/kaggle/input/validation/deberta_mnli_mismatched_val_predictions.csv",
    "roberta": "/kaggle/input/validation/roberta_mnli_mismatched_val_predictions.csv",
    "albert": "/kaggle/input/validation/albert_mnli_mismatched_val_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_mnli_mismatched = "/kaggle/working/combined_mnli_mismatched_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_mnli_mismatched_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in mnli_mismatched_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_mnli_mismatched_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_mnli_mismatched_df['True_Label'] = mnli_mismatched_test_df['label']

# Save the combined DataFrame to CSV
combined_mnli_mismatched_df.to_csv(output_csv_path_mnli_mismatched, index=False)

print(f"Combined MNLI-mismatched predictions with true labels saved to {output_csv_path_mnli_mismatched}")


Combined MNLI-mismatched predictions with true labels saved to /kaggle/working/combined_mnli_mismatched_df


In [9]:
combined_mnli_mismatched_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.999667,0.00016,0.000173,6.8e-05,0.000402,0.999529,0.000894,0.003787,0.995318,2
1,0.998119,0.000962,0.000919,0.000183,0.001511,0.998306,0.006421,0.010224,0.983355,2
2,0.000552,0.004809,0.994639,0.986062,0.01202,0.001918,0.975041,0.023354,0.001605,0
3,0.827653,0.171961,0.000386,0.000478,0.270953,0.728569,0.001722,0.796122,0.202156,2
4,0.000292,0.002875,0.996833,0.975167,0.021904,0.002929,0.965952,0.032748,0.0013,0


In [10]:
# Load the ANLI Round 1 test data (including true labels)
anli_r1_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r1.csv"
anli_r1_test_df = pd.read_csv(anli_r1_test_path)

# Define file paths for ANLI Round 1 prediction files
anli_r1_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r1_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r1_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r1_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r1 = "/kaggle/working/combined_anli_r1_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r1_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r1_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r1_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r1_df['True_Label'] = anli_r1_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r1_df.to_csv(output_csv_path_anli_r1, index=False)

print(f"Combined ANLI Round 1 predictions with true labels saved to {output_csv_path_anli_r1}")


Combined ANLI Round 1 predictions with true labels saved to /kaggle/working/combined_anli_r1_df


In [11]:
combined_anli_r1_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.015388,0.976305,0.008307,0.996714,0.000376,0.00291,0.322974,0.667628,0.009398,0
1,0.224603,0.501549,0.273848,0.87572,0.000724,0.123556,0.998526,0.000604,0.000869,0
2,0.006642,0.97669,0.016669,0.999484,0.00033,0.000186,0.783352,0.212241,0.004407,0
3,0.966494,0.032235,0.001272,0.000686,0.998181,0.001133,0.002134,0.989523,0.008343,1
4,0.880736,0.028293,0.090971,0.000378,0.000197,0.999425,0.023283,0.013253,0.963464,2


In [12]:
# Load the ANLI Round 2 test data (including true labels)
anli_r2_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r2.csv"
anli_r2_test_df = pd.read_csv(anli_r2_test_path)

# Define file paths for ANLI Round 2 prediction files
anli_r2_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r2_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r2_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r2_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r2 = "/kaggle/working/combined_anli_r2_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r2_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r2_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r2_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r2_df['True_Label'] = anli_r2_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r2_df.to_csv(output_csv_path_anli_r2, index=False)

print(f"Combined ANLI Round 2 predictions with true labels saved to {output_csv_path_anli_r2}")


Combined ANLI Round 2 predictions with true labels saved to /kaggle/working/combined_anli_r2_df


In [13]:
combined_anli_r2_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.001309,0.029617,0.969075,0.999506,0.000264,0.00023,0.863365,0.133388,0.003246,0
1,0.724144,0.273676,0.00218,0.026951,0.05423,0.918819,0.0729,0.904344,0.022756,1
2,0.071604,0.917894,0.010503,0.001282,0.998108,0.00061,0.027402,0.972218,0.00038,0
3,0.066162,0.929179,0.004659,0.007091,0.992694,0.000215,0.632171,0.365194,0.002635,1
4,0.906199,0.089873,0.003928,0.006259,0.989432,0.004309,0.064109,0.234642,0.701249,2


In [14]:
# Load the ANLI Round 3 test data (including true labels)
anli_r3_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r3.csv"
anli_r3_test_df = pd.read_csv(anli_r3_test_path)

# Define file paths for ANLI Round 2 prediction files
anli_r3_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r3_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r3_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r3_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r3 = "/kaggle/working/combined_anli_r3_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r3_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r3_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r3_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r3_df['True_Label'] = anli_r3_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r3_df.to_csv(output_csv_path_anli_r3, index=False)

print(f"Combined ANLI Round 3 predictions with true labels saved to {output_csv_path_anli_r3}")


Combined ANLI Round 3 predictions with true labels saved to /kaggle/working/combined_anli_r3_df


In [15]:
combined_anli_r3_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.005921,0.960529,0.033551,0.022959,0.976533,0.000509,0.001848,0.998084,6.7e-05,0
1,0.009586,0.934714,0.0557,0.999611,0.000205,0.000185,0.951772,0.048075,0.000153,0
2,0.003428,0.976393,0.020179,0.00202,0.997897,8.3e-05,0.001014,0.998984,2e-06,0
3,0.004633,0.023985,0.971382,0.974441,0.024459,0.0011,0.996749,0.000989,0.002262,0
4,0.017428,0.633695,0.348877,0.984416,0.011166,0.004419,0.000518,0.128416,0.871066,0


In [16]:
# Check for missing values
missing_values_anli1 = combined_anli_r1_df.isnull().sum()

missing_values_anli2 = combined_anli_r2_df.isnull().sum()

missing_values_anli3 = combined_anli_r3_df.isnull().sum()

missing_values_snli = combined_snli_df.isnull().sum()

missing_values_mnli_matched = combined_mnli_matched_df.isnull().sum()

missing_values_mnli_mismatched = combined_mnli_mismatched_df.isnull().sum()

In [17]:
missing_values_anli1

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [18]:
missing_values_anli2

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [19]:
missing_values_anli3

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [20]:
missing_values_snli

Deberta_Entailment         0
Deberta_Neutral            0
Deberta_Contradiction      0
Roberta_Entailment         0
Roberta_Neutral            0
Roberta_Contradiction      0
Albert_Entailment          0
Albert_Neutral             0
Albert_Contradiction       0
True_Label               176
dtype: int64

In [21]:
missing_values_mnli_matched

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [22]:
missing_values_mnli_mismatched

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [23]:
combined_snli_df.dropna(subset=['True_Label'], inplace=True)


In [24]:
# Verify missing values again after removal
missing_values_snli_after_removal = combined_snli_df.isnull().sum()
print(missing_values_snli_after_removal)


Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64


In [25]:
combined_snli_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9824 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Deberta_Entailment     9824 non-null   float64
 1   Deberta_Neutral        9824 non-null   float64
 2   Deberta_Contradiction  9824 non-null   float64
 3   Roberta_Entailment     9824 non-null   float64
 4   Roberta_Neutral        9824 non-null   float64
 5   Roberta_Contradiction  9824 non-null   float64
 6   Albert_Entailment      9824 non-null   float64
 7   Albert_Neutral         9824 non-null   float64
 8   Albert_Contradiction   9824 non-null   float64
 9   True_Label             9824 non-null   Int64  
dtypes: Int64(1), float64(9)
memory usage: 853.8 KB


In [26]:
import pandas as pd
from scipy.stats import entropy

def calculate_majority_vote_and_entropy(df):
    model_prefixes = ['Deberta', 'Roberta', 'Albert']
    labels = ['Entailment', 'Neutral', 'Contradiction']
    
    # Extract the highest probability prediction for each model
    max_labels = pd.DataFrame({
        model: df[[f"{model}_{label}" for label in labels]].idxmax(axis=1)
        for model in model_prefixes
    })

    # Calculate the majority vote (mode) for each row
    df['Majority_Vote'] = max_labels.mode(axis=1)[0]

    # Calculate entropy for each model's predictions
    for model in model_prefixes:
        df[f"{model}_Entropy"] = df.apply(
            lambda row: entropy([row[f"{model}_{label}"] for label in labels]),
            axis=1
        )

    return df


In [27]:
# Apply the function to each ANLI round DataFrame
enhanced_anli_r1_df = calculate_majority_vote_and_entropy(combined_anli_r1_df)
enhanced_anli_r2_df = calculate_majority_vote_and_entropy(combined_anli_r2_df)
enhanced_anli_r3_df = calculate_majority_vote_and_entropy(combined_anli_r3_df)

In [28]:
enhanced_anli_r1_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,Majority_Vote,Deberta_Entropy,Roberta_Entropy,Albert_Entropy
0,0.015388,0.976305,0.008307,0.996714,0.000376,0.00291,0.322974,0.667628,0.009398,0,Albert_Neutral,0.12744,0.023238,0.67862
1,0.224603,0.501549,0.273848,0.87572,0.000724,0.123556,0.998526,0.000604,0.000869,0,Albert_Entailment,1.036206,0.379813,0.01208
2,0.006642,0.97669,0.016669,0.999484,0.00033,0.000186,0.783352,0.212241,0.004407,0,Albert_Entailment,0.124586,0.004758,0.544162
3,0.966494,0.032235,0.001272,0.000686,0.998181,0.001133,0.002134,0.989523,0.008343,1,Albert_Neutral,0.152133,0.0145,0.063478
4,0.880736,0.028293,0.090971,0.000378,0.000197,0.999425,0.023283,0.013253,0.963464,2,Albert_Contradiction,0.430795,0.005237,0.180706


In [29]:
enhanced_anli_r2_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,Majority_Vote,Deberta_Entropy,Roberta_Entropy,Albert_Entropy
0,0.001309,0.029617,0.969075,0.999506,0.000264,0.00023,0.863365,0.133388,0.003246,0,Albert_Entailment,0.143363,0.004595,0.414155
1,0.724144,0.273676,0.00218,0.026951,0.05423,0.918819,0.0729,0.904344,0.022756,1,Albert_Neutral,0.60172,0.333241,0.367912
2,0.071604,0.917894,0.010503,0.001282,0.998108,0.00061,0.027402,0.972218,0.00038,0,Albert_Neutral,0.315282,0.014942,0.128952
3,0.066162,0.929179,0.004659,0.007091,0.992694,0.000215,0.632171,0.365194,0.002635,1,Albert_Entailment,0.272938,0.04419,0.67343
4,0.906199,0.089873,0.003928,0.006259,0.989432,0.004309,0.064109,0.234642,0.701249,2,Albert_Contradiction,0.327553,0.065739,0.765145


In [30]:
enhanced_anli_r3_df

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,Majority_Vote,Deberta_Entropy,Roberta_Entropy,Albert_Entropy
0,0.005921,0.960529,0.033551,0.022959,0.976533,0.000509,0.001848,0.998084,0.000067,0,Albert_Neutral,0.182945,0.113695,0.014192
1,0.009586,0.934714,0.055700,0.999611,0.000205,0.000185,0.951772,0.048075,0.000153,0,Albert_Entailment,0.268505,0.003716,0.194300
2,0.003428,0.976393,0.020179,0.002020,0.997897,0.000083,0.001014,0.998984,0.000002,0,Albert_Neutral,0.121542,0.015412,0.008036
3,0.004633,0.023985,0.971382,0.974441,0.024459,0.001100,0.996749,0.000989,0.002262,0,Albert_Entailment,0.142577,0.123487,0.023867
4,0.017428,0.633695,0.348877,0.984416,0.011166,0.004419,0.000518,0.128416,0.871066,0,Albert_Contradiction,0.727042,0.089608,0.387728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,0.150312,0.806051,0.043637,0.032452,0.068553,0.898994,0.122045,0.254093,0.623862,2,Albert_Contradiction,0.595303,0.390702,0.899181
1196,0.971834,0.026294,0.001872,0.009070,0.824654,0.166276,0.001115,0.003229,0.995656,2,Albert_Contradiction,0.135193,0.499956,0.030436
1197,0.973818,0.025074,0.001109,0.000352,0.000972,0.998677,0.310862,0.618914,0.070225,2,Albert_Neutral,0.125800,0.010860,0.846682
1198,0.341781,0.226539,0.431681,0.006147,0.073669,0.920184,0.054172,0.317935,0.627893,2,Albert_Contradiction,1.065946,0.299983,0.814480


In [31]:
# Apply the function to each DataFrame
enhanced_snli_df = calculate_majority_vote_and_entropy(combined_snli_df)
enhanced_mnli_matched_df = calculate_majority_vote_and_entropy(combined_mnli_matched_df)
enhanced_mnli_mismatched_df = calculate_majority_vote_and_entropy(combined_mnli_mismatched_df)

In [32]:
from sklearn.utils import resample

def oversample_high_uncertainty(df):
    # Identify high entropy samples as before
    entropy_threshold = df[['Deberta_Entropy', 'Roberta_Entropy', 'Albert_Entropy']].quantile(0.75)
    high_entropy_cases = df[
        (df['Deberta_Entropy'] > entropy_threshold['Deberta_Entropy']) |
        (df['Roberta_Entropy'] > entropy_threshold['Roberta_Entropy']) |
        (df['Albert_Entropy'] > entropy_threshold['Albert_Entropy'])
    ]
    
    # Resample these high entropy cases
    oversampled_high_entropy_cases = resample(high_entropy_cases,
                                              replace=True,  # Sample with replacement
                                              n_samples=len(df) - len(high_entropy_cases),  # to match majority class size
                                              random_state=42)  # for reproducibility
    
    # Combine with the rest of the dataset
    oversampled_df = pd.concat([df[~df.index.isin(high_entropy_cases.index)], oversampled_high_entropy_cases])
    
    return oversampled_df



In [33]:
augmented_anli_r1_df = oversample_high_uncertainty(enhanced_anli_r1_df)
augmented_anli_r2_df = oversample_high_uncertainty(enhanced_anli_r2_df)
augmented_anli_r3_df = oversample_high_uncertainty(enhanced_anli_r3_df)

In [34]:
augmented_anli_r1_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,Majority_Vote,Deberta_Entropy,Roberta_Entropy,Albert_Entropy
3,0.966494,0.032235,0.001272,0.000686,0.998181,0.001133,0.002134,0.989523,0.008343,1,Albert_Neutral,0.152133,0.0145,0.063478
4,0.880736,0.028293,0.090971,0.000378,0.000197,0.999425,0.023283,0.013253,0.963464,2,Albert_Contradiction,0.430795,0.005237,0.180706
5,0.89474,0.102937,0.002323,0.000314,0.999132,0.000554,0.01128,0.96826,0.020459,1,Albert_Neutral,0.347645,0.007554,0.161392
7,0.982861,0.009957,0.007182,0.014728,0.000938,0.984333,0.003744,0.002637,0.993619,2,Albert_Contradiction,0.098341,0.08421,0.04294
10,0.004359,0.070512,0.925129,0.999762,4.8e-05,0.00019,0.999628,0.000349,2.2e-05,0,Albert_Entailment,0.282684,0.002344,0.003391


In [35]:
augmented_snli_df = oversample_high_uncertainty(enhanced_snli_df)
augmented_mnli_matched_df = oversample_high_uncertainty(enhanced_mnli_matched_df)
augmented_mnli_mismatched_df = oversample_high_uncertainty(enhanced_mnli_mismatched_df)

In [36]:
from sklearn.model_selection import train_test_split

# Function to split data
def split_data(df):
    X = df.drop(columns=['True_Label','Majority_Vote'])  # Features
    y = df['True_Label']                 # Labels
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_val, y_train, y_val

In [37]:
X_train_anli_augmented_r1, X_val_anli_augmented_r1, y_train_anli_augmented_r1, y_val_anli_augmented_r1 = split_data(augmented_anli_r1_df)
X_train_anli_augmented_r2, X_val_anli_augmented_r2, y_train_anli_augmented_r2, y_val_anli_augmented_r2 = split_data(augmented_anli_r2_df)
X_train_anli_augmented_r3, X_val_anli_augmented_r3, y_train_anli_augmented_r3, y_val_anli_augmented_r3 = split_data(augmented_anli_r3_df)

In [38]:
X_train_anli_augmented_r1.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,Deberta_Entropy,Roberta_Entropy,Albert_Entropy
20,0.921616,0.076885,0.001498,0.000209,0.999182,0.000608,1.1e-05,0.999427,0.000562,0.282216,0.007094,0.004903
827,0.006233,0.921876,0.071891,0.928338,0.007008,0.064654,0.011294,0.984312,0.004394,0.295899,0.280862,0.090048
807,0.937774,0.061132,0.001094,0.000106,0.999656,0.000238,0.011799,0.981802,0.006398,0.238553,0.003301,0.10274
90,0.038827,0.874887,0.086286,0.997415,0.000663,0.001922,0.623703,0.372358,0.003939,0.454481,0.019453,0.684102
817,0.049529,0.66159,0.288881,0.432895,0.559573,0.007533,0.543398,0.454248,0.002354,0.780869,0.724147,0.704121


In [39]:
y_train_anli_augmented_r1

20     1
827    0
807    1
90     0
817    1
      ..
230    1
584    2
808    1
960    0
221    0
Name: True_Label, Length: 723, dtype: int64

In [40]:
X_train_anli_augmented_snli, X_val_anli_augmented_snli, y_train_anli_augmented_snli, y_val_anli_augmented_snli = split_data(augmented_snli_df)
X_train_anli_augmented_mnli_matched, X_val_anli_augmented_mnli_matched, y_train_anli_augmented_mnli_matched, y_val_anli_augmented_mnli_matched = split_data(augmented_mnli_matched_df)
X_train_anli_augmented_mnli_mismatched, X_val_anli_augmented_mnli_mismatched, y_train_anli_augmented_mnli_mismatched, y_val_anli_augmented_mnli_mismatched = split_data(augmented_mnli_mismatched_df)

In [41]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Define the logistic regression model with a hyperparameter grid for cross-validation
param_grid = {
    'C': [0.01, 0.1, 1, 10],  # Regularization strengths
    'solver': ['lbfgs', 'liblinear', 'saga'],  # Optimization algorithms
    'max_iter': [1000]  # Increased iteration limit to ensure convergence
}

# Function to train and evaluate using logistic regression
def train_and_evaluate_L(X_train, y_train, X_val, y_val):
    model = LogisticRegression(random_state=42)
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    
    # Get the best model from grid search
    best_model = grid_search.best_estimator_
    
    # Evaluate the model using the validation set
    predictions = best_model.predict(X_val)
    accuracy = accuracy_score(y_val, predictions)
    print("Validation Accuracy:", accuracy)
    
    return best_model, grid_search.best_params_, grid_search.best_score_



In [42]:
# Train and evaluate on ANLI Round 1 dataset
best_model_anli_r1, best_params_r1, best_score_r1 = train_and_evaluate_L(X_train_anli_augmented_r1, y_train_anli_augmented_r1, X_val_anli_augmented_r1, y_val_anli_augmented_r1)
print("Best Parameters for ANLI R1:", best_params_r1)
print("Best Score for ANLI R1 (CV Accuracy):", best_score_r1)


Validation Accuracy: 0.7790055248618785
Best Parameters for ANLI R1: {'C': 10, 'max_iter': 1000, 'solver': 'liblinear'}
Best Score for ANLI R1 (CV Accuracy): 0.733103448275862


In [43]:
# Train and evaluate on ANLI Round 2 dataset
best_model_anli_r2, best_params_r2, best_score_r2 = train_and_evaluate_L(X_train_anli_augmented_r2, y_train_anli_augmented_r2, X_val_anli_augmented_r2, y_val_anli_augmented_r2)
print("Best Parameters for ANLI R2:", best_params_r2)
print("Best Score for ANLI R2 (CV Accuracy):", best_score_r2)


Validation Accuracy: 0.7102272727272727
Best Parameters for ANLI R2: {'C': 1, 'max_iter': 1000, 'solver': 'liblinear'}
Best Score for ANLI R2 (CV Accuracy): 0.7037284701114488


In [44]:
# Train and evaluate on ANLI Round 3 dataset
best_model_anli_r3, best_params_r3, best_score_r3 = train_and_evaluate_L(X_train_anli_augmented_r3, y_train_anli_augmented_r3, X_val_anli_augmented_r3, y_val_anli_augmented_r3)
print("Best Parameters for ANLI R3:", best_params_r3)
print("Best Score for ANLI R3 (CV Accuracy):", best_score_r3)

Validation Accuracy: 0.7072072072072072
Best Parameters for ANLI R3: {'C': 10, 'max_iter': 1000, 'solver': 'lbfgs'}
Best Score for ANLI R3 (CV Accuracy): 0.683971307052625


In [45]:
# Apply to each dataset
best_model_snli, best_params_snli, best_score_snli = train_and_evaluate_L(X_train_anli_augmented_snli, y_train_anli_augmented_snli, X_val_anli_augmented_snli, y_val_anli_augmented_snli)
print("Best Parameters for SNLI:", best_params_snli)
print("Best Score for SNLI (CV Accuracy):", best_score_snli)

Validation Accuracy: 0.9118629908103593
Best Parameters for SNLI: {'C': 10, 'max_iter': 1000, 'solver': 'liblinear'}
Best Score for SNLI (CV Accuracy): 0.9103621248953022


In [46]:
# Apply to each dataset
best_model_mnli_matched, best_params_mnli_matched, best_score_mnli_matched = train_and_evaluate_L(X_train_anli_augmented_mnli_matched, y_train_anli_augmented_mnli_matched, X_val_anli_augmented_mnli_matched, y_val_anli_augmented_mnli_matched)
print("Best Parameters for MNLI-matched:", best_params_mnli_matched)
print("Best Score for MNLI-matched (CV Accuracy):", best_score_mnli_matched)

Validation Accuracy: 0.8909541511771994
Best Parameters for MNLI-matched: {'C': 0.01, 'max_iter': 1000, 'solver': 'lbfgs'}
Best Score for MNLI-matched (CV Accuracy): 0.8976554333403021


In [47]:
# Apply to each dataset
best_model_mnli_mismatched, best_params_mnli_mismatched, best_score_mnli_mismatched= train_and_evaluate_L(X_train_anli_augmented_mnli_mismatched, y_train_anli_augmented_mnli_mismatched, X_val_anli_augmented_mnli_mismatched, y_val_anli_augmented_mnli_mismatched)
print("Best Parameters for MNLI-mismatched:", best_params_mnli_mismatched)
print("Best Score for MNLI-mismatched (CV Accuracy):", best_score_mnli_mismatched)

Validation Accuracy: 0.8951048951048951
Best Parameters for MNLI-mismatched: {'C': 0.1, 'max_iter': 1000, 'solver': 'lbfgs'}
Best Score for MNLI-mismatched (CV Accuracy): 0.89334602811895


In [48]:
# Concatenate all augmented dataframes into one
all_augmented_df = pd.concat([
    augmented_anli_r1_df,
    augmented_anli_r2_df,
    augmented_anli_r3_df,
    augmented_snli_df,
    augmented_mnli_matched_df,
    augmented_mnli_mismatched_df
], ignore_index=True)


In [49]:
from sklearn.model_selection import train_test_split

X = all_augmented_df.drop(columns=['True_Label','Majority_Vote'])
y = all_augmented_df['True_Label']


X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [50]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31291 entries, 34640 to 15795
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Deberta_Entailment     31291 non-null  float64
 1   Deberta_Neutral        31291 non-null  float64
 2   Deberta_Contradiction  31291 non-null  float64
 3   Roberta_Entailment     31291 non-null  float64
 4   Roberta_Neutral        31291 non-null  float64
 5   Roberta_Contradiction  31291 non-null  float64
 6   Albert_Entailment      31291 non-null  float64
 7   Albert_Neutral         31291 non-null  float64
 8   Albert_Contradiction   31291 non-null  float64
 9   Deberta_Entropy        31291 non-null  float64
 10  Roberta_Entropy        31291 non-null  float64
 11  Albert_Entropy         31291 non-null  float64
dtypes: float64(12)
memory usage: 3.1 MB


In [51]:
# Assuming your function is already defined as provided in previous discussions
best_model, best_params, best_score = train_and_evaluate_L(X_train, y_train, X_val, y_val)

print("Validation Accuracy on Combined Data:", best_score)
print("Best Parameters for the Unified Model:", best_params)


Validation Accuracy: 0.8843154799948869
Validation Accuracy on Combined Data: 0.8820745438808448
Best Parameters for the Unified Model: {'C': 0.1, 'max_iter': 1000, 'solver': 'liblinear'}


In [52]:
import joblib
# Assuming 'best_model' is your trained Logistic Regression model
joblib_file = "/kaggle/working/LR_entropy.pkl"  # Define your own path to save the file
joblib.dump(best_model, joblib_file)
print("Model saved successfully at:", joblib_file)


Model saved successfully at: /kaggle/working/LR_entropy.pkl


In [53]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


# Define the parameter grid
param_grid = {
    'max_depth': [3, 5, 7],  # Depth of each tree
    'n_estimators': [50, 100, 200],  # Number of trees in the forest
    'learning_rate': [0.01, 0.1, 0.2]  # Step size shrinkage used to prevent overfitting
}

# Initialize the XGBoost classifier
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# Configure GridSearchCV
grid_search = GridSearchCV(xgb_model, param_grid, cv=5, scoring='accuracy', verbose=1)

def train_and_evaluate(X_train, y_train, X_val, y_val):
    # Fit the GridSearchCV object to the training data
    grid_search.fit(X_train, y_train)

    # Get the best model from grid search
    best_model = grid_search.best_estimator_

    # Evaluate the model using the validation set
    predictions = best_model.predict(X_val)
    accuracy = accuracy_score(y_val, predictions)
    print("Validation Accuracy:", accuracy)

    return best_model, grid_search.best_params_, grid_search.best_score_

# Apply to each dataset
best_model_anli_r1, best_params_r1, best_score_r1 = train_and_evaluate(X_train_anli_augmented_r1, y_train_anli_augmented_r1, X_val_anli_augmented_r1, y_val_anli_augmented_r1)
print("Best Parameters for ANLI R1:", best_params_r1)
print("Best Score for ANLI R1 (CV Accuracy):", best_score_r1)



Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.861878453038674
Best Parameters for ANLI R1: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 50}
Best Score for ANLI R1 (CV Accuracy): 0.7870881226053641


In [54]:
best_model_anli_r2, best_params_r2, best_score_r2 = train_and_evaluate(X_train_anli_augmented_r2, y_train_anli_augmented_r2, X_val_anli_augmented_r2, y_val_anli_augmented_r2)
print("Best Parameters for ANLI R2:", best_params_r2)
print("Best Score for ANLI R2 (CV Accuracy):", best_score_r2)


Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.7784090909090909
Best Parameters for ANLI R2: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}
Best Score for ANLI R2 (CV Accuracy): 0.7321985815602837


In [55]:
best_model_anli_r3, best_params_r3, best_score_r3 = train_and_evaluate(X_train_anli_augmented_r3, y_train_anli_augmented_r3, X_val_anli_augmented_r3, y_val_anli_augmented_r3)
print("Best Parameters for ANLI R3:", best_params_r3)
print("Best Score for ANLI R3 (CV Accuracy):", best_score_r3)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.7657657657657657
Best Parameters for ANLI R3: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
Best Score for ANLI R3 (CV Accuracy): 0.7720116803148607


In [56]:
# Apply to each dataset
best_model_snli, best_params_snli, best_score_snli = train_and_evaluate(X_train_anli_augmented_snli, y_train_anli_augmented_snli, X_val_anli_augmented_snli, y_val_anli_augmented_snli)
print("Best Parameters for SNLI:", best_params_snli)
print("Best Score for SNLI (CV Accuracy):", best_score_snli)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.974937343358396
Best Parameters for SNLI: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200}
Best Score for SNLI (CV Accuracy): 0.9619716749742858


In [57]:
# Apply to each dataset
best_model_mnli_matched, best_params_mnli_matched, best_score_mnli_matched = train_and_evaluate(X_train_anli_augmented_mnli_matched, y_train_anli_augmented_mnli_matched, X_val_anli_augmented_mnli_matched, y_val_anli_augmented_mnli_matched)
print("Best Parameters for MNLI-matched:", best_params_mnli_matched)
print("Best Score for MNLI-matched (CV Accuracy):", best_score_mnli_matched)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.9570425444031392
Best Parameters for MNLI-matched: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200}
Best Score for MNLI-matched (CV Accuracy): 0.958690592506944


In [58]:
# Apply to each dataset
best_model_mnli_mismatched, best_params_mnli_mismatched, best_score_mnli_mismatched= train_and_evaluate(X_train_anli_augmented_mnli_mismatched, y_train_anli_augmented_mnli_mismatched, X_val_anli_augmented_mnli_mismatched, y_val_anli_augmented_mnli_mismatched)
print("Best Parameters for MNLI-mismatched:", best_params_mnli_mismatched)
print("Best Score for MNLI-mismatched (CV Accuracy):", best_score_mnli_mismatched)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.9703825586178527
Best Parameters for MNLI-mismatched: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 100}
Best Score for MNLI-mismatched (CV Accuracy): 0.9550546933680325


In [59]:
# Assuming your function is already defined as provided in previous discussions
best_model, best_params, best_score = train_and_evaluate(X_train, y_train, X_val, y_val)

print("Validation Accuracy on Combined Data:", best_score)
print("Best Parameters for the Unified Model:", best_params)


Fitting 5 folds for each of 27 candidates, totalling 135 fits
Validation Accuracy: 0.9477182666496229
Validation Accuracy on Combined Data: 0.9415486327365169
Best Parameters for the Unified Model: {'learning_rate': 0.2, 'max_depth': 7, 'n_estimators': 200}


In [60]:
# Assuming 'best_model' is your trained XGBoost model from the last training session
joblib_file = "/kaggle/working/XGBoost_entropy.pkl"  # Define your own path to save the file
joblib.dump(best_model, joblib_file)
print("XGBoost model saved successfully at:", joblib_file)


XGBoost model saved successfully at: /kaggle/working/XGBoost_entropy.pkl
