In [2]:
import pandas as pd

# Load the SNLI test data (including true labels)
snli_test_path = "/kaggle/input/stanford-natural-language-inference-corpus/snli_1.0_test.csv"
snli_test_df = pd.read_csv(snli_test_path)

# Define file paths for SNLI prediction files
snli_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_snli_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_snli_predictions.csv",
    "albert": "/kaggle/input/albert/albert_snli_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_snli = "/kaggle/working/combined_snli_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_snli_df = pd.DataFrame(columns=columns)

label_mapping = {'entailment': 0, 'neutral': 1, 'contradiction': 2}

# Load and merge the predictions
for model, path in snli_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_snli_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_snli_df['True_Label'] = snli_test_df['gold_label'].map(label_mapping)

# Convert True_Label to integer type
combined_snli_df['True_Label'] = combined_snli_df['True_Label'].astype('Int64')

# Save the combined DataFrame to CSV
combined_snli_df.to_csv(output_csv_path_snli, index=False)

print(f"Combined SNLI predictions with true labels saved to {output_csv_path_snli}")


Combined SNLI predictions with true labels saved to /kaggle/working/combined_snli_df


In [3]:
combined_snli_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.034767,0.962592,0.002641,0.012451,0.927093,0.060457,0.008653,0.947434,0.043913,1
1,0.001921,0.319032,0.679047,0.752766,0.242251,0.004983,0.740332,0.256434,0.003235,0
2,0.998783,0.000764,0.000453,0.000254,0.004494,0.995253,0.004677,0.060481,0.934843,2
3,0.001001,0.997708,0.001291,0.005844,0.990736,0.003419,0.034056,0.956687,0.009257,1
4,0.00108,0.301363,0.697557,0.278348,0.718575,0.003076,0.498761,0.49927,0.001969,0


In [4]:
# Load the ANLI Round 1 test data (including true labels)
mnli_matched_test_path = "/kaggle/input/nli-dataset-for-sentence-understanding/mnli_validation_matched.csv"
mnli_matched_test_df = pd.read_csv(mnli_matched_test_path)

# Define file paths for ANLI Round 1 prediction files
mnli_matched_predictions_paths = {
    "deberta": "/kaggle/input/validation/deberta_mnli_matched_val_predictions.csv",
    "roberta": "/kaggle/input/validation/roberta_mnli_matched_val_predictions.csv",
    "albert": "/kaggle/input/validation/albert_mnli_matched_val_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_mnli_matched = "/kaggle/working/combined_mnli_matched_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_mnli_matched_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in mnli_matched_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_mnli_matched_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_mnli_matched_df['True_Label'] = mnli_matched_test_df['label']

# Save the combined DataFrame to CSV
combined_mnli_matched_df.to_csv(output_csv_path_mnli_matched, index=False)

print(f"Combined MNLI-matched predictions with true labels saved to {output_csv_path_mnli_matched}")


Combined MNLI-matched predictions with true labels saved to /kaggle/working/combined_mnli_matched_df


In [5]:
combined_mnli_matched_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.005164,0.993364,0.001472,0.017844,0.950246,0.031909,0.010844,0.983012,0.006144,1
1,0.999153,0.000526,0.000321,0.001413,0.00203,0.996557,0.005388,0.007536,0.987076,2
2,0.000989,0.044792,0.954219,0.954781,0.042249,0.00297,0.853862,0.143483,0.002655,0
3,0.994965,0.004808,0.000228,0.000343,0.003511,0.996146,0.004128,0.070757,0.925115,2
4,0.999657,0.00022,0.000123,7.9e-05,0.000496,0.999425,0.003864,0.029262,0.966875,2


In [6]:
# Load the ANLI Round 1 test data (including true labels)
mnli_mismatched_test_path = "/kaggle/input/nli-dataset-for-sentence-understanding/mnli_validation_mismatched.csv"
mnli_mismatched_test_df = pd.read_csv(mnli_mismatched_test_path)

# Define file paths for ANLI Round 1 prediction files
mnli_mismatched_predictions_paths = {
    "deberta": "/kaggle/input/validation/deberta_mnli_mismatched_val_predictions.csv",
    "roberta": "/kaggle/input/validation/roberta_mnli_mismatched_val_predictions.csv",
    "albert": "/kaggle/input/validation/albert_mnli_mismatched_val_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_mnli_mismatched = "/kaggle/working/combined_mnli_mismatched_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_mnli_mismatched_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in mnli_mismatched_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_mnli_mismatched_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_mnli_mismatched_df['True_Label'] = mnli_mismatched_test_df['label']

# Save the combined DataFrame to CSV
combined_mnli_mismatched_df.to_csv(output_csv_path_mnli_mismatched, index=False)

print(f"Combined MNLI-mismatched predictions with true labels saved to {output_csv_path_mnli_mismatched}")


Combined MNLI-mismatched predictions with true labels saved to /kaggle/working/combined_mnli_mismatched_df


In [7]:
combined_mnli_mismatched_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.999667,0.00016,0.000173,6.8e-05,0.000402,0.999529,0.000894,0.003787,0.995318,2
1,0.998119,0.000962,0.000919,0.000183,0.001511,0.998306,0.006421,0.010224,0.983355,2
2,0.000552,0.004809,0.994639,0.986062,0.01202,0.001918,0.975041,0.023354,0.001605,0
3,0.827653,0.171961,0.000386,0.000478,0.270953,0.728569,0.001722,0.796122,0.202156,2
4,0.000292,0.002875,0.996833,0.975167,0.021904,0.002929,0.965952,0.032748,0.0013,0


In [8]:
# Load the ANLI Round 1 test data (including true labels)
anli_r1_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r1.csv"
anli_r1_test_df = pd.read_csv(anli_r1_test_path)

# Define file paths for ANLI Round 1 prediction files
anli_r1_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r1_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r1_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r1_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r1 = "/kaggle/working/combined_anli_r1_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r1_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r1_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r1_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r1_df['True_Label'] = anli_r1_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r1_df.to_csv(output_csv_path_anli_r1, index=False)

print(f"Combined ANLI Round 1 predictions with true labels saved to {output_csv_path_anli_r1}")


Combined ANLI Round 1 predictions with true labels saved to /kaggle/working/combined_anli_r1_df


In [9]:
combined_anli_r1_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.015388,0.976305,0.008307,0.996714,0.000376,0.00291,0.322974,0.667628,0.009398,0
1,0.224603,0.501549,0.273848,0.87572,0.000724,0.123556,0.998526,0.000604,0.000869,0
2,0.006642,0.97669,0.016669,0.999484,0.00033,0.000186,0.783352,0.212241,0.004407,0
3,0.966494,0.032235,0.001272,0.000686,0.998181,0.001133,0.002134,0.989523,0.008343,1
4,0.880736,0.028293,0.090971,0.000378,0.000197,0.999425,0.023283,0.013253,0.963464,2


In [10]:
# Load the ANLI Round 2 test data (including true labels)
anli_r2_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r2.csv"
anli_r2_test_df = pd.read_csv(anli_r2_test_path)

# Define file paths for ANLI Round 2 prediction files
anli_r2_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r2_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r2_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r2_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r2 = "/kaggle/working/combined_anli_r2_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r2_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r2_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r2_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r2_df['True_Label'] = anli_r2_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r2_df.to_csv(output_csv_path_anli_r2, index=False)

print(f"Combined ANLI Round 2 predictions with true labels saved to {output_csv_path_anli_r2}")


Combined ANLI Round 2 predictions with true labels saved to /kaggle/working/combined_anli_r2_df


In [11]:
combined_anli_r2_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.001309,0.029617,0.969075,0.999506,0.000264,0.00023,0.863365,0.133388,0.003246,0
1,0.724144,0.273676,0.00218,0.026951,0.05423,0.918819,0.0729,0.904344,0.022756,1
2,0.071604,0.917894,0.010503,0.001282,0.998108,0.00061,0.027402,0.972218,0.00038,0
3,0.066162,0.929179,0.004659,0.007091,0.992694,0.000215,0.632171,0.365194,0.002635,1
4,0.906199,0.089873,0.003928,0.006259,0.989432,0.004309,0.064109,0.234642,0.701249,2


In [12]:
# Load the ANLI Round 3 test data (including true labels)
anli_r3_test_path = "/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r3.csv"
anli_r3_test_df = pd.read_csv(anli_r3_test_path)

# Define file paths for ANLI Round 2 prediction files
anli_r3_predictions_paths = {
    "deberta": "/kaggle/input/deberta-nli/deberta_anli_r3_predictions.csv",
    "roberta": "/kaggle/input/roberta/roberta_anli_r3_predictions.csv",
    "albert": "/kaggle/input/albert/albert_anli_r3_predictions.csv",
}

# Specify where you want to save the combined predictions CSV file
output_csv_path_anli_r3 = "/kaggle/working/combined_anli_r3_df"

# Define the column names, placing True_Label at the end
columns = [
    'Deberta_Entailment', 'Deberta_Neutral', 'Deberta_Contradiction',
    'Roberta_Entailment', 'Roberta_Neutral', 'Roberta_Contradiction',
    'Albert_Entailment', 'Albert_Neutral', 'Albert_Contradiction',
    'True_Label'  # Ensuring True_Label is the last column
]

# Initialize the DataFrame with specified columns
combined_anli_r3_df = pd.DataFrame(columns=columns)

# Load and merge the predictions
for model, path in anli_r3_predictions_paths.items():
    predictions_df = pd.read_csv(path)
    for label in ['Entailment', 'Neutral', 'Contradiction']:
        combined_anli_r3_df[f"{model.capitalize()}_{label}"] = predictions_df[label]

# Assign the true labels to the True_Label column, now positioned at the end
combined_anli_r3_df['True_Label'] = anli_r3_test_df['label']

# Save the combined DataFrame to CSV
combined_anli_r3_df.to_csv(output_csv_path_anli_r3, index=False)

print(f"Combined ANLI Round 3 predictions with true labels saved to {output_csv_path_anli_r3}")


Combined ANLI Round 3 predictions with true labels saved to /kaggle/working/combined_anli_r3_df


In [13]:
combined_anli_r3_df.head()

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.005921,0.960529,0.033551,0.022959,0.976533,0.000509,0.001848,0.998084,6.7e-05,0
1,0.009586,0.934714,0.0557,0.999611,0.000205,0.000185,0.951772,0.048075,0.000153,0
2,0.003428,0.976393,0.020179,0.00202,0.997897,8.3e-05,0.001014,0.998984,2e-06,0
3,0.004633,0.023985,0.971382,0.974441,0.024459,0.0011,0.996749,0.000989,0.002262,0
4,0.017428,0.633695,0.348877,0.984416,0.011166,0.004419,0.000518,0.128416,0.871066,0


In [14]:
# Check for missing values
missing_values_anli1 = combined_anli_r1_df.isnull().sum()

missing_values_anli2 = combined_anli_r2_df.isnull().sum()

missing_values_anli3 = combined_anli_r3_df.isnull().sum()

missing_values_snli = combined_snli_df.isnull().sum()

missing_values_mnli_matched = combined_mnli_matched_df.isnull().sum()

missing_values_mnli_mismatched = combined_mnli_mismatched_df.isnull().sum()

In [15]:
missing_values_anli1

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [16]:
missing_values_anli2

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [17]:
missing_values_anli3

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [18]:
missing_values_snli

Deberta_Entailment         0
Deberta_Neutral            0
Deberta_Contradiction      0
Roberta_Entailment         0
Roberta_Neutral            0
Roberta_Contradiction      0
Albert_Entailment          0
Albert_Neutral             0
Albert_Contradiction       0
True_Label               176
dtype: int64

In [19]:
missing_values_mnli_matched

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [20]:
missing_values_mnli_mismatched

Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64

In [21]:
combined_snli_df.dropna(subset=['True_Label'], inplace=True)


In [22]:
# Verify missing values again after removal
missing_values_snli_after_removal = combined_snli_df.isnull().sum()
print(missing_values_snli_after_removal)


Deberta_Entailment       0
Deberta_Neutral          0
Deberta_Contradiction    0
Roberta_Entailment       0
Roberta_Neutral          0
Roberta_Contradiction    0
Albert_Entailment        0
Albert_Neutral           0
Albert_Contradiction     0
True_Label               0
dtype: int64


In [23]:
combined_snli_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9824 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Deberta_Entailment     9824 non-null   float64
 1   Deberta_Neutral        9824 non-null   float64
 2   Deberta_Contradiction  9824 non-null   float64
 3   Roberta_Entailment     9824 non-null   float64
 4   Roberta_Neutral        9824 non-null   float64
 5   Roberta_Contradiction  9824 non-null   float64
 6   Albert_Entailment      9824 non-null   float64
 7   Albert_Neutral         9824 non-null   float64
 8   Albert_Contradiction   9824 non-null   float64
 9   True_Label             9824 non-null   Int64  
dtypes: Int64(1), float64(9)
memory usage: 853.8 KB


In [24]:
combined_snli_df

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label
0,0.034767,0.962592,0.002641,0.012451,0.927093,0.060457,0.008653,0.947434,0.043913,1
1,0.001921,0.319032,0.679047,0.752766,0.242251,0.004983,0.740332,0.256434,0.003235,0
2,0.998783,0.000764,0.000453,0.000254,0.004494,0.995253,0.004677,0.060481,0.934843,2
3,0.001001,0.997708,0.001291,0.005844,0.990736,0.003419,0.034056,0.956687,0.009257,1
4,0.001080,0.301363,0.697557,0.278348,0.718575,0.003076,0.498761,0.499270,0.001969,0
...,...,...,...,...,...,...,...,...,...,...
9995,0.998825,0.001033,0.000142,0.001264,0.028942,0.969794,0.006420,0.057240,0.936340,2
9996,0.000704,0.009793,0.989503,0.780946,0.217053,0.002001,0.894637,0.104095,0.001267,0
9997,0.999171,0.000493,0.000336,0.000054,0.000765,0.999181,0.000838,0.002670,0.996493,2
9998,0.000267,0.002178,0.997556,0.983402,0.015884,0.000714,0.984347,0.015223,0.000430,0


In [25]:
import numpy as np

def calculate_margin(row):
    # Assuming the row only contains the probabilities
    sorted_probs = np.sort(row)  # Sort probabilities in ascending order
    if len(sorted_probs) > 1:
        return sorted_probs[-1] - sorted_probs[-2]  # Difference between the highest and second highest
    else:
        return 0  # This handles the edge case where there is only one probability value



In [26]:
# Applying to a sample DataFrame with made-up column names
combined_snli_df['confidence_margin_entailment'] = combined_snli_df[['Deberta_Entailment', 'Roberta_Entailment', 'Albert_Entailment']].apply(calculate_margin, axis=1)
combined_snli_df['confidence_margin_neutral'] = combined_snli_df[['Deberta_Neutral', 'Roberta_Neutral', 'Albert_Neutral']].apply(calculate_margin, axis=1)
combined_snli_df['confidence_margin_contradiction'] = combined_snli_df[['Deberta_Contradiction', 'Roberta_Contradiction', 'Albert_Contradiction']].apply(calculate_margin, axis=1)


In [27]:
combined_snli_df

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,confidence_margin_entailment,confidence_margin_neutral,confidence_margin_contradiction
0,0.034767,0.962592,0.002641,0.012451,0.927093,0.060457,0.008653,0.947434,0.043913,1,0.022316,0.015158,0.016543
1,0.001921,0.319032,0.679047,0.752766,0.242251,0.004983,0.740332,0.256434,0.003235,0,0.012435,0.062598,0.674064
2,0.998783,0.000764,0.000453,0.000254,0.004494,0.995253,0.004677,0.060481,0.934843,2,0.994106,0.055987,0.060410
3,0.001001,0.997708,0.001291,0.005844,0.990736,0.003419,0.034056,0.956687,0.009257,1,0.028212,0.006972,0.005837
4,0.001080,0.301363,0.697557,0.278348,0.718575,0.003076,0.498761,0.499270,0.001969,0,0.220412,0.219305,0.694481
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.998825,0.001033,0.000142,0.001264,0.028942,0.969794,0.006420,0.057240,0.936340,2,0.992405,0.028298,0.033454
9996,0.000704,0.009793,0.989503,0.780946,0.217053,0.002001,0.894637,0.104095,0.001267,0,0.113691,0.112958,0.987502
9997,0.999171,0.000493,0.000336,0.000054,0.000765,0.999181,0.000838,0.002670,0.996493,2,0.998334,0.001905,0.002688
9998,0.000267,0.002178,0.997556,0.983402,0.015884,0.000714,0.984347,0.015223,0.000430,0,0.000945,0.000661,0.996842


In [28]:
import numpy as np

def add_prediction_flags(df):
    # Assuming model prediction columns and a 'True_Label' column exist
    for model in ['Deberta', 'Roberta', 'Albert']:
        # Calculate the predicted class by finding the argmax across each set of prediction columns for the model
        entailment_col = f"{model}_Entailment"
        neutral_col = f"{model}_Neutral"
        contradiction_col = f"{model}_Contradiction"
        
        # Create a DataFrame slice of the relevant columns
        model_preds = df[[entailment_col, neutral_col, contradiction_col]]
        
        # Argmax will give us 0 for entailment, 1 for neutral, 2 for contradiction based on column order
        df[f"{model}_Predicted"] = np.argmax(model_preds.values, axis=1)
        
        # Generate binary flags (1 if prediction is correct, 0 if incorrect)
        df[f"{model}_Correct"] = (df[f"{model}_Predicted"] == df['True_Label']).astype(int)

# Apply the function to your combined DataFrame
add_prediction_flags(combined_snli_df)

# You can apply this function to other combined DataFrames similarly
add_prediction_flags(combined_mnli_matched_df)
add_prediction_flags(combined_mnli_mismatched_df)
add_prediction_flags(combined_anli_r1_df)
add_prediction_flags(combined_anli_r2_df)
add_prediction_flags(combined_anli_r3_df)


In [29]:
combined_snli_df

Unnamed: 0,Deberta_Entailment,Deberta_Neutral,Deberta_Contradiction,Roberta_Entailment,Roberta_Neutral,Roberta_Contradiction,Albert_Entailment,Albert_Neutral,Albert_Contradiction,True_Label,confidence_margin_entailment,confidence_margin_neutral,confidence_margin_contradiction,Deberta_Predicted,Deberta_Correct,Roberta_Predicted,Roberta_Correct,Albert_Predicted,Albert_Correct
0,0.034767,0.962592,0.002641,0.012451,0.927093,0.060457,0.008653,0.947434,0.043913,1,0.022316,0.015158,0.016543,1,1,1,1,1,1
1,0.001921,0.319032,0.679047,0.752766,0.242251,0.004983,0.740332,0.256434,0.003235,0,0.012435,0.062598,0.674064,2,0,0,1,0,1
2,0.998783,0.000764,0.000453,0.000254,0.004494,0.995253,0.004677,0.060481,0.934843,2,0.994106,0.055987,0.060410,0,0,2,1,2,1
3,0.001001,0.997708,0.001291,0.005844,0.990736,0.003419,0.034056,0.956687,0.009257,1,0.028212,0.006972,0.005837,1,1,1,1,1,1
4,0.001080,0.301363,0.697557,0.278348,0.718575,0.003076,0.498761,0.499270,0.001969,0,0.220412,0.219305,0.694481,2,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.998825,0.001033,0.000142,0.001264,0.028942,0.969794,0.006420,0.057240,0.936340,2,0.992405,0.028298,0.033454,0,0,2,1,2,1
9996,0.000704,0.009793,0.989503,0.780946,0.217053,0.002001,0.894637,0.104095,0.001267,0,0.113691,0.112958,0.987502,2,0,0,1,0,1
9997,0.999171,0.000493,0.000336,0.000054,0.000765,0.999181,0.000838,0.002670,0.996493,2,0.998334,0.001905,0.002688,0,0,2,1,2,1
9998,0.000267,0.002178,0.997556,0.983402,0.015884,0.000714,0.984347,0.015223,0.000430,0,0.000945,0.000661,0.996842,2,0,0,1,0,1


In [30]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Assuming 'combined_snli_df' is already loaded as described

# Features and Labels
X_snli = combined_snli_df.drop('True_Label', axis=1).values
y_snli = combined_snli_df['True_Label'].values

# Features and Labels
X_mnli_matched = combined_mnli_matched_df.drop('True_Label', axis=1).values
y_mnli_matched = combined_mnli_matched_df['True_Label'].values

# Features and Labels
X_mnli_mismatched = combined_mnli_mismatched_df.drop('True_Label', axis=1).values
y_mnli_mismatched = combined_mnli_mismatched_df['True_Label'].values

# Features and Labels
X_anli_r1 = combined_anli_r1_df.drop('True_Label', axis=1).values
y_anli_r1 = combined_anli_r1_df['True_Label'].values

# Features and Labels
X_anli_r2 = combined_anli_r2_df.drop('True_Label', axis=1).values
y_anli_r2 = combined_anli_r2_df['True_Label'].values

# Features and Labels
X_anli_r3 = combined_anli_r3_df.drop('True_Label', axis=1).values
y_anli_r3 = combined_anli_r3_df['True_Label'].values


2024-04-17 09:34:39.745838: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-17 09:34:39.745933: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-17 09:34:39.863963: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [31]:
from sklearn.preprocessing import StandardScaler

# Function to scale features
def scale_features(X):
    scaler = StandardScaler()
    return scaler.fit_transform(X)

# Apply feature scaling
X_snli_scaled = scale_features(X_snli)
X_mnli_matched_scaled = scale_features(X_mnli_matched)
X_mnli_mismatched_scaled = scale_features(X_mnli_mismatched)
X_anli_r1_scaled = scale_features(X_anli_r1)
X_anli_r2_scaled = scale_features(X_anli_r2)
X_anli_r3_scaled = scale_features(X_anli_r3)

# One-hot encode labels for each dataset using TensorFlow/Keras utility
y_encoded_snli = tf.keras.utils.to_categorical(y_snli)
y_encoded_mnli_matched = tf.keras.utils.to_categorical(y_mnli_matched)
y_encoded_mnli_mismatched = tf.keras.utils.to_categorical(y_mnli_mismatched)
y_encoded_anli_r1 = tf.keras.utils.to_categorical(y_anli_r1)
y_encoded_anli_r2 = tf.keras.utils.to_categorical(y_anli_r2)
y_encoded_anli_r3 = tf.keras.utils.to_categorical(y_anli_r3)


In [32]:
X_snli_scaled

array([[-0.65460956,  1.48536003, -0.76075099, ...,  0.29794821,
         0.04084633,  0.29713748],
       [-0.72705336, -0.01460591,  0.75051539, ...,  0.29794821,
        -1.19384399,  0.29713748],
       [ 1.47158549, -0.75640249, -0.76564053, ...,  0.29794821,
         1.27553666,  0.29713748],
       ...,
       [ 1.47244222, -0.75703522, -0.76590179, ...,  0.29794821,
         1.27553666,  0.29713748],
       [-0.73070171, -0.75310812,  1.46214563, ...,  0.29794821,
        -1.19384399,  0.29713748],
       [-0.72325989,  1.56141689, -0.76411608, ...,  0.29794821,
         0.04084633,  0.29713748]])

In [33]:
X_snli_rnn = X_snli_scaled.reshape(X_snli_scaled.shape[0], 1, X_snli_scaled.shape[1])
X_mnli_matched_rnn = X_mnli_matched_scaled.reshape(X_mnli_matched_scaled.shape[0], 1, X_mnli_matched_scaled.shape[1])
X_mnli_mismatched_rnn = X_mnli_mismatched_scaled.reshape(X_mnli_mismatched_scaled.shape[0], 1, X_mnli_mismatched_scaled.shape[1])
X_anli_r1_rnn = X_anli_r1_scaled.reshape(X_anli_r1_scaled.shape[0], 1, X_anli_r1_scaled.shape[1])
X_anli_r2_rnn = X_anli_r2_scaled.reshape(X_anli_r2_scaled.shape[0], 1, X_anli_r2_scaled.shape[1])
X_anli_r3_rnn = X_anli_r3_scaled.reshape(X_anli_r3_scaled.shape[0], 1, X_anli_r3_scaled.shape[1])


In [34]:
X_snli_rnn

array([[[-0.65460956,  1.48536003, -0.76075099, ...,  0.29794821,
          0.04084633,  0.29713748]],

       [[-0.72705336, -0.01460591,  0.75051539, ...,  0.29794821,
         -1.19384399,  0.29713748]],

       [[ 1.47158549, -0.75640249, -0.76564053, ...,  0.29794821,
          1.27553666,  0.29713748]],

       ...,

       [[ 1.47244222, -0.75703522, -0.76590179, ...,  0.29794821,
          1.27553666,  0.29713748]],

       [[-0.73070171, -0.75310812,  1.46214563, ...,  0.29794821,
         -1.19384399,  0.29713748]],

       [[-0.72325989,  1.56141689, -0.76411608, ...,  0.29794821,
          0.04084633,  0.29713748]]])

In [35]:
X_snli_rnn.shape

(9824, 1, 18)

In [36]:
import numpy as np
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

def create_rnn_model(input_shape, num_classes):
    model = Sequential([
        Input(shape=input_shape),
        SimpleRNN(128, activation='relu', return_sequences=True),  # RNN layer
        Dropout(0.5),
        SimpleRNN(64, activation='relu'),  # Second RNN layer
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_and_evaluate_kfold(X, y, name, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    all_scores = []

    for train_index, val_index in kf.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        model = create_rnn_model(X_train.shape[1:], y_train.shape[1])  # Adjust input shape for RNN

        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        print(f"Training fold for {name}...")
        history = model.fit(X_train, y_train, epochs=10, batch_size=32,
                            validation_data=(X_val, y_val), callbacks=[early_stopping])

        val_loss, val_accuracy = model.evaluate(X_val, y_val)
        all_scores.append(val_accuracy)
        print(f"Validation accuracy: {val_accuracy * 100:.2f}%")

    average_accuracy = np.mean(all_scores)
    print(f"Average Validation Accuracy for {name}: {average_accuracy * 100:.2f}%")
    return average_accuracy

# Example usage:
# X_snli_rnn and y_encoded_snli should be prepared before calling this function
average_accuracy_snli = train_and_evaluate_kfold(X_snli_rnn, y_encoded_snli, 'SNLI RNN Model')


Training fold for SNLI RNN Model...
Epoch 1/10
[1m 93/246[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step - accuracy: 0.7618 - loss: 0.6011

I0000 00:00:1713346496.127855      80 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 37ms/step - accuracy: 0.8471 - loss: 0.4043 - val_accuracy: 0.9735 - val_loss: 0.0825
Epoch 2/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9572 - loss: 0.1085 - val_accuracy: 0.9771 - val_loss: 0.0579
Epoch 3/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9681 - loss: 0.0747 - val_accuracy: 0.9842 - val_loss: 0.0454
Epoch 4/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9725 - loss: 0.0656 - val_accuracy: 0.9827 - val_loss: 0.0424
Epoch 5/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9804 - loss: 0.0469 - val_accuracy: 0.9863 - val_loss: 0.0343
Epoch 6/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9810 - loss: 0.0483 - val_accuracy: 0.9863 - val_loss: 0.0307
Epoch 7/10
[1m246/246[0m [32m━━━━━

In [37]:
print("Training and evaluating on MNLI Matched dataset:")
average_accuracy_mnli_matched = train_and_evaluate_kfold(X_mnli_matched_rnn, y_encoded_mnli_matched, 'MNLI Matched RNN Model')


Training and evaluating on MNLI Matched dataset:
Training fold for MNLI Matched RNN Model...
Epoch 1/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 26ms/step - accuracy: 0.8607 - loss: 0.3961 - val_accuracy: 0.9674 - val_loss: 0.0911
Epoch 2/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9512 - loss: 0.1216 - val_accuracy: 0.9812 - val_loss: 0.0596
Epoch 3/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9660 - loss: 0.0832 - val_accuracy: 0.9878 - val_loss: 0.0408
Epoch 4/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9768 - loss: 0.0613 - val_accuracy: 0.9934 - val_loss: 0.0278
Epoch 5/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9837 - loss: 0.0448 - val_accuracy: 0.9954 - val_loss: 0.0216
Epoch 6/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accurac

In [38]:
print("Training and evaluating on MNLI Mismatched dataset:")
average_accuracy_mnli_mismatched = train_and_evaluate_kfold(X_mnli_mismatched_rnn, y_encoded_mnli_mismatched, 'MNLI Mismatched RNN Model')

Training and evaluating on MNLI Mismatched dataset:
Training fold for MNLI Mismatched RNN Model...
Epoch 1/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 51ms/step - accuracy: 0.8547 - loss: 0.3856 - val_accuracy: 0.9573 - val_loss: 0.0965
Epoch 2/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9559 - loss: 0.1116 - val_accuracy: 0.9756 - val_loss: 0.0676
Epoch 3/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9673 - loss: 0.0760 - val_accuracy: 0.9827 - val_loss: 0.0495
Epoch 4/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9769 - loss: 0.0648 - val_accuracy: 0.9929 - val_loss: 0.0390
Epoch 5/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9798 - loss: 0.0510 - val_accuracy: 0.9832 - val_loss: 0.0381
Epoch 6/10
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - a

In [39]:
print("Training and evaluating on ANLI Round 1 dataset:")
average_accuracy_anli_r1 = train_and_evaluate_kfold(X_anli_r1_rnn, y_encoded_anli_r1, 'ANLI Round 1 RNN Model')


Training and evaluating on ANLI Round 1 dataset:
Training fold for ANLI Round 1 RNN Model...
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 43ms/step - accuracy: 0.5193 - loss: 0.9861 - val_accuracy: 0.8000 - val_loss: 0.6735
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7298 - loss: 0.7048 - val_accuracy: 0.8200 - val_loss: 0.5357
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8102 - loss: 0.5463 - val_accuracy: 0.8400 - val_loss: 0.4467
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8339 - loss: 0.4551 - val_accuracy: 0.8550 - val_loss: 0.3748
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8699 - loss: 0.4157 - val_accuracy: 0.8550 - val_loss: 0.3287
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8815 - l

In [40]:
print("Training and evaluating on ANLI Round 2 dataset:")
average_accuracy_anli_r2 = train_and_evaluate_kfold(X_anli_r2_rnn, y_encoded_anli_r2, 'ANLI Round 2 RNN Model')


Training and evaluating on ANLI Round 2 dataset:
Training fold for ANLI Round 2 RNN Model...
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.4652 - loss: 1.0992 - val_accuracy: 0.7750 - val_loss: 0.6413
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6469 - loss: 0.8068 - val_accuracy: 0.8400 - val_loss: 0.4997
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7427 - loss: 0.6330 - val_accuracy: 0.8650 - val_loss: 0.4057
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7861 - loss: 0.5336 - val_accuracy: 0.8800 - val_loss: 0.3412
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7960 - loss: 0.5394 - val_accuracy: 0.9000 - val_loss: 0.2960
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8452 - l

In [41]:
print("Training and evaluating on ANLI Round 3 dataset:")
average_accuracy_anli_r3 = train_and_evaluate_kfold(X_anli_r3_rnn, y_encoded_anli_r3, 'ANLI Round 3 RNN Model')


Training and evaluating on ANLI Round 3 dataset:
Training fold for ANLI Round 3 RNN Model...
Epoch 1/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.3737 - loss: 1.1147 - val_accuracy: 0.8083 - val_loss: 0.6255
Epoch 2/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7248 - loss: 0.7573 - val_accuracy: 0.8500 - val_loss: 0.4577
Epoch 3/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7716 - loss: 0.5962 - val_accuracy: 0.8750 - val_loss: 0.3610
Epoch 4/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8016 - loss: 0.5376 - val_accuracy: 0.8875 - val_loss: 0.3033
Epoch 5/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8193 - loss: 0.4693 - val_accuracy: 0.9208 - val_loss: 0.2637
Epoch 6/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8696 - l

In [50]:
# Determining the maximum number of features
max_features = max(X_snli_scaled.shape[1], X_mnli_matched_scaled.shape[1], X_mnli_mismatched_scaled.shape[1],
                   X_anli_r1_scaled.shape[1], X_anli_r2_scaled.shape[1], X_anli_r3_scaled.shape[1])

# Function to pad arrays to the maximum feature size
def pad_features(X, max_features):
    padding = max_features - X.shape[1]
    return np.pad(X, ((0, 0), (0, padding)), 'constant')

# Pad each dataset
X_snli_rnn = pad_features(X_snli_scaled, max_features).reshape(-1, 1, max_features)
X_mnli_matched_rnn = pad_features(X_mnli_matched_scaled, max_features).reshape(-1, 1, max_features)
X_mnli_mismatched_rnn = pad_features(X_mnli_mismatched_scaled, max_features).reshape(-1, 1, max_features)
X_anli_r1_rnn = pad_features(X_anli_r1_scaled, max_features).reshape(-1, 1, max_features)
X_anli_r2_rnn = pad_features(X_anli_r2_scaled, max_features).reshape(-1, 1, max_features)
X_anli_r3_rnn = pad_features(X_anli_r3_scaled, max_features).reshape(-1, 1, max_features)

# Concatenate all reshaped and padded arrays
X_combined_rnn = np.concatenate((X_snli_rnn, X_mnli_matched_rnn, X_mnli_mismatched_rnn, X_anli_r1_rnn, X_anli_r2_rnn, X_anli_r3_rnn), axis=0)
y_combined = np.concatenate((y_encoded_snli, y_encoded_mnli_matched, y_encoded_mnli_mismatched, y_encoded_anli_r1, y_encoded_anli_r2, y_encoded_anli_r3), axis=0)

print("Shape of combined feature set:", X_combined_rnn.shape)
print("Shape of combined label set:", y_combined.shape)


Shape of combined feature set: (32671, 1, 18)
Shape of combined label set: (32671, 3)


In [52]:

def train_and_evaluate_kfold_all(X, y, name, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    all_scores = []

    for train_index, val_index in kf.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        model = create_rnn_model(X_train.shape[1:], y_train.shape[1])

        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        print(f"Training fold for {name}...")
        model.fit(X_train, y_train, epochs=10, batch_size=32,
                  validation_data=(X_val, y_val), callbacks=[early_stopping])

        val_loss, val_accuracy = model.evaluate(X_val, y_val)
        all_scores.append(val_accuracy)
        print(f"Validation accuracy: {val_accuracy * 100:.2f}%")

    average_accuracy = np.mean(all_scores)
    print(f"Average Validation Accuracy for {name}: {average_accuracy * 100:.2f}%")
    return model, average_accuracy



# Example usage:
model,average_combined_accuracy = train_and_evaluate_kfold_all(X_combined_rnn, y_combined, 'All Tasks RNN Model')

Training fold for All Tasks RNN Model...
Epoch 1/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 14ms/step - accuracy: 0.8642 - loss: 0.3456 - val_accuracy: 0.9685 - val_loss: 0.0872
Epoch 2/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9587 - loss: 0.1039 - val_accuracy: 0.9786 - val_loss: 0.0547
Epoch 3/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9707 - loss: 0.0703 - val_accuracy: 0.9830 - val_loss: 0.0414
Epoch 4/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9787 - loss: 0.0523 - val_accuracy: 0.9849 - val_loss: 0.0338
Epoch 5/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9812 - loss: 0.0443 - val_accuracy: 0.9876 - val_loss: 0.0290
Epoch 6/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9844 - loss: 0.0379 - val_accuracy: 0.9891 - va

In [55]:
# Save the model after training
model.save('RNN_features_ensemble.h5')
print("Model saved to 'RNN_features_ensemble'.")

Model saved to 'RNN_features_ensemble'.
