# Training and Analysis of ELECTRA-small on SNLI Dataset

In [None]:
!git clone https://github.com/gregdurrett/fp-dataset-artifacts
%cd fp-dataset-artifacts

In [None]:
# Ensure the required libraries are installed
!pip install transformers datasets evaluate --quiet

## Training the Model

Run the following command to train the model on the SNLI dataset.

In [None]:
!python3 run.py --do_train --task nli --dataset snli --output_dir ./trained_model/

## Evaluating the Model

Evaluate the trained model on the SNLI validation set.

In [None]:
!python3 run.py --do_eval --task nli --dataset snli --model ./trained_model/ --output_dir ./eval_output/

## Data Analysis

Inspect the SNLI dataset: its splits and columns, its labels, and a few sample examples from the training split.

In [None]:
from collections import Counter

from datasets import load_dataset

# Load SNLI dataset (all splits)
snli = load_dataset("snli")

# Analyze dataset structure: splits, columns, and row counts
print(snli)

# Label names, indexed by the integer class id stored in the 'label' column
label_names = snli['train'].features['label'].names
label_dist = label_names  # old name kept in case other cells still refer to it
print("Labels: ", label_names)

# Check label distribution: count how often each label id occurs in train.
# Ids outside the named range are reported as "unlabeled" instead of being
# looked up (presumably SNLI marks examples without a gold label with -1;
# verify against the dataset card).
label_counts = Counter(snli['train']['label'])
total_examples = sum(label_counts.values())
for label_id, count in sorted(label_counts.items()):
    if 0 <= label_id < len(label_names):
        label_name = label_names[label_id]
    else:
        label_name = "unlabeled"
    print(f"  {label_id:>2} ({label_name}): {count} ({count / total_examples:.1%})")

# Sample data points (slicing returns a dict of column name -> list of values)
print(snli['train'][0:5])

## Visualizing Results

Load the metrics file written by the evaluation step, report the model's accuracy, and plot the recorded metrics.

In [None]:
import json
import matplotlib.pyplot as plt

# Load evaluation results written by the evaluation cell
with open('./eval_output/eval_metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

# Report accuracy.
# NOTE(review): run.py presumably dumps Hugging Face Trainer.evaluate() output,
# whose keys carry an "eval_" prefix by default, so 'eval_accuracy' is tried
# first and the bare 'accuracy' key second. A missing metric is reported
# explicitly instead of being printed as a misleading 0.00%.
accuracy = metrics.get('eval_accuracy', metrics.get('accuracy'))
if accuracy is None:
    print(f"No accuracy metric found; available keys: {sorted(metrics)}")
else:
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Plot every numeric metric; non-numeric entries (strings, nested objects,
# nulls, booleans) are skipped because bar heights must be numbers.
numeric_metrics = {
    key: value
    for key, value in metrics.items()
    if isinstance(value, (int, float)) and not isinstance(value, bool)
}
metrics_keys = list(numeric_metrics.keys())
values = list(numeric_metrics.values())

fig, ax = plt.subplots()
ax.bar(metrics_keys, values)
ax.set_title('Evaluation Metrics')
ax.set_ylabel('Value')
# Metric names are long; tilt them so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()