In [None]:
# Install required libraries
!pip install torch tensorflow transformers datasets scikit-learn

In [None]:
# Report whether PyTorch can see a CUDA device
import torch

gpu_available = torch.cuda.is_available()
print("GPU Available:", gpu_available)

In [None]:
# Load and preprocess the IMDB dataset
from datasets import load_dataset
from transformers import AutoTokenizer

dataset = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def preprocess_function(examples):
    """Tokenize a batch of reviews into fixed-length model inputs.

    Args:
        examples: A batch supplied by ``dataset.map(..., batched=True)``;
            only its ``'text'`` column is read.

    Returns:
        The tokenizer output for the batch, truncated and padded so that
        every row has the same length.
    """
    # padding=True would pad only to the longest row of each map batch, so rows
    # coming from different map batches could have different lengths and fail
    # to stack into a tensor at training time. padding="max_length" makes every
    # row the same length regardless of which map batch it was in.
    return tokenizer(examples['text'], truncation=True, padding="max_length")

tokenized_dataset = dataset.map(preprocess_function, batched=True)

In [None]:
# Instantiate the pretrained DistilBERT checkpoint for sequence classification
# with two output labels
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)

In [None]:
# Define Training Arguments and Trainer
import dataclasses

import numpy as np
from transformers import Trainer, TrainingArguments


def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer's evaluation loop.

    Args:
        eval_pred: The prediction object handed over by the Trainer; its
            ``predictions`` (logits) and ``label_ids`` attributes are read.

    Returns:
        A dict with a single ``"accuracy"`` entry. The Trainer prefixes metric
        names with ``eval_``, so it appears as ``eval_accuracy`` in the
        output of ``trainer.evaluate()``.
    """
    predicted_labels = np.argmax(eval_pred.predictions, axis=-1)
    return {"accuracy": float((predicted_labels == eval_pred.label_ids).mean())}


# Newer transformers releases call this argument `eval_strategy`; older ones
# call it `evaluation_strategy`. Use whichever the installed version defines.
_training_arg_names = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=200,
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # Without this, evaluation reports no accuracy metric at all.
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-Tune the Model
# Runs training on the `trainer` built in the previous cell. As the last
# expression of the cell, the value returned by train() is displayed.
trainer.train()

In [None]:
# Persist the fine-tuned model and its tokenizer into the same directory
SAVE_DIR = "./fine_tuned_model"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

In [None]:
# Evaluate the Model
# Runs evaluation on the eval_dataset the Trainer was given (the IMDB "test"
# split) and keeps the returned metrics dict in `results`, which the
# reflection cell at the end of the notebook reads.
results = trainer.evaluate()
print("Evaluation Results:", results)

In [None]:
# Per-class precision / recall / F1 on the test split
from sklearn.metrics import classification_report
import numpy as np

test_split = tokenized_dataset["test"]
predictions = trainer.predict(test_split)

# Ground-truth labels, and the predicted class taken as the highest-scoring
# column of the prediction array.
y_true = test_split["label"]
y_pred = np.argmax(predictions.predictions, axis=-1)

report = classification_report(y_true, y_pred)
print(report)

In [None]:
# Reflection & Suggestions for Improvement
import numpy as np

# `results` only contains "eval_accuracy" when the Trainer was built with a
# compute_metrics function that reports accuracy. When the key is missing,
# derive accuracy from the predictions and labels of the detailed-metrics cell
# instead of failing with a KeyError.
accuracy = results.get('eval_accuracy')
if accuracy is None:
    accuracy = float(np.mean(np.asarray(y_pred) == np.asarray(y_true)))

print(f"Test accuracy: {accuracy:.4f}")
if accuracy < 0.90:
    print("\nSuggested Improvements:")
    print("- Increase the number of training epochs.")
    print("- Experiment with different learning rates.")
    print("- Use data augmentation techniques to improve generalization.")
    print("- Fine-tune on a larger dataset for better accuracy.")