# 03 - Model Training

### Description:
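This notebook loads the preprocessed email data and the pretrained GPT-2 model and tokenizer, with the goal of training the model to generate email text. The training step is currently a placeholder that only runs a sample generation; the full training loop still needs to be added. The model and tokenizer are then saved for future use.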

In [None]:
# Import necessary libraries
import pandas as pd
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Read the preprocessed email CSV into a DataFrame.
processed_csv = 'data/processed/cleaned_data.csv'
cleaned_data = pd.read_csv(processed_csv)

In [None]:
# Instantiate the pretrained GPT-2 language model and its tokenizer; both are
# loaded from the same 'gpt2' checkpoint name.
checkpoint = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(checkpoint)
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)

In [None]:
# Build an example input from the first row of the cleaned data (the full
# dataset would be used for actual training).
first_email = cleaned_data['clean_text'].iloc[0]
if not isinstance(first_email, str):
    # read_csv yields NaN (a float) for empty cells; fail with a clear message
    # instead of an opaque error further down.
    raise TypeError(
        "Expected 'clean_text' to contain text, "
        f"got {type(first_email).__name__}"
    )
# Use the text as-is: " ".join() over a str inserts a space between every
# character and corrupts the prompt.
# NOTE(review): if the column stores a stringified token list, parse it before
# use -- confirm against the preprocessing step.
sample_text = first_email
# truncation=True caps the prompt at the tokenizer's maximum length so an
# over-long email cannot exceed the model's context window.
inputs = tokenizer(sample_text, return_tensors="pt", truncation=True)

In [None]:
# NOTE: this cell does not train the model yet. It only runs text generation
# on the sample input as a placeholder; a full training loop (forward pass
# with labels, backward pass, optimizer step) still needs to be added here.
outputs = model.generate(
    inputs['input_ids'],
    # Pass the mask explicitly so generate() does not have to infer it.
    attention_mask=inputs['attention_mask'],
    # max_length would also count the prompt tokens, leaving no room to
    # generate when the prompt is 100 tokens or longer; budget new tokens only.
    max_new_tokens=100,
    # GPT-2 defines no pad token; use EOS for padding during generation.
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Write the model and the tokenizer to their own directories under 'models/'.
model_dir = 'models/trained_gpt2_model'
tokenizer_dir = 'models/trained_gpt2_tokenizer'
model.save_pretrained(model_dir)
tokenizer.save_pretrained(tokenizer_dir)

In [None]:
# Report where the artifacts were written.
done_message = "Model and tokenizer saved in 'models/' directory."
print(done_message)