# Email Template Generation Project

## Overview
This notebook encompasses all stages of the email template generation project, from data preprocessing to model training and email generation.


## 1. Data Preprocessing

In [1]:
# Import necessary libraries
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

ModuleNotFoundError: No module named 'nltk'

In [None]:
# Download the NLTK resources this notebook relies on (nltk itself must be installed).
# 'stopwords' backs stopwords.words('english'); word_tokenize additionally needs the
# Punkt tokenizer data, packaged as 'punkt' in older NLTK releases and as 'punkt_tab'
# in newer ones, so both are requested.
import nltk

for resource in ('stopwords', 'punkt', 'punkt_tab'):
    # NOTE(review): with default arguments nltk.download is expected to report an
    # unavailable package and return False rather than raise — confirm for the
    # installed NLTK version.
    nltk.download(resource)

In [None]:
# Read the raw email dataset into a DataFrame.
# The CSV is expected at 'data/raw/emails.csv', relative to the working directory.
raw_data = pd.read_csv(filepath_or_buffer='data/raw/emails.csv')

In [None]:
# Build the English stopword lookup as a set for fast membership tests
# (extend the cleaning rules as needed).
stop_words = {*stopwords.words('english')}

In [None]:
def preprocess(text):
    """Lowercase, tokenize, and filter one email body.

    Args:
        text: Raw email text. Values that are not strings (for example None,
            or the float NaN that pandas typically uses for an empty CSV
            cell) are treated as an empty email.

    Returns:
        list[str]: Lowercased tokens that are purely alphanumeric and are not
        in the module-level ``stop_words`` set; ``[]`` for non-string input.
    """
    # Non-string values have no .lower(); return an empty token list instead
    # of letting one bad row abort the whole column-wide apply.
    if not isinstance(text, str):
        return []
    tokens = word_tokenize(text.lower())
    # str.isalnum() discards any token containing a non-alphanumeric
    # character (punctuation, contraction fragments such as "n't").
    return [word for word in tokens if word.isalnum() and word not in stop_words]

In [None]:
# Run preprocess over every email body and keep the resulting token lists
# in a new 'clean_text' column.
raw_data['clean_text'] = raw_data['text'].apply(func=preprocess)

In [None]:
# Preview the top rows of the cleaned data (5 is pandas' default row count for head)
raw_data.head(n=5)

## 2. Exploratory Data Analysis (EDA)

In [None]:
# Import visualization libraries
import matplotlib.pyplot as plt
from wordcloud import WordCloud

In [None]:
# Flatten the corpus into one string: tokens within an email are joined by
# spaces, and the per-email strings are joined by spaces as well.
text = ' '.join(map(' '.join, raw_data['clean_text']))

In [None]:
# Build an 800x400 word cloud on a white background from the joined corpus text
wordcloud = WordCloud(
    background_color="white",
    height=400,
    width=800,
).generate(text)

In [None]:
# Display the word cloud.
# The pyplot calls below act on the current figure, so their order matters.
plt.figure(figsize=(10, 5))  # start a new figure with a 10 x 5 figsize
plt.imshow(wordcloud, interpolation='bilinear')  # render the word cloud as an image
plt.axis("off")  # hide the axes around the image
plt.show()

In [None]:
# Measure each email by the number of tokens left after cleaning
# (this Series feeds the length-distribution histogram).
email_lengths = raw_data['clean_text'].apply(lambda tokens: len(tokens))

In [None]:
# Histogram of per-email token counts.
# The pyplot calls below act on the current figure, so their order matters.
plt.figure(figsize=(10, 6))  # start a new figure with a 10 x 6 figsize
plt.hist(email_lengths, bins=20, color='blue', edgecolor='black')  # 20 bins over the token counts
plt.title('Distribution of Email Lengths (in tokens)')
plt.xlabel('Number of Tokens')
plt.ylabel('Frequency')
plt.show()

## 3. Model Training

In [None]:
# Import necessary machine learning libraries
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Fetch the pre-trained 'gpt2' checkpoint: first the tokenizer, then the
# language-model head that produces the text.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

In [None]:
# A single hand-written prompt stands in for real data here
# (swap in your own dataset when training).
sample_email = "Hello, I wanted to follow up on our meeting."

# Encode the prompt, requesting PyTorch tensors ("pt")
inputs = tokenizer(text=sample_email, return_tensors="pt")

In [None]:
# Generate a continuation of the prompt with the pre-trained model.
# Unpacking `inputs` forwards everything the tokenizer returned (the attention
# mask as well as the input IDs), and the tokenizer's end-of-sequence token is
# supplied as the padding ID. Together these avoid generate()'s "attention mask
# and the pad token id were not set" warning and keep results reliable if the
# input is ever a padded batch.
outputs = model.generate(
    **inputs,
    max_length=50,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Convert the first generated sequence back into text, dropping special
# tokens, then show the result.
generated_text = tokenizer.decode(
    token_ids=outputs[0],
    skip_special_tokens=True,
)
print("Generated Email:", generated_text)

## 4. Model Evaluation

In [None]:
# Example of evaluating model accuracy
# Since this is a generative task, evaluation could involve metrics like BLEU or ROUGE scores for text generation.

# Import BLEU score for evaluation (if using NLTK)
from nltk.translate.bleu_score import sentence_bleu

In [None]:
# Toy evaluation data: one tokenized candidate sentence and the list of
# tokenized reference sentences it is scored against.
generated = ['hello', 'this', 'is', 'an', 'example']
reference = [
    ['hello', 'this', 'is', 'a', 'test', 'email'],
]

In [None]:
# Calculate the BLEU score.
# The example sentences are short and share no 4-gram, so unsmoothed BLEU
# collapses to (near) zero and NLTK emits a warning. A smoothing function keeps
# the score informative for short texts.
from nltk.translate.bleu_score import SmoothingFunction

bleu_score = sentence_bleu(
    reference,
    generated,
    smoothing_function=SmoothingFunction().method1,
)
print(f"BLEU Score: {bleu_score}")

# You can expand this to compare model outputs to a reference dataset and calculate overall performance.

## 5. Email Template Generation

In [None]:
# Use the pre-trained GPT-2 model loaded above to generate email templates from an input prompt (no fine-tuning is performed in the cells shown in this notebook)

# Example prompt: the opening lines that the model should continue into a full email
context = (
    "I hope this email finds you well. "
    "I'm writing to follow up on our previous discussion regarding the project."
)

In [None]:
# Encode the prompt as PyTorch tensors.
# Despite its name, `input_ids` holds the tokenizer's full output and is
# indexed with ['input_ids'] further down.
input_ids = tokenizer(text=context, return_tensors="pt")

In [None]:
# Generate the email continuation for the prompt.
# `input_ids` is the tokenizer's full output, so unpacking it forwards the
# attention mask together with the token IDs. The tokenizer's end-of-sequence
# token is supplied as the padding ID. Together these avoid generate()'s
# "attention mask and the pad token id were not set" warning.
generated_email = model.generate(
    **input_ids,
    max_length=100,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
# Convert the first generated sequence back into text, dropping special
# tokens, then show the resulting template.
generated_email_text = tokenizer.decode(
    token_ids=generated_email[0],
    skip_special_tokens=True,
)
print("Generated Email Template:", generated_email_text)

## 6. Outlook Integration

In [None]:
# Example code for integrating with Outlook using pywin32 (Windows only)

import win32com.client as win32

In [None]:
# Connect to the Outlook application through pywin32's COM Dispatch.
# NOTE(review): presumably requires desktop Outlook installed on this Windows machine — confirm.
outlook = win32.Dispatch('outlook.application')

In [None]:
# Create a new email item from the Outlook application object;
# the fields are filled in and the item is sent in the following cells.
mail = outlook.CreateItem(0)  # 0 indicates mail item

In [None]:
# Fill in the message: recipient first, then subject line and plain-text body
mail.To = "recipient@example.com"
mail.Subject = "Test Email from Python"
mail.Body = "This is a test email generated by Python using pywin32."

In [None]:
# Send the email.
# NOTE(review): this hands a real message to Outlook for the address set in
# mail.To — confirm the recipient before running this cell.
mail.Send()

# Reached only if Send() did not raise.
# NOTE(review): presumably this means the call returned, not that the message was delivered — confirm.
print("Email sent successfully!")