# AI-Powered Phishing Email Detection 📧🔒
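This notebook demonstrates an end-to-end pipeline for detecting phishing emails: text cleaning, TF-IDF feature extraction, and a logistic-regression baseline, followed by a placeholder for a deep learning (LSTM or BERT) model.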

In [2]:
# 🛠️ Step 1: Install Required Libraries (uncomment and run if they are not already installed)
# %pip install pandas numpy scikit-learn nltk matplotlib seaborn keras tensorflow transformers

# 📦 Step 2: Import Libraries
import pandas as pd
import numpy as np
import re
import nltk
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from transformers import BertTokenizer, TFBertForSequenceClassification
from tensorflow.keras.optimizers import Adam

# Fetch the NLTK data the preprocessing step relies on. Each resource is
# requested exactly once (the previous version asked for 'wordnet' twice).
nltk.download('stopwords')   # stop-word lists read via stopwords.words('english')
nltk.download('punkt')       # tokenizer models used by nltk.word_tokenize
nltk.download('punkt_tab')   # tokenizer tables that newer NLTK releases load instead of 'punkt'
nltk.download('wordnet')     # lexical database behind WordNetLemmatizer
nltk.download('omw-1.4')     # extra WordNet data some NLTK releases need for lemmatizing

ModuleNotFoundError: No module named 'pandas'

In [None]:
# 2. Load Dataset (placeholder)
# Replace the path below with your own dataset or data-loading logic.
# Later cells expect a 'body' column (the email text) and a 'label' column.
# Read the CSV into a DataFrame; the trailing expression previews its first
# five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='phishing_dataset.csv')
df.head(n=5)

In [None]:
# 3. Preprocess Emails
def _english_stopwords():
    """Return NLTK's English stop-word list as a frozenset, loaded on first use.

    The set is stored on the function object so the word list is read once
    instead of once per token, and membership tests are O(1).
    """
    cached = getattr(_english_stopwords, '_cached', None)
    if cached is None:
        cached = frozenset(stopwords.words('english'))
        _english_stopwords._cached = cached
    return cached


def clean_email(text):
    """Turn a raw email body into a space-joined string of lemmatized tokens.

    Steps, in order: lower-case; strip HTML tags; strip URLs and e-mail
    addresses; drop every character that is not an ASCII letter or
    whitespace; tokenize; remove English stop words; lemmatize.

    Parameters
    ----------
    text : str
        Raw email body. Any non-string value (None, or the float NaN that
        pandas uses for missing cells) is treated as an empty body.

    Returns
    -------
    str
        The surviving tokens joined by single spaces; '' if nothing survives.
    """
    # Missing bodies are not strings and would crash on .lower().
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # Tags are removed first: otherwise the URL pattern below, which runs to the
    # next whitespace, swallows the rest of an <a href="http://..."> tag along
    # with the link text that follows it. A space (not '') is substituted so
    # that words separated only by a tag, e.g. "hello<br>world", stay separate.
    text = re.sub(r'<.*?>', ' ', text)
    # 'http\S+' already covers https URLs, so no separate alternative is needed.
    text = re.sub(r'http\S+|www\S+', '', text)
    text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    stop_words = _english_stopwords()
    lemmatizer = WordNetLemmatizer()
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in nltk.word_tokenize(text)
        if token not in stop_words
    )

# Run clean_email on every value of the 'body' column and store the results
# in a new 'cleaned_body' column of the same DataFrame.
df['cleaned_body'] = df['body'].apply(clean_email)

In [None]:
# 4. TF-IDF Vectorization
# Split the cleaned text BEFORE fitting TF-IDF so that the vocabulary and IDF
# weights are learned from the training rows only. Fitting on every row first
# leaks test-set statistics into the features and inflates the reported scores.
y = df['label']
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_body'],
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split (and the metrics) are reproducible
)

vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary/IDF on train only
X_test = vectorizer.transform(text_test)        # reuse the fitted vocabulary

# Feature matrix for the whole corpus, kept so that code which still refers to
# X keeps working. It is built with the training-fitted vectorizer.
X = vectorizer.transform(df['cleaned_body'])

In [None]:
# 5. Train ML Model - Logistic Regression
# Baseline classifier: fit on the training split, predict the held-out split,
# then print per-class precision / recall / F1.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)
preds = model.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=preds))

In [None]:
# 6. Placeholder for LSTM or BERT Model (Deep Learning)
# TODO: add the deep learning code here, using Keras or Hugging Face Transformers