<a href="https://colab.research.google.com/github/suaiba04/phishing-detection/blob/main/phishing_mail.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ===============================
# Phishing Email Detection - Colab Notebook
# ===============================

# Cell 1: Install Required Libraries
!pip install pandas scikit-learn




In [3]:
# Cell 2: Upload Dataset
from google.colab import files
import pandas as pd

# Opens the Colab file picker; the result is keyed by uploaded filename.
uploaded = files.upload()  # Upload phishing_data.csv

# If nothing was uploaded there is no filename to read; fail with a clear
# message instead of the bare StopIteration that next(iter(...)) would raise.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and select phishing_data.csv.")

# Read the dataset (automatically uses the first uploaded file)
dataset_filename = next(iter(uploaded))
data = pd.read_csv(dataset_filename)

# The following cells index these two columns, so reject a wrong file here
# rather than failing later with a KeyError.
missing_columns = {'EmailText', 'Label'} - set(data.columns)
if missing_columns:
    raise ValueError(
        f"{dataset_filename} is missing required column(s): {sorted(missing_columns)}"
    )

data.head()

Saving phishing_data.csv to phishing_data.csv


Unnamed: 0,EmailText,Label
0,"Your account has been compromised, click the l...",1
1,Meeting scheduled at 3 PM today.,0
2,Update your password immediately to avoid susp...,1
3,Project report attached for your review.,0
4,Congratulations! You won a gift card. Claim now.,1


In [4]:
# Cell 3: Preprocessing & Train-Test Split
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Rows with a missing email body or label cannot be vectorized or fitted
# (TfidfVectorizer rejects NaN documents), so drop them before splitting.
# With a fully populated dataset this is a no-op and the split is unchanged.
labelled_emails = data.dropna(subset=['EmailText', 'Label'])

# astype(str) guards against cells that pandas parsed as numbers instead of text.
X = labelled_emails['EmailText'].astype(str)
y = labelled_emails['Label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF Vectorization
# The vocabulary (English stop words removed, at most 500 terms) is fitted on
# the training split only and then reused, unchanged, for the test split.
vectorizer = TfidfVectorizer(stop_words='english', max_features=500)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)



In [5]:
# Cell 4: Train Random Forest Model
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree forest on the TF-IDF training matrix (seeded for repeatability).
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train_vec, y_train)

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test_vec)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)



Test Accuracy: 1.0


In [6]:
# Cell 5: Save Model & Vectorizer
import pickle

# The classifier and its fitted vectorizer are stored together in one dict
# so both can be restored from a single file.
artifacts = {'model': model, 'vectorizer': vectorizer}

with open('phishing_model.pkl', 'wb') as model_file:
    pickle.dump(artifacts, model_file)

print("Model saved as phishing_model.pkl")

# Optional: Download model to local machine
from google.colab import files
files.download('phishing_model.pkl')



Model saved as phishing_model.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# Cell 6: Test New Emails
# Sample messages to classify; edit, add, or remove entries to try your own.
new_emails = [
    "Your account will be suspended unless you verify it.",
    "Meeting scheduled at 3 AM tomorrow.",
]



In [11]:
# Load saved model
# Imported here so this cell also runs in a fresh session where the
# training/saving cell (which otherwise provides `pickle`) was not executed.
import pickle

# SECURITY: pickle.load can execute arbitrary code while deserializing.
# Only load a phishing_model.pkl that you created yourself or fully trust.
with open('phishing_model.pkl', 'rb') as f:
    saved = pickle.load(f)

# The file holds a dict with the classifier and its fitted vectorizer.
model = saved['model']
vectorizer = saved['vectorizer']



In [12]:
# Transform and predict
# Use transform (not fit_transform) so the new emails are encoded with the
# vocabulary the vectorizer already holds.
new_vec = vectorizer.transform(new_emails)
predictions = model.predict(new_vec)

# Label 1 is reported as phishing, anything else as legitimate.
# The emoji literals were mis-encoded (UTF-8 bytes decoded as cp1252); restored here.
for email, pred in zip(new_emails, predictions):
    if pred == 1:
        print(f"🚨 Phishing Email: {email}")
    else:
        print(f"✅ Legitimate Email: {email}")


🚨 Phishing Email: Your account will be suspended unless you verify it.
✅ Legitimate Email: Meeting scheduled at 3 AM tomorrow.
