In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

# === Step 1: Load Dataset ===
data_path = r"C:\Users\sagni\Downloads\Email Phising Detector\Phishing_Email.csv"
df = pd.read_csv(data_path)

# === Step 2: Preprocessing ===
# Drop Unnamed column if present
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])

# Fill missing values if any
df['Email Text'] = df['Email Text'].fillna("")

# Encode labels: 'Phishing Email' → 1, 'Safe Email' → 0
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Email Type'])

# Save label encoder
joblib.dump(label_encoder, "label_encoder.pkl")

# === Step 3: Vectorization ===
vectorizer = TfidfVectorizer(stop_words='english', max_features=10000)
X = vectorizer.fit_transform(df['Email Text']).toarray()

# Save vectorizer
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

y = df['Label'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# === Step 4: Build Neural Network Model ===
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# === Step 5: Train Model ===
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model.fit(X_train, y_train, epochs=10, batch_size=32,
          validation_split=0.2, callbacks=[early_stop])

# === Step 6: Evaluate ===
y_pred = model.predict(X_test).flatten()
y_pred_label = np.where(y_pred > 0.5, 1, 0)

print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred_label, target_names=label_encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred_label):.4f}")

# === Step 7: Save Model ===
model.save("email_phishing_model.h5")
print("\n✅ Model and vectorizer saved successfully.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.8469 - loss: 0.3169 - val_accuracy: 0.9678 - val_loss: 0.0789
Epoch 2/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9847 - loss: 0.0395 - val_accuracy: 0.9698 - val_loss: 0.0753
Epoch 3/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9853 - loss: 0.0267 - val_accuracy: 0.9718 - val_loss: 0.0767
Epoch 4/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9864 - loss: 0.0224 - val_accuracy: 0.9718 - val_loss: 0.0799
Epoch 5/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9869 - loss: 0.0242 - val_accuracy: 0.9718 - val_loss: 0.0822
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

=== Classification Report ===




                precision    recall  f1-score   support

Phishing Email       0.96      0.97      0.97      1457
    Safe Email       0.98      0.97      0.98      2273

      accuracy                           0.97      3730
     macro avg       0.97      0.97      0.97      3730
  weighted avg       0.97      0.97      0.97      3730

Accuracy: 0.9737

✅ Model and vectorizer saved successfully.


In [2]:
from tensorflow.keras.models import load_model
import joblib

# Load model and vectorizer
model = load_model("email_phishing_model.h5")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
label_encoder = joblib.load("label_encoder.pkl")

# Sample input
email_text = "Dear user, your account has been compromised. Click here to reset."

# Vectorize and predict
x_input = vectorizer.transform([email_text]).toarray()
pred = model.predict(x_input)[0][0]
label = label_encoder.inverse_transform([int(pred > 0.5)])

print(f"Prediction: {label[0]}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
Prediction: Phishing Email
