In [2]:
pip install scikit-learn pandas tensorflow


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import os
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# 📁 Paths
csv_path = r"C:\Users\sagni\Downloads\Email Phising Detector\Phishing_Email.csv"
save_dir = r"C:\Users\sagni\Downloads\Email Phising Detector"
os.makedirs(save_dir, exist_ok=True)

# 📥 Load Dataset
df = pd.read_csv(csv_path)

# 🎯 Features and Labels
X_raw = df["Email Text"].astype(str)
y_raw = df["Email Type"]

# 🧹 Label Encoding
le = LabelEncoder()
y_encoded = le.fit_transform(y_raw)
y_categorical = to_categorical(y_encoded)

# 💾 Save Label Encoder
with open(os.path.join(save_dir, "label_encoder.pkl"), "wb") as f:
    pickle.dump(le, f)

# 🔤 TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=5000)
X_vectorized = vectorizer.fit_transform(X_raw).toarray()

# 💾 Save Vectorizer
with open(os.path.join(save_dir, "tfidf_vectorizer.pkl"), "wb") as f:
    pickle.dump(vectorizer, f)

# 🔀 Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_categorical, test_size=0.2, random_state=42)

# 🧠 Build Model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(y_categorical.shape[1], activation='softmax'))

# ⚙️ Compile Model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 🏋️ Train Model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# 💾 Save model in multiple formats
model.save(os.path.join(save_dir, "email_model.keras"))             # ✅ Modern format
model.save(os.path.join(save_dir, "email_model_legacy.h5"))        # ✅ Legacy format

# 💾 Save model architecture (JSON)
model_json = model.to_json()
with open(os.path.join(save_dir, "email_model_architecture.json"), "w") as json_file:
    json_file.write(model_json)

# 💾 Save model weights (must end with `.weights.h5`)
model.save_weights(os.path.join(save_dir, "email_model.weights.h5"))

print("✅ All files saved in:", save_dir)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8699 - loss: 0.2689 - val_accuracy: 0.9692 - val_loss: 0.0795
Epoch 2/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9825 - loss: 0.0404 - val_accuracy: 0.9651 - val_loss: 0.0849
Epoch 3/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9871 - loss: 0.0262 - val_accuracy: 0.9658 - val_loss: 0.0920
Epoch 4/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9887 - loss: 0.0211 - val_accuracy: 0.9578 - val_loss: 0.0983
Epoch 5/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9855 - loss: 0.0231 - val_accuracy: 0.9662 - val_loss: 0.1021
Epoch 6/10
[1m373/373[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9857 - loss: 0.0234 - val_accuracy: 0.9688 - val_loss: 0.1041
Epoch 7/10
[1m373/373[0m 



✅ All files saved in: C:\Users\sagni\Downloads\Email Phising Detector
