# In [None]:
# training/train_phishing_model.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load dataset
# You can use public phishing feeds such as PhishTank or OpenPhish
# For example:
# data = pd.read_csv('phishing_dataset.csv')

# For demonstration, let's create a dummy dataset

# Demonstration dataset, written as (url, is_phishing) pairs so every URL sits
# next to its label and the two columns can never end up with different
# lengths (pd.DataFrame raises ValueError when column lengths differ).
# Label convention: 1 = phishing, 0 = legitimate.
#
# NOTE(review): only the first four URLs had labels originally. The remaining
# entries are assumed to be legitimate sites and are labelled 0 — confirm.
# NOTE(review): some URLs are repeated, so identical rows may land in both the
# training and the test split; use a real, de-duplicated dataset before
# trusting any reported accuracy.
data = pd.DataFrame(
    [
        ('http://example.com', 0),
        ('http://malicious.com', 1),
        ('http://safe-site.org', 0),
        ('http://bad-site.net', 1),
        # Add more URLs and labels as needed for real-world websites.
        # More advanced techniques (word embeddings, richer models) can be
        # layered on top later.
        ('https://ecajmer.ac.in', 0),
        ('https://www.google.com', 0),
        ('https://www.facebook.com', 0),
        ('https://www.amazon.com', 0),
        ('https://www.instagram.com', 0),
        ('https://www.twitter.com', 0),
        ('https://www.youtube.com', 0),
        ('https://www.linkedin.com', 0),
        ('https://www.reddit.com', 0),
        ('https://www.pinterest.com', 0),
        ('https://www.snapchat.com', 0),
        ('https://www.tiktok.com', 0),
        ('https://www.whatsapp.com', 0),
        ('https://www.snapchat.com', 0),
        ('https://www.tiktok.com', 0),
        ('https://www.whatsapp.com', 0),
        ('https://www.snapchat.com', 0),
        ('https://www.tiktok.com', 0),
        ('https://www.whatsapp.com', 0),
        ('https://www.snapchat.com', 0),
        ('https://www.tiktok.com', 0),
        ('https://www.whatsapp.com', 0),
        ('https://www.snapchat.com', 0),
        ('https://grok.com', 0),
        ('https://github.com', 0),
        ('https://stackoverflow.com', 0),
    ],
    columns=['url', 'is_phishing'],
)

# Feature extraction
# Two simple lexical features are derived from each URL string; add further
# features here as needed.
data['url_length'] = data['url'].str.len()
# Test the scheme prefix rather than searching for the substring 'https'
# anywhere in the URL: a substring test would also fire on 'https' appearing
# in a hostname or path. The flag is stored as 0/1 so that it combines with
# url_length into a purely numeric feature matrix.
data['has_https'] = (
    data['url'].str.lower().str.startswith('https://').astype(int)
)


# Prepare features and labels
# Cast the feature matrix explicitly so it is a float array regardless of the
# column dtypes: an int column next to a bool column would otherwise come back
# from pandas as an object-dtype array, which TensorFlow generally cannot
# convert to a tensor.
X = data[['url_length', 'has_https']].to_numpy(dtype='float32')
y = data['is_phishing'].to_numpy()

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build model
# Small feed-forward binary classifier over the 2 input features:
# a 64-unit ReLU hidden layer, 50% dropout, and a single sigmoid output unit.
model = Sequential([
    Dense(64, input_dim=2, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Compile model
# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# reported alongside the loss during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train model
# 50 passes over the training split in mini-batches of 8. The test split is
# passed as validation data, so the per-epoch validation metrics are computed
# on the same rows that make up the held-out set.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=8,
)

# Save model
# Writes the trained model to 'phishing_model.h5', a path relative to the
# current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras's legacy HDF5 format —
# confirm what the code that loads this file expects before changing it.
model.save('phishing_model.h5')