In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from sklearn.model_selection import train_test_split
import pathlib
from sklearn.utils import shuffle

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that halts training once training accuracy reaches 95%."""

    def on_epoch_end(self, epoch, logs=None):
        """Inspect the epoch's metrics and request an early stop if warranted.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras; may be None.
        """
        accuracy = 0.95
        # `logs` can be None, and the 'accuracy' key only exists when the
        # model was compiled with a metric of that name. Comparing None with
        # a float would raise TypeError, so skip the check in both cases.
        current = (logs or {}).get('accuracy')
        if current is not None and current >= accuracy:
            print(f"\nModel accuracy {accuracy*100}%")
            self.model.stop_training = True


callbacks = myCallback()

In [None]:
# Load the phishing dataset; the raw "url" text column is dropped up front.
dataframe = pd.read_csv("../input/web-page-phishing-detection/phishing_data.csv").drop(columns="url")

# Shuffle with a fixed seed so the row order of `dataframe` is the same on
# every fresh run of this cell.
dataframe = shuffle(dataframe, random_state=42)

# Encode the textual target as 0 (legitimate) / 1 (phishing), then as float.
for label, code in {"legitimate": 0, "phishing": 1}.items():
    dataframe.loc[dataframe["status"] == label, "status"] = code
dataframe["status"] = dataframe["status"].astype(float)

# Spelled-out flags anywhere in the frame become their numeric equivalents.
dataframe = dataframe.replace({"zero": 0, "one": 1, "Zero": 0, "One": 1})

In [None]:
# Every column except the "status" target is a candidate input feature.
allColumns = [name for name in dataframe.columns if name != "status"]

In [None]:
numericColumns = []
categoricalColumns = []

for column in allColumns:
    # Collect the distinct values once; both branches below need them.
    distinct = set(dataframe[column])
    if distinct == {0, 1}:
        # Binary flag -> "No"/"Yes" labels. The column is rebuilt in a single
        # assignment rather than writing strings into a numeric column via
        # .loc, which pandas >= 2.1 deprecates (incompatible-dtype setitem).
        # Every value is 0 or 1 here, so "not 1" is exactly "0".
        dataframe[column] = np.where(dataframe[column] == 1, "Yes", "No")
        categoricalColumns.append(column)
    elif len(distinct) > 1:
        dataframe[column] = dataframe[column].astype(float)
        numericColumns.append(column)
    # Columns with a single distinct value end up in neither list.

In [None]:
# Hold out 20% of the rows for validation. The fixed random_state makes the
# partition deterministic for a given input frame (same row order in, same
# train/val rows out).
train, val = train_test_split(dataframe, test_size=0.2, random_state=42)
print(len(train), 'train examples')
print(len(val), 'val examples')

In [None]:
# Per-column {"low", "high"} bounds measured on the training split only.
minMaxScalar = {}

# Take independent copies before assigning columns: the frames handed back by
# the split may be flagged by pandas as derived from `dataframe`, in which
# case column assignment emits SettingWithCopyWarning.
train = train.copy()
val = val.copy()

for column in numericColumns:
    high = train[column].max()
    low = train[column].min()
    minMaxScalar[column] = {"low": low, "high": high}

    # A column that is constant within the training split has zero range;
    # dividing by it would fill the column with NaN/inf. Use a unit range so
    # the training values become 0 and validation values stay finite.
    span = high - low
    if span == 0:
        span = 1.0

    # Validation data is scaled with the training bounds, not its own.
    train[column] = (train[column] - low) / span
    val[column] = (val[column] - low) / span

In [None]:
def df_to_dataset(df, shuffle=True, batch_size=256):
    """Turn a DataFrame into a batched tf.data.Dataset of (features, label).

    Args:
        df: Frame containing a 'status' column plus the feature columns.
            It is copied first, so the caller's frame is not modified.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A batched dataset whose elements are (dict of column -> values, labels).
    """
    features = df.copy()
    target = features.pop('status')
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
    if not shuffle:
        return dataset.batch(batch_size)
    return dataset.shuffle(buffer_size=len(features)).batch(batch_size)

In [None]:
# Numeric features are passed to the model as plain numeric columns.
feature_columns = [feature_column.numeric_column(name) for name in numericColumns]

# Categorical features are one-hot encoded over the values seen in the frame.
for name in categoricalColumns:
    vocabulary = dataframe[name].unique()
    one_hot = feature_column.indicator_column(
        feature_column.categorical_column_with_vocabulary_list(name, vocabulary)
    )
    feature_columns.append(one_hot)

In [None]:
# Training batches are shuffled; validation batches keep the frame's row order.
train_ds = df_to_dataset(train, shuffle=True)
val_ds = df_to_dataset(val, shuffle=False)

In [None]:
# Feed-forward binary classifier on top of the feature columns. The final
# Dense(1) layer has no activation, so the model outputs raw logits.
model = tf.keras.Sequential([
    tf.keras.layers.DenseFeatures(feature_columns),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(1)
])

model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The outputs are logits, so the decision boundary is 0.0 (probability
    # 0.5). The plain 'accuracy' string would threshold the logits at 0.5 and
    # misreport accuracy. The metric keeps the name 'accuracy' so history keys
    # and the early-stopping callback are unaffected.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)
history = model.fit(train_ds, validation_data=val_ds, epochs=50, callbacks=[callbacks])

In [None]:
# Per-epoch accuracy curves recorded by model.fit.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

epocRanges = range(len(accuracy))

# Open the figure before drawing; calling plt.figure() after the plot calls
# would only create a second, empty figure.
plt.figure()
plt.plot(epocRanges, accuracy)
plt.plot(epocRanges, val_accuracy)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(["accuracy", "val_accuracy"])
plt.show()