In [1]:
from flask import Flask, render_template, request, jsonify
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.cluster import AffinityPropagation
from sklearn.metrics.pairwise import pairwise_distances_argmin_min
from sklearn.preprocessing import StandardScaler
import pickle

In [2]:
# Flask application object; the route decorators further down register
# their view functions on this instance.
app = Flask(__name__)

In [3]:
def canopy_feature_selection(X, threshold):
    """Greedily pick canopy centers from the rows of ``X`` in a single pass.

    Rows are visited in order. A row becomes a new center unless its squared
    Euclidean distance to some already-chosen center is strictly below
    ``threshold``, in which case it is absorbed by that canopy and skipped.

    Note that, despite the name, this selects rows (samples), not columns.

    Args:
        X: Iterable of equal-length numeric vectors that support ``-``,
            ``**`` and ``.sum()`` (for example the rows of a 2-D NumPy array).
        threshold: Cut-off compared against the *squared* distance.

    Returns:
        list: The rows chosen as centers, in order of first appearance.
        Empty when ``X`` has no rows.
    """
    canopy_centers = []
    for point in X:
        # "Nearest center is closer than threshold" is the same as "any
        # center is closer than threshold"; any() also stops at the first hit.
        absorbed = any(
            ((center - point) ** 2).sum() < threshold
            for center in canopy_centers
        )
        if not absorbed:
            canopy_centers.append(point)
    # Only the centers are returned, so per-canopy membership is not tracked.
    return canopy_centers

In [4]:
def make_prediction(url, dt_model, rf_model, xgb_model):
    """Return the predicted label for ``url``.

    Placeholder implementation: neither the URL nor any of the three models
    is consulted, and the result is always the string ``"Phishing"``.
    """
    return "Phishing"

In [5]:
# Load the CSV, discard every row that has a missing value, then separate
# the 'class' target column from the remaining feature columns.
dataset = pd.read_csv('phishing.csv').dropna()
y = dataset['class']
X = dataset.drop(columns=['class'])

In [6]:
# Reduce the samples to their canopy centers (the 0.5 threshold is compared
# against a squared Euclidean distance inside canopy_feature_selection) and
# rebuild a DataFrame carrying the original feature column names.
# NOTE(review): the helper returns a subset of *rows*, not columns, so
# X_canopy_df can end up with fewer rows than `y`; nothing here filters `y`
# to match. Confirm the two are still aligned before they are split together.
X_canopy = canopy_feature_selection(X.values, threshold=0.5)
X_canopy_df = pd.DataFrame(X_canopy, columns=X.columns)

In [7]:
# Hold out 30% of the rows for evaluation; random_state pins the shuffle so
# the split is repeatable. X_canopy_df and y must have the same number of rows.
X_train, X_test, y_train, y_test = train_test_split(X_canopy_df, y, test_size=0.3, random_state=42)

In [8]:
# Base learners for the voting ensemble. Each one is seeded (same value as
# the train/test split) so that a Restart & Run All reproduces the same
# fitted models and metrics instead of drifting from run to run.
dt_model = DecisionTreeClassifier(random_state=42)
rf_model = RandomForestClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42)

In [9]:
# Combine the three base learners under the names 'dt', 'rf' and 'xgb';
# voting='hard' means the ensemble predicts the majority class label.
voting_model = VotingClassifier([('dt', dt_model), ('rf', rf_model), ('xgb', xgb_model)], voting='hard')

In [10]:
# Candidate hyper-parameters for the grid search. Each key is written as
# <estimator name>__<parameter>, where the estimator name is the label the
# learner was given inside the voting ensemble ('dt', 'rf', 'xgb').
param_grid = dict(
    dt__max_depth=[5, 10, 15],
    rf__n_estimators=[50, 100, 200],
    xgb__n_estimators=[50, 100, 200],
)

In [11]:
# Exhaustive search over param_grid (3 x 3 x 3 = 27 candidates), each scored
# with 5-fold cross-validation on the training split only.
grid_search = GridSearchCV(voting_model, param_grid, cv=5)
grid_search.fit(X_train, y_train)

In [12]:

# Predict on the held-out split with the fitted search object, then compute
# the four summary metrics against the true labels in one pass.
y_pred = grid_search.predict(X_test)
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [18]:
# Display the accuracy_score computed on the held-out test split.
accuracy

0.964124208622249

In [19]:
# Display the precision_score computed on the held-out test split.
precision

0.9677938808373591

In [20]:
# Display the recall_score computed on the held-out test split.
recall

0.9683136412459721

In [21]:
# Display the f1_score computed on the held-out test split.
f1

0.9680536912751678

In [22]:

# Persist the *fitted* decision tree. GridSearchCV and VotingClassifier both
# train clones of the estimators they are given, so the module-level
# `dt_model` is never fitted; the trained copy lives on the refit best
# estimator under the name it was registered with ('dt').
# NOTE(review): VotingClassifier fits its members on label-encoded targets,
# so this model's raw predictions may need mapping back through
# grid_search.best_estimator_.le_ - confirm against whatever loads the file.
with open('dt_model.pkl', 'wb') as f:
    pickle.dump(grid_search.best_estimator_.named_estimators_['dt'], f)

In [23]:
# Persist the *fitted* random forest. GridSearchCV and VotingClassifier both
# train clones of the estimators they are given, so the module-level
# `rf_model` is never fitted; the trained copy lives on the refit best
# estimator under the name it was registered with ('rf').
# NOTE(review): VotingClassifier fits its members on label-encoded targets,
# so this model's raw predictions may need mapping back through
# grid_search.best_estimator_.le_ - confirm against whatever loads the file.
with open('rf_model.pkl', 'wb') as f:
    pickle.dump(grid_search.best_estimator_.named_estimators_['rf'], f)

In [24]:
# Persist the *fitted* XGBoost model. GridSearchCV and VotingClassifier both
# train clones of the estimators they are given, so the module-level
# `xgb_model` is never fitted; the trained copy lives on the refit best
# estimator under the name it was registered with ('xgb').
# NOTE(review): VotingClassifier fits its members on label-encoded targets,
# so this model's raw predictions may need mapping back through
# grid_search.best_estimator_.le_ - confirm against whatever loads the file.
with open('xgb_model.pkl', 'wb') as f:
    pickle.dump(grid_search.best_estimator_.named_estimators_['xgb'], f)

In [27]:
# NOTE: running this cell a second time in the same kernel registers the
# `home` endpoint again on the existing `app`, and Flask rejects that with
# "View function mapping is overwriting an existing endpoint function".
# Re-create `app` (or restart the kernel) before re-running.
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template('index.html')

AssertionError: View function mapping is overwriting an existing endpoint function: home

In [None]:
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the URL supplied in the POSTed JSON body.

    Request body: ``{"url": "<non-empty string>"}``.

    Returns:
        ``{"prediction": <label>}`` on success, or ``{"error": <message>}``
        with HTTP status 400 when the body is not JSON or has no usable
        ``url`` field.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad-input case is reported through one JSON 400.
    payload = request.get_json(silent=True)
    url = payload.get('url') if isinstance(payload, dict) else None
    if not isinstance(url, str) or not url.strip():
        return jsonify({'error': "JSON body with a non-empty 'url' string is required"}), 400

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. These paths must only ever point at artifacts this project wrote.
    loaded_models = []
    for model_path in ('dt_model.pkl', 'rf_model.pkl', 'xgb_model.pkl'):
        with open(model_path, 'rb') as f:
            loaded_models.append(pickle.load(f))
    dt_model, rf_model, xgb_model = loaded_models

    prediction = make_prediction(url, dt_model, rf_model, xgb_model)
    return jsonify({'prediction': prediction})

In [None]:
if __name__ == '__main__':
    # Start Flask's development server. Debug mode would normally enable the
    # auto-reloader, which restarts the launching process and cannot do so
    # from inside a notebook kernel, so the reloader is switched off.
    # Debug mode also exposes an interactive debugger: development use only.
    app.run(debug=True, use_reloader=False)