In [5]:
import re
from urllib.parse import urlparse
from tensorflow import keras

# Feature extraction functions
def fd_length(url):
    """Return the length of the first directory segment of the URL path.

    The path component is split on '/', and the segment that follows the
    leading slash is measured.

    Args:
        url: URL string to inspect.

    Returns:
        int: Length of the first path segment, or 0 when the path contains
        no '/' at all (for example a URL without a path).
    """
    path_segments = urlparse(url).path.split('/')
    try:
        return len(path_segments[1])
    except IndexError:
        # Only a missing second segment is expected here; anything else
        # should surface instead of being silently turned into 0.
        return 0

def digit_count(url):
    """Count the characters of ``url`` for which ``str.isnumeric()`` is true."""
    numeric_chars = [ch for ch in url if ch.isnumeric()]
    return len(numeric_chars)

def letter_count(url):
    """Count the characters of ``url`` for which ``str.isalpha()`` is true."""
    total = 0
    for ch in url:
        if ch.isalpha():
            total += 1
    return total

def no_of_dir(url):
    """Return how many '/' characters appear in the path component of ``url``."""
    # Splitting on '/' yields one more piece than there are slashes.
    return len(urlparse(url).path.split('/')) - 1

def having_ip_address(url):
    """Flag URLs that contain an IP address.

    Three forms are recognised anywhere in the string:
      * dotted-decimal IPv4 followed by '/',
      * IPv4 written with hexadecimal octets (``0x..``) followed by '/',
      * a full eight-group IPv6 address.

    The alternation bar between the hexadecimal-IPv4 and IPv6 alternatives
    used to be missing, which fused them into a single pattern that required
    a hex IPv4 address immediately followed by an IPv6 address.

    NOTE(review): if the model was trained on features produced by the fused
    pattern, hex-IPv4 and IPv6 URLs will now yield -1 instead of 1 - confirm
    against the training pipeline.

    Args:
        url: URL string to inspect.

    Returns:
        int: -1 when an IP address is found, 1 otherwise.
    """
    ip_pattern = (
        # Dotted-decimal IPv4 followed by '/'
        r'(([01]?\d\d?|2[0-4]\d|25[0-5])\.([01]?\d\d?|2[0-4]\d|25[0-5])\.'
        r'([01]?\d\d?|2[0-4]\d|25[0-5])\.([01]?\d\d?|2[0-4]\d|25[0-5])\/)|'
        # IPv4 with hexadecimal octets followed by '/'
        r'((0x[0-9a-fA-F]{1,2})\.(0x[0-9a-fA-F]{1,2})\.'
        r'(0x[0-9a-fA-F]{1,2})\.(0x[0-9a-fA-F]{1,2})\/)|'
        # Full (uncompressed) IPv6
        r'(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}'
    )
    return -1 if re.search(ip_pattern, url) else 1

def hostname_length(url):
    """Return the length of the network-location (``netloc``) part of ``url``."""
    parsed_url = urlparse(url)
    return len(parsed_url.netloc)

def url_length(url):
    """Return the length of the path component of ``url``.

    Despite the name, only the path is measured - not the whole URL.
    """
    parsed_url = urlparse(url)
    return len(parsed_url.path)

def get_counts(url):
    """Count occurrences of selected characters and substrings in ``url``.

    Returns:
        list[int]: Nine counts, in this order:
        '-', '@', '?', '%', '.', '=', 'http', 'https', 'www'.
        Because 'https' contains 'http', every 'https' is also counted
        under 'http'.
    """
    tokens = ('-', '@', '?', '%', '.', '=', 'http', 'https', 'www')
    return [url.count(token) for token in tokens]

# Feature extraction function
def extract_features(url):
    """Assemble the feature list for ``url``.

    Order of the returned values: hostname length, path length, first
    directory length, the counts from ``get_counts``, numeric-character
    count, letter count, number of '/' in the path, IP-address flag.
    """
    length_features = [hostname_length(url), url_length(url), fd_length(url)]
    count_features = get_counts(url)
    remaining_features = [
        digit_count(url),
        letter_count(url),
        no_of_dir(url),
        having_ip_address(url),
    ]
    return length_features + count_features + remaining_features

# Model prediction function
def get_prediction(url, model_path):
    """Load the saved Keras model, score ``url`` and print a verdict.

    Args:
        url: URL string to classify.
        model_path: Path handed to ``keras.models.load_model``.

    Returns:
        float: The first model output for the URL, scaled to a percentage
        and rounded to three decimals.
    """
    # Imported locally so the function does not depend on another cell
    # having imported numpy first.
    import numpy as np

    print("Loading the model...")
    model = keras.models.load_model(model_path)

    print("Extracting features from url...")
    url_features = extract_features(url)

    print("Making prediction...")
    # Pass an explicit 2-D array holding one sample: newer Keras releases
    # reject nested Python lists as model input.
    feature_batch = np.asarray([url_features])
    prediction = model.predict(feature_batch)

    # Convert the NumPy scalar to a plain float before scaling and rounding.
    probability = round(float(prediction[0][0]) * 100, 3)
    print(f"There is {probability}% chance the URL is malicious!")

    if probability < 10:
        print("The website is more safe.")
    elif probability < 25:
        print("The website is quite safe.")
    elif probability < 50:
        print("The website is having some Malicious activity.")
    else:
        print("Restricted!!! malicious website detected.")

    return probability

if __name__ == "__main__":
    # URL to classify
    url = "https://www.google.com/"

    # Location of the trained model file
    model_path = r"/content/drive/MyDrive/Phising_ML/Url_model.h5"

    # Score the URL; get_prediction prints the verdict and returns the percentage
    prediction = get_prediction(url, model_path)





Loading the model...
Extracting features from url...
Making prediction...
There is 1.512% chance the URL is malicious!
The website is more safe.


In [None]:
import streamlit as st
from urllib.parse import urlparse
from tensorflow import keras
import re

# Feature extraction functions
def fd_length(url):
    """Return the length of the first path segment of ``url`` (0 if the path has no '/')."""
    segments = urlparse(url).path.split('/')
    if len(segments) < 2:
        # No slash in the path, so there is no first directory.
        return 0
    return len(segments[1])

def digit_count(url):
    """Count the characters of ``url`` for which ``str.isnumeric()`` is true."""
    count = 0
    for ch in url:
        if ch.isnumeric():
            count += 1
    return count

def letter_count(url):
    """Count the characters of ``url`` for which ``str.isalpha()`` is true."""
    letters = [ch for ch in url if ch.isalpha()]
    return len(letters)

def no_of_dir(url):
    """Return how many '/' characters appear in the path component of ``url``."""
    path = urlparse(url).path
    return sum(1 for ch in path if ch == '/')

def having_ip_address(url):
    """Return -1 if ``url`` contains a dotted quad of 1-3 digit groups, else 1.

    The groups are not range-checked, so values above 255 also match.

    NOTE(review): confirm this pattern matches the feature definition that
    was used when the model was trained.
    """
    found = re.search(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', url)
    if found is None:
        return 1
    return -1

def hostname_length(url):
    """Return the length of the network-location (``netloc``) part of ``url``."""
    netloc = urlparse(url).netloc
    return len(netloc)

def url_length(url):
    """Return the length of the path component of ``url``.

    Despite the name, only the path is measured - not the whole URL.
    """
    path = urlparse(url).path
    return len(path)

def get_counts(url):
    """Count occurrences of selected characters and substrings in ``url``.

    Returns:
        list[int]: Counts for '-', '@', '?', '%', '.', '=', 'http',
        'https' and 'www', in that order.
    """
    tokens = ('-', '@', '?', '%', '.', '=', 'http', 'https', 'www')
    return list(map(url.count, tokens))

# Feature extraction function
def extract_features(url):
    """Assemble the feature list for ``url``.

    Order of the returned values: hostname length, path length, first
    directory length, the counts from ``get_counts``, numeric-character
    count, letter count, number of '/' in the path, IP-address flag.
    """
    url_features = [hostname_length(url), url_length(url), fd_length(url)]
    url_features.extend(get_counts(url))
    url_features.extend(
        extractor(url)
        for extractor in (digit_count, letter_count, no_of_dir, having_ip_address)
    )
    return url_features

# Model prediction function
def _load_model(model_path):
    """Read the saved Keras model at ``model_path`` from disk."""
    return keras.models.load_model(model_path)


def get_prediction(url, model_path="/content/drive/MyDrive/Phising_ML/Url_model.h5"):
    """Score ``url`` with the saved Keras model.

    Args:
        url: URL string to classify.
        model_path: Path handed to ``keras.models.load_model``; defaults to
            the location the app has always used.

    Returns:
        tuple[float, str]: The first model output scaled to a percentage and
        rounded to three decimals, and the matching verdict message.
    """
    # Imported locally so the function does not rely on a numpy import
    # elsewhere in the file.
    import numpy as np

    # Streamlit re-executes the whole script on every interaction;
    # st.cache_resource keeps one loaded model per path instead of reading
    # the file from disk on each submit.
    model = st.cache_resource(_load_model)(model_path)

    url_features = extract_features(url)
    # Pass an explicit 2-D array holding one sample: newer Keras releases
    # reject nested Python lists as model input.
    prediction = model.predict(np.asarray([url_features]))

    # Convert the NumPy scalar to a plain float before scaling and rounding.
    probability = round(float(prediction[0][0]) * 100, 3)

    if probability < 10:
        result = "The website is more safe."
    elif probability < 25:
        result = "The website is quite safe."
    elif probability < 50:
        result = "The website is having some malicious activity."
    else:
        result = "Restricted!!! Malicious website detected."

    return probability, result

# Streamlit app
st.title("URL Maliciousness Prediction")


def _clear_url_input():
    """Button callback: empty the URL text box before the script reruns."""
    st.session_state["url_input"] = ""


# The widget key exposes the text box through st.session_state, which is what
# lets Reset clear it. Rebinding the local variable after the widget has been
# drawn has no effect on what the user sees.
url_input = st.text_input("Enter URL:", key="url_input")
submit_button = st.button("Submit")
reset_button = st.button("Reset", on_click=_clear_url_input)

if submit_button:
    # Ignore surrounding whitespace so a blank-looking entry is treated as empty.
    url_to_check = url_input.strip()
    if url_to_check:
        probability, result = get_prediction(url_to_check)
        st.write(f"Prediction: There is {probability}% chance the URL is malicious!")
        st.write(result)
    else:
        st.write("Please enter a URL.")


In [None]:
!pip install streamlit

In [None]:
# Fetch ipv4.icanhazip.com quietly and write the response to stdout.
# NOTE(review): presumably this prints the machine's public IPv4 address for
# use with the localtunnel page - confirm.
!wget -q -O - ipv4.icanhazip.com

In [None]:
# Start the Streamlit app in the background (&) and open a localtunnel to port 8501.
# NOTE(review): app.py must already exist at this path; nothing visible in this
# notebook writes it - confirm it holds the Streamlit cell's code.
! streamlit run /content/drive/MyDrive/Phising_ML/app.py & npx localtunnel --port 8501