# Model: URL Detection

## Imports

In [1]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## Load Dataset

In [2]:
# Read the phishing feature table from the Kaggle input mount.
# Adjust the path below when running against a copy stored elsewhere.
df = pd.read_csv(filepath_or_buffer="/kaggle/input/phishing-website-detector/phishing.csv")

# Preview the first five rows as a quick sanity check of the load.
df.head()


Unnamed: 0,Index,UsingIP,LongURL,ShortURL,Symbol@,Redirecting//,PrefixSuffix-,SubDomains,HTTPS,DomainRegLen,...,UsingPopupWindow,IframeRedirection,AgeofDomain,DNSRecording,WebsiteTraffic,PageRank,GoogleIndex,LinksPointingToPage,StatsReport,class
0,0,1,1,1,1,1,-1,0,1,-1,...,1,1,-1,-1,0,-1,1,1,1,-1
1,1,1,0,1,1,1,-1,-1,-1,-1,...,1,1,1,-1,1,-1,1,0,-1,-1
2,2,1,0,1,1,1,-1,-1,-1,1,...,1,1,-1,-1,1,-1,1,-1,1,-1
3,3,1,0,-1,1,1,-1,1,1,-1,...,-1,1,-1,-1,0,-1,1,1,1,1
4,4,-1,0,-1,1,-1,-1,1,1,-1,...,1,1,1,1,1,-1,1,-1,-1,1


## Feature Engineering and Data Cleaning

In [3]:
# Tally how many rows carry each distinct value of the label column.
df["class"].value_counts()


class
 1    6157
-1    4897
Name: count, dtype: int64

In [4]:
# Target vector: the label column.
y = df["class"]

# Feature matrix: every column except the 'Index' column and the label itself.
X = df.drop(["Index", "class"], axis=1)


In [5]:
# Reserve 20% of the rows as a test set; the fixed random_state keeps the
# shuffle (and therefore the split) identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [6]:
# Random forest with 100 trees; random_state seeds the estimator for
# repeatable results and n_jobs=-1 lets training use all available cores.
model = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)

# Fit on the training split. Left as the cell's final expression so the
# fitted estimator is displayed as the cell output.
model.fit(X_train, y_train)


In [7]:
# Predict labels for the test rows, then report the share predicted correctly.
preds = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=preds))


Accuracy: 0.9692446856625961


In [8]:
# Label each of the forest's importance scores with its feature name,
# then order the series from most to least important.
importances = pd.Series(data=model.feature_importances_, index=X.columns)
importances = importances.sort_values(ascending=False)

# Show the ten highest-ranked features.
importances.head(10)


HTTPS                  0.320176
AnchorURL              0.251460
WebsiteTraffic         0.071725
SubDomains             0.063525
LinksInScriptTags      0.042367
PrefixSuffix-          0.038884
ServerFormHandler      0.020566
LinksPointingToPage    0.019759
RequestURL             0.019481
DomainRegLen           0.017200
dtype: float64

In [9]:
# Serialize the fitted model to url_fraud_model.pkl (relative path, so it
# lands in the current working directory), then confirm on stdout.
joblib.dump(value=model, filename="url_fraud_model.pkl")
print("URL model trained & saved (small size)")


URL model trained & saved (small size)
