In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv("./synthetic_fake_profile_dataset.csv")  

X = df.drop(columns=["profile_id", "suspicious_activity"])
y = df["suspicious_activity"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_model.fit(X_train, y_train)

rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", classification_report(y_test, rf_predictions))


xgb_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric="logloss",
    random_state=42,
    n_estimators=50,  
    subsample=0.8,  
    max_depth=6 
)
xgb_model.fit(X_train, y_train)

xgb_predictions = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)
print("XGBoost Accuracy:", xgb_accuracy)
print("XGBoost Classification Report:\n", classification_report(y_test, xgb_predictions))



Random Forest Accuracy: 0.9985
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1995
           1       1.00      0.40      0.57         5

    accuracy                           1.00      2000
   macro avg       1.00      0.70      0.79      2000
weighted avg       1.00      1.00      1.00      2000

XGBoost Accuracy: 0.999
XGBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1995
           1       1.00      0.60      0.75         5

    accuracy                           1.00      2000
   macro avg       1.00      0.80      0.87      2000
weighted avg       1.00      1.00      1.00      2000



In [None]:
import joblib
# joblib.dump(rf_model,'rf_model.pkl')
# joblib.dump(xgb_model,'xgb_model.pkl')


import numpy as np
model = joblib.load('./rf_model.pkl')

profile_picture = 1
friend_count= 500
post_count= 200
account_age_days= 1000
likes_per_post= 50.5
comments_per_post= 10.2
shared_links=5
about_section=0

ans = np.array([
    [
          profile_picture,
            friend_count,
            post_count,
            account_age_days,
            likes_per_post,
            comments_per_post,
            shared_links,
            about_section,
    ]
])

prediction = model.predict(ans)
print("Prediction is : ", prediction)





Prediction is :  [0]
