# **Random Forest Classifier**

In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Load the dataset, fold the 'a', 'i' and 's' labels into the single label 'f',
# then shuffle the rows with a fixed seed and renumber the index from 0.
# NOTE(review): presumably 'a'/'i'/'s' are sub-types of one category that the
# model treats as a single class -- confirm against the dataset description.
df = (
    pd.read_csv("../data/instagram.csv")
    .assign(**{'class': lambda frame: frame['class'].replace(['a', 'i', 's'], 'f')})
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [3]:
# Encode the target column as integers: 'f' -> 0, 'r' -> 1.
# An explicit map + astype is used instead of Series.replace so the result is
# int64 by construction rather than relying on pandas' implicit object-to-int
# downcasting in replace (deprecated; it emits a FutureWarning).
# The identity entries (0 -> 0, 1 -> 1) keep this cell safe to re-run after the
# column has already been encoded.
class_codes = {'f': 0, 'r': 1, 0: 0, 1: 1}

# Fail loudly on any label the mapping does not cover; otherwise map() would
# silently turn it into NaN.
unexpected_labels = set(df['class'].unique()) - set(class_codes)
if unexpected_labels:
    raise ValueError(
        f"Unexpected values in 'class' column: {sorted(unexpected_labels, key=repr)}"
    )

df['class'] = df['class'].map(class_codes).astype('int64')
print("Unique values in 'class' column:", df['class'].unique())

Unique values in 'class' column: [0 1]


  df['class'] = df['class'].replace(['r'], 1)


In [4]:
# Target vector: the encoded 'class' column.
y = df['class']
# Feature matrix: every remaining column of df.
X = df.drop(columns='class')

In [5]:
# Standardize every feature column of X.
# NOTE(review): the scaler is fitted on all rows of X, i.e. including the rows
# that the later train/test split holds out as the test set.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [6]:
# Hold out 20% of the rows for testing. The split is done on the *unscaled*
# features so the scaler can be fitted on the training rows only; fitting it
# on all rows before splitting leaks test-set statistics into preprocessing.
# With the same random_state and row count, the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Refit the scaler on the training rows and apply that same transform to both
# splits. `scaler` is rebound here, so later cells that use it (single-row
# prediction, pickling) get the train-only scaler.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [7]:
# Configure (but do not yet train) the random forest.
rf_model = RandomForestClassifier(
    # Ensemble size and reproducibility.
    n_estimators=100,
    random_state=42,
    # Tree growth: no depth cap, and the smallest allowed split/leaf sizes.
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    # Weight classes inversely to their frequency to counter class imbalance.
    class_weight='balanced',
    # Train on all available CPU cores.
    n_jobs=-1,
)

In [8]:
# Sanity check before training: which labels are present in the training
# target, and what dtype it has.
train_labels = np.unique(y_train)
train_label_dtype = y_train.dtype
print("Unique values in y_train:", train_labels)
print("Data type of y_train:", train_label_dtype)

Unique values in y_train: [0 1]
Data type of y_train: int64


In [9]:
# Train the forest on the training split. fit() returns the estimator, so it
# remains the cell's last expression.
rf_model.fit(X=X_train, y=y_train)

In [10]:
# Predict a class label for every row of the held-out test split.
y_pred = rf_model.predict(X=X_test)

In [11]:
# Score the test-set predictions: overall accuracy plus the per-class
# precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:")
print(test_report)

Accuracy: 0.9641537830072513
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97     13257
           1       0.96      0.95      0.95      8670

    accuracy                           0.96     21927
   macro avg       0.96      0.96      0.96     21927
weighted avg       0.96      0.96      0.96     21927



In [12]:
# Hand-entered feature rows for spot-checking the trained model. Each inner
# dict maps a feature column name to that feature's value for one row.
# They are kept as live data (rather than commented-out blocks) so that
# switching the scored row is a one-line change below.
sample_rows = {
    "sample_1": {
        'pos': 29,
        'flw': 475,
        'flg': 1400,
        'bl': 13,
        'pic': 1,
        'lin': 0,
        'cl': 7,
        'cz': 0.1111111119,
        'ni': 0,
        'erl': 11.2299999542,
        'erc': 0.33000000131,
        'lt': 0.6110000014,
        'hc': 0.05600000017,
        'pr': 0,
        'fo': 0,
        'cs': 0.7908499837,
        'pi': 622.0027466,
    },
    "sample_2": {
        'pos': 22,
        'flw': 559,
        'flg': 2600,
        'bl': 4,
        'pic': 1,
        'lin': 0,
        'cl': 41,
        'cz': 0.555555582,
        'ni': 0.3330000043,
        'erl': 20.430000305,
        'erc': 0.9399999976,
        'lt': 0.1110000014,
        'hc': 0.1669999957,
        'pr': 0,
        'fo': 0,
        'cs': 0.2431399971,
        'pi': 703.57653809,
    },
    "sample_3": {
        'pos': 30,
        'flw': 259,
        'flg': 251,
        'bl': 16,
        'pic': 1,
        'lin': 0,
        'cl': 39,
        'cz': 0,
        'ni': 0.6110000014,
        'erl': 19.0699999695,
        'erc': 1.0499999523,
        'lt': 0.0560000017,
        'hc': 1.0559999943,
        'pr': 0,
        'fo': 0,
        'cs': 0.0405320004,
        'pi': 488.0598755,
    },
    "sample_4": {
        'pos': 6,
        'flw': 77,
        'flg': 235,
        'bl': 0,
        'pic': 1,
        'lin': 0,
        'cl': 3,
        'cz': 0.8333333135,
        'ni': 0.1669999957,
        'erl': 8.8699998856,
        'erc': 0.6499999762,
        'lt': 0.3330000043,
        'hc': 0,
        'pr': 0,
        'fo': 0,
        'cs': 0.6666669846,
        'pi': 3847.7927246,
    },
}

# Row to score.
single_row_data = sample_rows["sample_4"]

# Select the columns in the order of the training features (X.columns) so the
# row does not depend on the key order of the dict above; a feature missing
# from the dict raises KeyError here instead of producing a misaligned row.
single_row_df = pd.DataFrame([single_row_data])[X.columns.tolist()]
single_row_scaled = scaler.transform(single_row_df)
prediction = rf_model.predict(single_row_scaled)
# Get the predicted class (0 for fake, 1 for real)
print("Predicted class:", prediction[0])

Predicted class: 1


In [13]:
# Persist the fitted model and the fitted scaler as two separate pickle files
# (model first, then scaler). Both objects are needed together at inference
# time: new rows go through the scaler before being passed to the model.
artifacts = (
    ("../models/random_forest_model.pkl", rf_model),
    ("../models/scaler.pkl", scaler),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)

print("Scaler and model saved successfully!")

Scaler and model saved successfully!
