In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

# Load the dataset
cheaters = np.load("data/cheaters.npy")
clean = np.load("data/legit.npy")

# Upsample cheaters to balance classes
cheaters = np.repeat(cheaters, 5, axis=0)
cheaters_labels = np.ones(10000, dtype=np.float32)
clean_labels = np.zeros(10000, dtype=np.float32)

# Combine data and labels
x = np.concatenate((cheaters, clean))
y = np.concatenate((cheaters_labels, clean_labels))

# Flatten the data for Random Forest input (if necessary)
x = x.reshape(x.shape[0], -1)

# Split into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=17)

# Initialize and train the Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=17, class_weight='balanced')
rf.fit(x_train, y_train)

# Evaluate the model
y_pred = rf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Random Forest Performance:")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.4f}")

Random Forest Performance:
Accuracy: 99.83%
Precision: 0.9965
