1. Import necessary libraries.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
import joblib


2. Dataset loading.

In [2]:
# Read the dataset from a CSV file addressed by a relative path.
csv_path = "data.csv"
df = pd.read_csv(csv_path)
print("Dataset loaded successfully.")


Dataset loaded successfully.


3. Preprocessing the data.

In [3]:
# One-hot encode the categorical columns into a NEW frame rather than rebinding
# `df`. Overwriting `df` made this cell non-idempotent: a second run failed
# because "Gender" and "Post_Type" had already been replaced by dummy columns.
# drop_first=True drops the first level of each encoded column.
df_encoded = pd.get_dummies(df, columns=["Gender", "Post_Type"], drop_first=True)

# Feature matrix: every remaining column except User_ID, Platform, Country and
# the target itself.
X = df_encoded.drop(columns=["User_ID", "Misinformation_Spread", "Platform", "Country"])
# Target labels the classifiers are fitted against.
y = df_encoded["Misinformation_Spread"]

print("Data preprocessing completed.")


Data preprocessing completed.


4. Split the dataset.

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
print("Dataset split into training and testing sets.")


Dataset split into training and testing sets.


5. Define eight machine learning models.

In [5]:
# Candidate classifiers keyed by display name. Insertion order is preserved,
# which is the order the training loop visits them in.
model_seed = 42  # random_state shared by the seeded estimators below

models = dict([
    ("Random Forest", RandomForestClassifier(n_estimators=100, max_depth=10, random_state=model_seed)),
    ("Logistic Regression", LogisticRegression(max_iter=500, random_state=model_seed)),
    ("Decision Tree", DecisionTreeClassifier(max_depth=10, random_state=model_seed)),
    ("SVM", SVC(kernel='rbf', probability=True, random_state=model_seed)),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
    ("AdaBoost", AdaBoostClassifier(n_estimators=50, random_state=model_seed)),
    ("Gradient Boosting", GradientBoostingClassifier(n_estimators=100, max_depth=5, random_state=model_seed)),
])
print("Defined machine learning models.")


Defined machine learning models.


6. Train and evaluate every model, keeping the most accurate one.

In [6]:
# Fit every candidate on the training split, score it on the held-out test
# split, and remember the most accurate estimator.
# NOTE(review): the winner is picked using the same test split that produces
# the reported accuracy, so that figure is an optimistic estimate. Consider
# selecting with cross-validation on the training data and scoring the winner
# on the test split once.
best_model = None
# Start below any attainable accuracy so a model scoring exactly 0.0 can still
# be selected; `best_model` then stays None only when `models` is empty.
best_accuracy = float("-inf")

for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of {name}: {accuracy:.4f}")

    # Strict '>' keeps the earliest model when two accuracies tie.
    if accuracy > best_accuracy:
        best_model = model
        best_accuracy = accuracy

# Avoid reporting "NoneType" as the winner when nothing was trained.
if best_model is None:
    print("No model was trained; nothing to select.")
else:
    print(f"Best model: {type(best_model).__name__} with accuracy: {best_accuracy:.4f}")


Training Random Forest...
Accuracy of Random Forest: 0.4995
Training Logistic Regression...
Accuracy of Logistic Regression: 0.5015
Training Decision Tree...
Accuracy of Decision Tree: 0.4913
Training SVM...
Accuracy of SVM: 0.4983
Training Naive Bayes...
Accuracy of Naive Bayes: 0.5055
Training K-Nearest Neighbors...
Accuracy of K-Nearest Neighbors: 0.5208
Training AdaBoost...




Accuracy of AdaBoost: 0.5108
Training Gradient Boosting...
Accuracy of Gradient Boosting: 0.5018
Best model: KNeighborsClassifier with accuracy: 0.5208


7. Saving the best model.

In [7]:
# Persist the selected estimator to disk with joblib (compress=3).
# Compare against None explicitly: `if best_model:` evaluates the object's
# truthiness, which for objects defining __len__/__bool__ reflects their
# contents rather than whether a model was actually selected.
if best_model is not None:
    joblib.dump(best_model, "best_model.joblib", compress=3)
    print("Best model saved as 'best_model.joblib'")
else:
    print("No best model available; nothing was saved.")


Best model saved as 'best_model.joblib'
