In [9]:
import pandas as pd
from helpers import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [10]:
# Loading in data and cleaning up unnecessary columns.
truth_seeker = pd.read_csv("TruthSeeker2023/Features_For_Traditional_ML_Techniques.csv")
truth_seeker = truth_seeker.drop(columns=["Unnamed: 0"])

In [11]:
# Extracting all features.
truth_seeker_features = truth_seeker.drop(columns=["majority_target", "statement", "BinaryNumTarget", "tweet", "embeddings"])

# Extracting label column for data.
truth_seeker_output = truth_seeker["BinaryNumTarget"]

# Split the data into training and testing sets (e.g., 75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(
    truth_seeker_features,
    truth_seeker_output,
    test_size=0.25,  # Adjust the test_size as needed
    random_state=100  # Set a random seed for reproducibility
)

In [12]:
# Creating and testing Decision Tree Classifier.
tree_model = DecisionTreeClassifier()
train_and_evaluate_model(tree_model, "Decision Tree Classifier", X_train, y_train, X_test, y_test)

Decision Tree Classifier Cross-Validation Scores: [0.59726776 0.60009935 0.60029806 0.59998013 0.59898654]
Decision Tree Classifier Mean Accuracy: 0.5993
Decision Tree Classifier Standard Deviation: 0.0011
Decision Tree Classifier Test Accuracy: 0.5975


In [5]:
# Creating and testing Gaussian Naive Bayes model.
nb_model = GaussianNB()
train_and_evaluate_model(nb_model, "Gaussian NB", X_train, y_train, X_test, y_test)

Gaussian NB Cross-Validation Scores: [0.56532538 0.52180825 0.49751615 0.52556014 0.52248   ]
Gaussian NB Mean Accuracy: 0.5265
Gaussian NB Standard Deviation: 0.0218
Gaussian NB Test Accuracy: 0.5339


In [6]:
# Creating and testing K Nearest Neighbors model.
knn_model = KNeighborsClassifier()
train_and_evaluate_model(knn_model, "K Nearest Neighbors", X_train, y_train, X_test, y_test)

K Nearest Neighbors Cross-Validation Scores: [0.59393939 0.58852459 0.58768008 0.59466441 0.59252819]
K Nearest Neighbors Mean Accuracy: 0.5915
K Nearest Neighbors Standard Deviation: 0.0028
K Nearest Neighbors Test Accuracy: 0.5939


In [7]:
# Create an instance of the StandardScaler.
scaler = StandardScaler()

# Scale (normalize) the features in the training and testing sets.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Creating and testing Support Vector Classifier.
svc_model = SVC()
train_and_evaluate_model(svc_model, "Support Vector Classifier", X_train_scaled, y_train, X_test_scaled, y_test)

Support Vector Classifier Cross-Validation Scores: [0.69542971 0.68857427 0.68822653 0.68741617 0.68682001]
Support Vector Classifier Mean Accuracy: 0.6893
Support Vector Classifier Standard Deviation: 0.0031
Support Vector Classifier Test Accuracy: 0.6853


In [8]:
# Creating and testing Random Forest Classifier.

rf_model = RandomForestClassifier()
train_and_evaluate_model(rf_model, "Random Forest Classifier", X_train, y_train, X_test, y_test)

Random Forest Classifier Cross-Validation Scores: [0.6941381  0.68941878 0.68415301 0.68910527 0.68935367]
Random Forest Classifier Mean Accuracy: 0.6892
Random Forest Classifier Standard Deviation: 0.0032
Random Forest Classifier Test Accuracy: 0.6916
