In [3]:
import os
import datetime
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
import pickle

In [4]:
#part 1: SVM on original data
start_date = "20201117"
end_date = "20210520"
start_date_datetime = datetime.datetime.strptime(start_date, "%Y%m%d")
end_date_datetime = datetime.datetime.strptime(end_date, "%Y%m%d")
proc_date = start_date_datetime
duration = 300  # t

data_check_list = os.listdir("Data/")
data_check_dic = {i: 1 for i in data_check_list}


dataset_X = []
dataset_y = []

for _ in range(duration):
    # process the data in this date
    proc_date_str = proc_date.strftime("%Y-%m-%d")

    input_data_tmp_path = "Tmp/" + proc_date_str + "/"
    input_data_label_path = "Label/" + proc_date_str + "/"
    output_data_folder_path = "Label/All/"
    output_model_folder_path = "Model/"

    if not proc_date_str in data_check_dic.keys():
        proc_date = proc_date + datetime.timedelta(days=1)
        if proc_date == end_date_datetime:
            break
        continue

    if not os.path.exists(output_data_folder_path):
        os.makedirs(output_data_folder_path)

    if not os.path.exists(output_model_folder_path):
        os.makedirs(output_model_folder_path)

    input_data_feature = input_data_tmp_path + "tweet_feature_original"
    input_data_label = input_data_label_path + "labeled_tweets.txt"

    label_dic = {}

    with open(
        input_data_label, "r", encoding="utf-8", errors="ignore"
    ) as file_label_in:
        for label_line in file_label_in:
            label_line_split = label_line.strip().split("\t")
            label_tweet_id = label_line_split[0]
            label_dic[label_tweet_id] = int(label_line_split[-1])

    with open(
        input_data_feature, "r", encoding="utf-8", errors="ignore"
    ) as file_feature_in:
        for feature_line in file_feature_in:
            feature_line_split = feature_line.strip().split("\t")
            feature_tweet_id = feature_line_split[0]
            if feature_tweet_id in label_dic.keys():
                dataset_X.append([float(i) for i in feature_line_split[2:-1]])
                dataset_y.append(label_dic[feature_tweet_id])
    #     print(label_dic)

    proc_date = proc_date + datetime.timedelta(days=1)
    if proc_date == end_date_datetime:
        break

In [5]:
# Split data
dataset_X = np.array(dataset_X)
dataset_y = np.array(dataset_y)

scaler = MinMaxScaler()
dataset_X = scaler.fit_transform(dataset_X)

data_train, data_test, labels_train, labels_test = train_test_split(
    dataset_X, dataset_y, test_size=0.20, random_state=42
)

In [6]:
# Define classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Initialize the SVM classifier
model = SVC()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained SVM model
save_model_path = output_model_folder_path + "svm_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

Training accuracy: 0.7392241379310345
Test accuracy: 0.7948717948717948


In [7]:
# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1)) 
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "svm_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)

True Positives (TP): 1
True Negatives (TN): 92
False Positives (FP): 1
False Negatives (FN): 23
Precision:  0.5
Recall:  0.041666666666666664
F1 Score:  0.07692307692307693


In [13]:
#Part 2: SVM on modified data

start_date = "20201117"
end_date = "20210520"
start_date_datetime = datetime.datetime.strptime(start_date, "%Y%m%d")
end_date_datetime = datetime.datetime.strptime(end_date, "%Y%m%d")
proc_date = start_date_datetime
duration = 300  # t

data_check_list = os.listdir("Data/")
data_check_dic = {i: 1 for i in data_check_list}


dataset_X = []
dataset_y = []

for _ in range(duration):
    # process the data in this date
    proc_date_str = proc_date.strftime("%Y-%m-%d")

    input_data_tmp_path = "Tmp/" + proc_date_str + "/"
    input_data_label_path = "Label/" + proc_date_str + "/"
    output_data_folder_path = "Label/All/"
    output_model_folder_path = "Model/"

    if not proc_date_str in data_check_dic.keys():
        proc_date = proc_date + datetime.timedelta(days=1)
        if proc_date == end_date_datetime:
            break
        continue

    if not os.path.exists(output_data_folder_path):
        os.makedirs(output_data_folder_path)

    if not os.path.exists(output_model_folder_path):
        os.makedirs(output_model_folder_path)

    input_data_feature = input_data_tmp_path + "tweet_feature"
    input_data_label = input_data_label_path + "labeled_tweets.txt"

    label_dic = {}

    with open(
        input_data_label, "r", encoding="utf-8", errors="ignore"
    ) as file_label_in:
        for label_line in file_label_in:
            label_line_split = label_line.strip().split("\t")
            label_tweet_id = label_line_split[0]
            label_dic[label_tweet_id] = int(label_line_split[-1])

    with open(
        input_data_feature, "r", encoding="utf-8", errors="ignore"
    ) as file_feature_in:
        for feature_line in file_feature_in:
            feature_line_split = feature_line.strip().split("\t")
            feature_tweet_id = feature_line_split[0]
            if feature_tweet_id in label_dic.keys():
                dataset_X.append([float(i) for i in feature_line_split[2:-1]])
                dataset_y.append(label_dic[feature_tweet_id])
    #     print(label_dic)

    proc_date = proc_date + datetime.timedelta(days=1)
    if proc_date == end_date_datetime:
        break

# Split data
dataset_X = np.array(dataset_X)
dataset_y = np.array(dataset_y)

scaler = MinMaxScaler()
dataset_X = scaler.fit_transform(dataset_X)

data_train, data_test, labels_train, labels_test = train_test_split(
    dataset_X, dataset_y, test_size=0.20, random_state=42
)

# Initialize the SVM classifier
model = SVC()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained SVM model
save_model_path = output_model_folder_path + "svm_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1)) 
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "svm_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)


Training accuracy: 0.7306034482758621
Test accuracy: 0.8034188034188035
True Positives (TP): 1
True Negatives (TN): 93
False Positives (FP): 0
False Negatives (FN): 23
Precision:  1.0
Recall:  0.041666666666666664
F1 Score:  0.08


In [14]:
#SVM on modified data 2 - age, favorites, hashtag, text len, followers count, friends count, statuses count

start_date = "20201117"
end_date = "20210520"
start_date_datetime = datetime.datetime.strptime(start_date, "%Y%m%d")
end_date_datetime = datetime.datetime.strptime(end_date, "%Y%m%d")
proc_date = start_date_datetime
duration = 300  # t

data_check_list = os.listdir("Data/")
data_check_dic = {i: 1 for i in data_check_list}


dataset_X = []
dataset_y = []

for _ in range(duration):
    # process the data in this date
    proc_date_str = proc_date.strftime("%Y-%m-%d")

    input_data_tmp_path = "Tmp/" + proc_date_str + "/"
    input_data_label_path = "Label/" + proc_date_str + "/"
    output_data_folder_path = "Label/All/"
    output_model_folder_path = "Model/"

    if not proc_date_str in data_check_dic.keys():
        proc_date = proc_date + datetime.timedelta(days=1)
        if proc_date == end_date_datetime:
            break
        continue

    if not os.path.exists(output_data_folder_path):
        os.makedirs(output_data_folder_path)

    if not os.path.exists(output_model_folder_path):
        os.makedirs(output_model_folder_path)

    input_data_feature = input_data_tmp_path + "tweet_feature_2"
    input_data_label = input_data_label_path + "labeled_tweets.txt"

    label_dic = {}

    with open(
        input_data_label, "r", encoding="utf-8", errors="ignore"
    ) as file_label_in:
        for label_line in file_label_in:
            label_line_split = label_line.strip().split("\t")
            label_tweet_id = label_line_split[0]
            label_dic[label_tweet_id] = int(label_line_split[-1])

    with open(
        input_data_feature, "r", encoding="utf-8", errors="ignore"
    ) as file_feature_in:
        for feature_line in file_feature_in:
            feature_line_split = feature_line.strip().split("\t")
            feature_tweet_id = feature_line_split[0]
            if feature_tweet_id in label_dic.keys():
                dataset_X.append([float(i) for i in feature_line_split[2:-1]])
                dataset_y.append(label_dic[feature_tweet_id])
    #     print(label_dic)

    proc_date = proc_date + datetime.timedelta(days=1)
    if proc_date == end_date_datetime:
        break

# Split data
dataset_X = np.array(dataset_X)
dataset_y = np.array(dataset_y)

scaler = MinMaxScaler()
dataset_X = scaler.fit_transform(dataset_X)

data_train, data_test, labels_train, labels_test = train_test_split(
    dataset_X, dataset_y, test_size=0.20, random_state=42
)

# Initialize the SVM classifier
model = SVC()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained SVM model
save_model_path = output_model_folder_path + "svm_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1)) 
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "svm_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)



Training accuracy: 0.7306034482758621
Test accuracy: 0.8034188034188035
True Positives (TP): 1
True Negatives (TN): 93
False Positives (FP): 0
False Negatives (FN): 23
Precision:  1.0
Recall:  0.041666666666666664
F1 Score:  0.08


In [12]:
# SVM on modified data 3 - age, favorites, hashtag, textlen, sensitive, url count, verified, retweet count, reply count, user mentions count

start_date = "20201117"
end_date = "20210520"
start_date_datetime = datetime.datetime.strptime(start_date, "%Y%m%d")
end_date_datetime = datetime.datetime.strptime(end_date, "%Y%m%d")
proc_date = start_date_datetime
duration = 300  # t

data_check_list = os.listdir("Data/")
data_check_dic = {i: 1 for i in data_check_list}


dataset_X = []
dataset_y = []

for _ in range(duration):
    # process the data in this date
    proc_date_str = proc_date.strftime("%Y-%m-%d")

    input_data_tmp_path = "Tmp/" + proc_date_str + "/"
    input_data_label_path = "Label/" + proc_date_str + "/"
    output_data_folder_path = "Label/All/"
    output_model_folder_path = "Model/"

    if not proc_date_str in data_check_dic.keys():
        proc_date = proc_date + datetime.timedelta(days=1)
        if proc_date == end_date_datetime:
            break
        continue

    if not os.path.exists(output_data_folder_path):
        os.makedirs(output_data_folder_path)

    if not os.path.exists(output_model_folder_path):
        os.makedirs(output_model_folder_path)

    input_data_feature = input_data_tmp_path + "tweet_feature_3"
    input_data_label = input_data_label_path + "labeled_tweets.txt"

    label_dic = {}

    with open(
        input_data_label, "r", encoding="utf-8", errors="ignore"
    ) as file_label_in:
        for label_line in file_label_in:
            label_line_split = label_line.strip().split("\t")
            label_tweet_id = label_line_split[0]
            label_dic[label_tweet_id] = int(label_line_split[-1])

    with open(
        input_data_feature, "r", encoding="utf-8", errors="ignore"
    ) as file_feature_in:
        for feature_line in file_feature_in:
            feature_line_split = feature_line.strip().split("\t")
            feature_tweet_id = feature_line_split[0]
            if feature_tweet_id in label_dic.keys():
                dataset_X.append([float(i) for i in feature_line_split[2:-1]])
                dataset_y.append(label_dic[feature_tweet_id])
    #     print(label_dic)

    proc_date = proc_date + datetime.timedelta(days=1)
    if proc_date == end_date_datetime:
        break

# Split data
dataset_X = np.array(dataset_X)
dataset_y = np.array(dataset_y)

scaler = MinMaxScaler()
dataset_X = scaler.fit_transform(dataset_X)

data_train, data_test, labels_train, labels_test = train_test_split(
    dataset_X, dataset_y, test_size=0.20, random_state=42
)

# Initialize the SVM classifier
model = SVC()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained SVM model
save_model_path = output_model_folder_path + "svm_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1)) 
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "svm_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)



Training accuracy: 0.7241379310344828
Test accuracy: 0.7948717948717948
True Positives (TP): 0
True Negatives (TN): 93
False Positives (FP): 0
False Negatives (FN): 24
Precision:  nan
Recall:  0.0
F1 Score:  0.0


  precision = tp / (tp + fp)


In [7]:
from sklearn.linear_model import LogisticRegression

# Initialize the Logistic Regression classifier
model = LogisticRegression(max_iter=1000)

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained logistic regression model
save_model_path = output_model_folder_path + "log_regress_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1))
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "log_regress_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)

Training accuracy: 0.728448275862069
Test accuracy: 0.8034188034188035
True Positives (TP): 1
True Negatives (TN): 93
False Positives (FP): 0
False Negatives (FN): 23
Precision:  1.0
Recall:  0.041666666666666664
F1 Score:  0.08


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Initialize the Decision Tree classifier
model = DecisionTreeClassifier()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained logistic regression model
save_model_path = output_model_folder_path + "log_regress_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1))
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "log_regress_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)


Training accuracy: 1.0
Test accuracy: 0.7435897435897436
True Positives (TP): 12
True Negatives (TN): 75
False Positives (FP): 18
False Negatives (FN): 12
Precision:  0.4
Recall:  0.5
F1 Score:  0.4444444444444444


In [9]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest classifier
model = RandomForestClassifier()

# Fit the classifier to your data
model.fit(data_train, labels_train)

# Calculate and print the training accuracy
train_accuracy = model.score(data_train, labels_train)
print("Training accuracy:", train_accuracy)

# Calculate test accuracy
test_accuracy = model.score(data_test, labels_test)
print("Test accuracy:", test_accuracy)

# Save the trained logistic regression model
save_model_path = output_model_folder_path + "log_regress_model.p"
pickle.dump(model, open(save_model_path, "wb"))

# Save the dataset
save_data_path = output_data_folder_path + "dataset.p"
pickle.dump([data_train, labels_train], open(save_data_path, "wb"))

# Get the predicted labels
predicted_labels = model.predict(data_test)

# Calculate TP, TN, FP, FN
tp = np.sum((predicted_labels == 1) & (labels_test == 1))
tn = np.sum((predicted_labels == 0) & (labels_test == 0))
fp = np.sum((predicted_labels == 1) & (labels_test == 0))
fn = np.sum((predicted_labels == 0) & (labels_test == 1))

print(f"True Positives (TP): {tp}")
print(f"True Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * tp / (2*tp + fp + fn)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

output_data_folder_path = "Label/All/"
output_model_folder_path = "Model/"

load_model_path = output_model_folder_path + "log_regress_model.p"
loaded_model = pickle.load(open(load_model_path, "rb"))

load_data_path = output_data_folder_path + "dataset.p"
dataset_X, dataset_y = pickle.load(open(load_data_path, "rb"))

result = loaded_model.score(data_test, labels_test)


Training accuracy: 1.0
Test accuracy: 0.811965811965812
True Positives (TP): 6
True Negatives (TN): 89
False Positives (FP): 4
False Negatives (FN): 18
Precision:  0.6
Recall:  0.25
F1 Score:  0.35294117647058826
