# Data Preparation

In [1]:
import numpy as np
import pandas as pd

# Data Training

In [3]:
def calculate_class_probabilities(train_data):
    class_probabilities = {}
    word_counts = {}
    total_count = 0

    for index, row in train_data.iterrows():
        # Access data in the 'text' and 'label' columns
        text = row['Text']
        label = row['label']
        words = text.split()

        # Update class count
        class_probabilities[label] = class_probabilities.get(label, 0) + 1

        # Update word counts
        for word in words:
            if word not in word_counts:
                word_counts[word] = {label: 0}
            word_counts[word][label] = word_counts[word].get(label, 0) + 1
            total_count += 1

    # Calculate class probabilities
    total_docs = sum(class_probabilities.values())
    for label in class_probabilities:
        class_probabilities[label] /= total_docs

    # Calculate word probabilities
    for word in word_counts:
        for label in word_counts[word]:
            word_counts[word][label] /= total_count
    #print(class_probabilities)
    return class_probabilities, word_counts


# Prediction 

In [17]:
def predict(class_probabilities, word_counts, text):
    words = text.split()
    scores = {label: 0 for label in class_probabilities}
    for label in scores:
        scores[label] = class_probabilities[label]
        for word in words:
            if word in word_counts:
                scores[label] *= word_counts[word].get(label, 0)
                
    return max(scores, key=scores.get)


# Evaluation

In [12]:
def evaluate(test_data, class_probabilities, word_counts):
    correct_predictions = 0
    total_predictions = len(test_data)
    true_positive = false_positive = true_negative = false_negative = 0
    count=0
    for index, row in test_data.iterrows():
        # Access data in the 'text' and 'label' columns
        count+=1
        text = row['Text']
        label = row['label']
        words = text.split()
        predicted_label = predict(class_probabilities, word_counts, text)
        

        if predicted_label == label:
            correct_predictions += 1

        if predicted_label == "Real" and label == "Real":
            true_positive += 1
        elif predicted_label == "Real" and label == "Fake":
            false_positive += 1
        elif predicted_label == "Fake" and label == "Fake":
            true_negative += 1
        elif predicted_label == "Fake" and label == "Real":
            false_negative += 1

    #print(correct_predictions)
    #print(len(test_data))
    #print(true_positive)
    #print(false_positive)
    #print(false_negative)
    #print(true_negative )
    accuracy = correct_predictions / len(test_data)
    precision = true_positive / (true_positive + false_positive) 
    sensitivity = true_positive / (true_positive + false_negative) 
    specificity = true_negative / (true_negative + false_positive) 

    return accuracy, precision, sensitivity, specificity

In [19]:
# Load data
#data = np.genfromtxt('fake_and_real_news.csv', delimiter=',', skip_header=1, dtype=str, encoding='utf-8')
data = pd.read_csv('fake_and_real_news.csv')
#split_index = int(len(dataset) * split_ratio)
split_index=int(70/100*len(data))
train_data = data[:split_index]
#print(train_data)
test_data = data[split_index:]

# Train model
class_probabilities, word_counts = calculate_class_probabilities(train_data)

# Evaluate model
accuracy, precision, sensitivity, specificity = evaluate(test_data, class_probabilities, word_counts)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Sensitivity (Recall):", sensitivity)
print("Specificity:", specificity)

Accuracy: 0.5498316498316498
Precision: 1.0
Sensitivity (Recall): 0.10508701472556894
Specificity: 1.0
