In [29]:
# TF-IDF simulation

from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Load the inputs for the TF-IDF simulation from files in the working directory.
# NOTE(review): joblib.load unpickles its input and can execute arbitrary code;
# only load .pkl files that come from a trusted source.
tfidf_matrix = joblib.load('tfidf_matrix.pkl')
# NOTE(review): tfidf_vectorizer is loaded but not referenced in the visible cells.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
# Assumes `pd` (pandas) was imported by an earlier cell — TODO confirm.
# The simulation function reads the 'clickbait' column of this frame.
data = pd.read_csv('clickbait_data.csv')

# Recommendation function
def simulate_recommendations_tfidf(data, tfidf_matrix, n_iterations=100, n_choices=3, seed=None):
    """Simulate random proposals and clicks, tallying them by clickbait label.

    On every iteration ``n_choices`` distinct row positions are drawn uniformly
    at random from ``range(tfidf_matrix.shape[0])`` ("proposed" items) and one
    of those positions is then picked uniformly at random ("clicked" item).
    Only the row count of ``tfidf_matrix`` is used; its values do not
    influence which rows are drawn.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'clickbait'`` column. A row counts as clickbait when
        that value equals 1; anything else counts as non-clickbait. Rows are
        addressed by position, not by index label.
    tfidf_matrix : object exposing ``.shape``
        ``shape[0]`` is the number of row positions to sample from.
        NOTE(review): presumably has one row per row of ``data`` — confirm.
    n_iterations : int, default 100
        Number of simulated rounds.
    n_choices : int, default 3
        Number of items proposed per round.
    seed : int or None, default None
        When given, a private ``random.Random(seed)`` drives the draws so the
        result is reproducible and the global ``random`` state is untouched.
        When None, the module-level ``random`` functions are used.

    Returns
    -------
    dict
        Integer counts under the keys ``'proposed_clickbait'``,
        ``'proposed_non_clickbait'``, ``'clicked_clickbait'`` and
        ``'clicked_non_clickbait'``.

    Raises
    ------
    ValueError
        From ``sample`` when ``n_choices`` is negative or larger than the
        number of rows in ``tfidf_matrix``.
    IndexError
        When a drawn position lies beyond the rows of ``data``, or when
        ``n_choices`` is 0 (nothing to pick the clicked item from).
    """
    rng = random if seed is None else random.Random(seed)

    # Resolve the labels once; looking them up through data.iloc[idx] inside
    # the loop would build a full row Series for every single lookup.
    is_clickbait = (data['clickbait'] == 1).to_numpy()
    population = range(tfidf_matrix.shape[0])

    counts = {
        'proposed_clickbait': 0,
        'proposed_non_clickbait': 0,
        'clicked_clickbait': 0,
        'clicked_non_clickbait': 0,
    }

    for _ in range(n_iterations):
        indices = rng.sample(population, n_choices)
        chosen_index = rng.choice(indices)

        # Tally the proposed items by label.
        hits = sum(1 for idx in indices if is_clickbait[idx])
        counts['proposed_clickbait'] += hits
        counts['proposed_non_clickbait'] += len(indices) - hits

        # Tally the clicked item by label.
        if is_clickbait[chosen_index]:
            counts['clicked_clickbait'] += 1
        else:
            counts['clicked_non_clickbait'] += 1

    return counts

# Simulation 
# Run 100 simulated rounds, leaving the number of choices per round at its default.
results_tfidf = simulate_recommendations_tfidf(
    data=data,
    tfidf_matrix=tfidf_matrix,
    n_iterations=100,
)
print("TF-IDF Results:", results_tfidf)


TF-IDF Results: {'proposed_clickbait': 149, 'proposed_non_clickbait': 151, 'clicked_clickbait': 46, 'clicked_non_clickbait': 54}


In [30]:
# Word2Vec simulation 

# Rebind `data` to the object pickled in 'clickbait_data_with_vectors.pkl'
# (presumably a DataFrame with a 'clickbait' column — the simulation below
# relies on that; confirm against the cell that wrote the file).
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
data = pd.read_pickle('clickbait_data_with_vectors.pkl')

# Recommendation function
def simulate_recommendations_word2vec(data, n_iterations=100, n_choices=3, seed=None):
    """Simulate random proposals and clicks, tallying them by clickbait label.

    On every iteration ``n_choices`` distinct row positions are drawn uniformly
    at random from ``range(len(data))`` ("proposed" items) and one of those
    positions is then picked uniformly at random ("clicked" item). Only the
    ``'clickbait'`` column of ``data`` is read; no other column influences
    which rows are drawn.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'clickbait'`` column. A row counts as clickbait when
        that value equals 1; anything else counts as non-clickbait. Rows are
        addressed by position, not by index label.
    n_iterations : int, default 100
        Number of simulated rounds.
    n_choices : int, default 3
        Number of items proposed per round.
    seed : int or None, default None
        When given, a private ``random.Random(seed)`` drives the draws so the
        result is reproducible and the global ``random`` state is untouched.
        When None, the module-level ``random`` functions are used.

    Returns
    -------
    dict
        Integer counts under the keys ``'proposed_clickbait'``,
        ``'proposed_non_clickbait'``, ``'clicked_clickbait'`` and
        ``'clicked_non_clickbait'``.

    Raises
    ------
    ValueError
        From ``sample`` when ``n_choices`` is negative or larger than
        ``len(data)``.
    IndexError
        When ``n_choices`` is 0 (nothing to pick the clicked item from).
    """
    rng = random if seed is None else random.Random(seed)

    # Resolve the labels once; looking them up through data.iloc[idx] inside
    # the loop would build a full row Series for every single lookup.
    is_clickbait = (data['clickbait'] == 1).to_numpy()
    population = range(len(data))

    counts = {
        'proposed_clickbait': 0,
        'proposed_non_clickbait': 0,
        'clicked_clickbait': 0,
        'clicked_non_clickbait': 0,
    }

    for _ in range(n_iterations):
        indices = rng.sample(population, n_choices)
        chosen_index = rng.choice(indices)

        # Tally the proposed items by label.
        hits = sum(1 for idx in indices if is_clickbait[idx])
        counts['proposed_clickbait'] += hits
        counts['proposed_non_clickbait'] += len(indices) - hits

        # Tally the clicked item by label.
        if is_clickbait[chosen_index]:
            counts['clicked_clickbait'] += 1
        else:
            counts['clicked_non_clickbait'] += 1

    return counts

# Simulation 
# Run 100 simulated rounds, leaving the number of choices per round at its default.
results_word2vec = simulate_recommendations_word2vec(
    data=data,
    n_iterations=100,
)
print("Word2Vec Results:", results_word2vec)


Word2Vec Results: {'proposed_clickbait': 154, 'proposed_non_clickbait': 146, 'clicked_clickbait': 48, 'clicked_non_clickbait': 52}


In [31]:
import random
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Rebind `data` to the object pickled in 'clickbait_data_with_sentence_vectors.pkl'
# (presumably a DataFrame with a 'clickbait' column — the simulation below
# relies on that; confirm against the cell that wrote the file).
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
data = pd.read_pickle('clickbait_data_with_sentence_vectors.pkl')

# Recommendation function
def simulate_recommendations_sentence_transformer(data, n_iterations=100, n_choices=3, seed=None):
    """Simulate random proposals and clicks, tallying them by clickbait label.

    On every iteration ``n_choices`` distinct row positions are drawn uniformly
    at random from ``range(len(data))`` ("proposed" items) and one of those
    positions is then picked uniformly at random ("clicked" item). Only the
    ``'clickbait'`` column of ``data`` is read; no other column influences
    which rows are drawn. Progress is reported through a ``tqdm`` bar labelled
    "Simulating recommendations".

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'clickbait'`` column. A row counts as clickbait when
        that value equals 1; anything else counts as non-clickbait. Rows are
        addressed by position, not by index label.
    n_iterations : int, default 100
        Number of simulated rounds.
    n_choices : int, default 3
        Number of items proposed per round.
    seed : int or None, default None
        When given, a private ``random.Random(seed)`` drives the draws so the
        result is reproducible and the global ``random`` state is untouched.
        When None, the module-level ``random`` functions are used.

    Returns
    -------
    dict
        Integer counts under the keys ``'proposed_clickbait'``,
        ``'proposed_non_clickbait'``, ``'clicked_clickbait'`` and
        ``'clicked_non_clickbait'``.

    Raises
    ------
    ValueError
        From ``sample`` when ``n_choices`` is negative or larger than
        ``len(data)``.
    IndexError
        When ``n_choices`` is 0 (nothing to pick the clicked item from).
    """
    rng = random if seed is None else random.Random(seed)

    # Resolve the labels once; looking them up through data.iloc[idx] inside
    # the loop would build a full row Series for every single lookup.
    is_clickbait = (data['clickbait'] == 1).to_numpy()
    population = range(len(data))

    counts = {
        'proposed_clickbait': 0,
        'proposed_non_clickbait': 0,
        'clicked_clickbait': 0,
        'clicked_non_clickbait': 0,
    }

    for _ in tqdm(range(n_iterations), desc="Simulating recommendations"):
        indices = rng.sample(population, n_choices)
        chosen_index = rng.choice(indices)

        # Tally the proposed items by label.
        hits = sum(1 for idx in indices if is_clickbait[idx])
        counts['proposed_clickbait'] += hits
        counts['proposed_non_clickbait'] += len(indices) - hits

        # Tally the clicked item by label.
        if is_clickbait[chosen_index]:
            counts['clicked_clickbait'] += 1
        else:
            counts['clicked_non_clickbait'] += 1

    return counts

# Simulation 
# Run 100 simulated rounds, leaving the number of choices per round at its default.
results_sentence_transformer = simulate_recommendations_sentence_transformer(
    data=data,
    n_iterations=100,
)
print("Sentence Transformer Results:", results_sentence_transformer)


Simulating recommendations: 100%|██████████████████████████████| 100/100 [00:00<00:00, 10152.26it/s]

Sentence Transformer Results: {'proposed_clickbait': 148, 'proposed_non_clickbait': 152, 'clicked_clickbait': 53, 'clicked_non_clickbait': 47}





In [32]:
# Function to analyze results and calculate percentages
def analyze_results(results, method_name):
    """Print proposed/clicked clickbait counts with their percentage shares.

    Parameters
    ----------
    results : dict
        Must contain the integer counts ``'proposed_clickbait'``,
        ``'proposed_non_clickbait'``, ``'clicked_clickbait'`` and
        ``'clicked_non_clickbait'``.
    method_name : str
        Label printed in the ``--- ... ---`` header line.

    Returns
    -------
    None
        The report is written to standard output: a header, four
        ``label: count (xx.xx%)`` lines, then two blank lines.

    Notes
    -----
    Proposed shares are relative to the sum of the two proposed counts and
    clicked shares to the sum of the two clicked counts. When such a sum is
    zero (e.g. a simulation run with zero iterations) the share is reported
    as 0.00% instead of raising ``ZeroDivisionError``.
    """
    def _share(count, total):
        # Percentage of `count` within `total`; 0.0 when there is nothing to divide by.
        return count / total * 100 if total else 0.0

    total_proposed = results['proposed_clickbait'] + results['proposed_non_clickbait']
    total_clicked = results['clicked_clickbait'] + results['clicked_non_clickbait']

    # (printed label, key in `results`, denominator for the percentage)
    rows = (
        ("Proposed Clickbait", 'proposed_clickbait', total_proposed),
        ("Proposed Non-Clickbait", 'proposed_non_clickbait', total_proposed),
        ("Clicked Clickbait", 'clicked_clickbait', total_clicked),
        ("Clicked Non-Clickbait", 'clicked_non_clickbait', total_clicked),
    )

    print(f"--- {method_name} ---")
    for label, key, total in rows:
        print(f"{label}: {results[key]} ({_share(results[key], total):.2f}%)")
    print("\n")
    

In [33]:
# Print the same report for each method, in the order the simulations were run.
# Each entry is (heading line, results dict, method label for the report header).
for heading, method_results, method_label in (
    ("TF-IDF Analysis:", results_tfidf, "TF-IDF"),
    ("Word2Vec Analysis:", results_word2vec, "Word2Vec"),
    ("Sentence Transformer Analysis:", results_sentence_transformer, "Sentence Transformer"),
):
    print(heading)
    analyze_results(method_results, method_label)


TF-IDF Analysis:
--- TF-IDF ---
Proposed Clickbait: 149 (49.67%)
Proposed Non-Clickbait: 151 (50.33%)
Clicked Clickbait: 46 (46.00%)
Clicked Non-Clickbait: 54 (54.00%)


Word2Vec Analysis:
--- Word2Vec ---
Proposed Clickbait: 154 (51.33%)
Proposed Non-Clickbait: 146 (48.67%)
Clicked Clickbait: 48 (48.00%)
Clicked Non-Clickbait: 52 (52.00%)


Sentence Transformer Analysis:
--- Sentence Transformer ---
Proposed Clickbait: 148 (49.33%)
Proposed Non-Clickbait: 152 (50.67%)
Clicked Clickbait: 53 (53.00%)
Clicked Non-Clickbait: 47 (47.00%)


