In [3]:
import torch
from transformers import BertTokenizer, BertModel, pipeline

# Tokenizer/model pairs keyed by device string, so the BERT weights are loaded
# once per kernel session instead of on every text_embedding() call.
_BERT_CACHE = {}


def _load_bert(device):
    """Return the cached ``(tokenizer, model)`` pair for ``device``, loading it on first use."""
    key = str(device)
    if key not in _BERT_CACHE:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
        bert_model.eval()
        _BERT_CACHE[key] = (tokenizer, bert_model)
    return _BERT_CACHE[key]


def text_embedding(data, batch_size=128):
    """Embed texts with ``bert-base-uncased`` using mean pooling over tokens.

    Parameters
    ----------
    data : iterable of str (e.g. a pandas Series or a list) or a single str
        Texts to embed. A bare string is treated as one text.
    batch_size : int, default 128
        Number of texts sent through the model per forward pass.

    Returns
    -------
    numpy.ndarray
        One row per input text, ``hidden_size`` columns. An empty input
        yields an array with zero rows instead of raising.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    Pooling is weighted by the attention mask, so padding tokens added to
    align a batch do not leak into the average. For an input that needs no
    padding (e.g. a single text) this equals the plain mean over tokens.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer, bert_model = _load_bert(device)

    texts = [data] if isinstance(data, str) else list(data)
    if not texts:
        return torch.empty((0, bert_model.config.hidden_size)).numpy()

    def get_bert_embeddings(batch_texts):
        tokens = tokenizer(batch_texts, padding=True, truncation=True, return_tensors='pt').to(device)
        with torch.no_grad():
            hidden = bert_model(**tokens).last_hidden_state
        # Average only over real tokens; padded positions have mask value 0.
        mask = tokens['attention_mask'].unsqueeze(-1).to(hidden.dtype)
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        # Move each batch off the accelerator right away so device memory
        # does not grow with the number of batches.
        return pooled.cpu()

    embeddings_list = [
        get_bert_embeddings(texts[start:start + batch_size])
        for start in range(0, len(texts), batch_size)
    ]
    return torch.cat(embeddings_list, dim=0).numpy()

In [2]:
import pandas as pd

df = pd.read_csv('../original_files/politifact_data_2022_score.csv')

df['documented_time'] = pd.to_datetime(df['documented_time'])

# Drop the ratings that have no entry in label_map below, and keep only
# rows documented in 2022 or later. The index labels of the CSV are kept
# (no reset_index) because later cells refer to them.
excluded_labels = {'full-flop', 'half-flip', 'no-flip'}
is_recent = df['documented_time'].dt.year >= 2022
df = df[~df['label'].isin(excluded_labels) & is_recent].copy()

# Ordinal encoding of the rating: larger number = less truthful.
label_map = {'pants-fire': 5, 'false': 4, 'barely-true': 3,
             'half-true': 2, 'mostly-true': 1, 'true': 0}
# Map the textual ratings first and cast afterwards: casting first raises
# ValueError as soon as the column still contains a rating string. With this
# order the cell also works if the CSV already stores numeric scores
# (replace is then a no-op). Any value that is neither a known rating nor
# numeric still fails loudly in astype(int).
df['label'] = df['label'].replace(label_map).astype(int)

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from gnews import GNews
import numpy as np
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Weaviate
import nltk
import torch
from transformers import BertTokenizer, BertModel, pipeline
nltk.download('punkt')
def rag(content, top_n=10, max_results=4, class_name="test_dataset_1"):
    """Retrieve news related to ``content`` and store it as chunks in Weaviate.

    Steps: extract the ``top_n`` highest-scoring TF-IDF terms of ``content``,
    search Google News with them, download up to ``max_results`` articles,
    split their text into chunks, embed each chunk with ``text_embedding``
    and create one Weaviate object per chunk.

    Relies on names defined in other notebook cells: ``pd`` (pandas),
    ``text_embedding`` and the global Weaviate ``client``.

    Parameters
    ----------
    content : str
        Article text the keywords are extracted from. Non-string or blank
        input is skipped with a message.
    top_n : int, default 10
        Number of keywords used for the news query.
    max_results : int, default 4
        Maximum number of news articles to download.
    class_name : str, default "test_dataset_1"
        Weaviate class the chunks are written to.

    Returns
    -------
    None
    """
    if not isinstance(content, str) or not content.strip():
        print("Skipping: no article text to extract keywords from.")
        return

    # Keyword extraction
    print("Retrieving keywords...")
    # "english" selects scikit-learn's built-in English stop-word list; the
    # string form is accepted by old and new scikit-learn releases alike.
    tfidf_vectorizer = TfidfVectorizer(stop_words="english")
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([content])
    except ValueError as err:
        # Raised e.g. when the text consists of stop words only.
        print(f"Skipping: could not extract keywords ({err})")
        return
    # get_feature_names() no longer exists in newer scikit-learn; prefer the
    # replacement and fall back to the old name for older installations.
    get_names = (getattr(tfidf_vectorizer, "get_feature_names_out", None)
                 or tfidf_vectorizer.get_feature_names)
    feature_names = get_names()
    scores = tfidf_matrix.toarray().flatten()
    indices = scores.argsort()[::-1]
    top_features = [(feature_names[i], scores[i]) for i in indices[:top_n]]
    keywords = " ".join(feature for feature, _ in top_features)
    for feature, score in top_features:
        print(f"{feature}: {score}")

    # News retrieval
    print("RAG: Getting new evidence...")
    google_news = GNews()
    google_news.max_results = max_results
    google_news.language = 'english'
    google_news.start_date = (2022, 1, 1)
    google_news.end_date = (2023, 12, 31)
    news = google_news.get_news(keywords) or []

    article_text = []
    for item in news[:max_results]:
        url = item.get('url')
        if not url:
            continue
        try:
            article = google_news.get_full_article(url)
        except Exception as err:
            # One unreachable site must not discard the remaining results.
            print(f"Could not fetch {url}: {err}")
            continue
        if article and article.text:
            article_text.append(article.text)

    # Chunking
    print("Chunking the articles")
    # NOTE(review): the run logs show "Created a chunk of size N, which is
    # longer than the specified 300", so this splitter does not enforce
    # chunk_size strictly. Confirm whether a recursive splitter is wanted.
    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
    chunked_articles = [
        chunk
        for text in article_text
        for chunk in text_splitter.split_text(text)
    ]
    # Post each distinct chunk once per call (order preserved). Chunks that
    # were stored by earlier calls are not detected here.
    unique_chunks = list(dict.fromkeys(chunked_articles))

    # Upload
    print("Posting new evidence to vector database...")
    failed = 0
    for chunk in unique_chunks:
        try:
            properties = {"context": chunk}
            vector = text_embedding(pd.Series([chunk])).tolist()[0]
            client.data_object.create(properties, class_name, vector=vector)
        except Exception as err:
            # Best effort: report and move on. Catching Exception (not a bare
            # except) keeps kernel interrupts working.
            failed += 1
            print(f"Failed to post a chunk: {err}")
    if failed:
        print(f"{failed} of {len(unique_chunks)} chunk(s) could not be posted.")

2024-03-06 06:41:45.688919: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
03/06/2024 06:41:47 AM - Note: NumExpr detected 40 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
03/06/2024 06:41:47 AM - NumExpr defaulting to 8 threads.
[nltk_data] Downloading package punkt to /home/zhj003/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
import os

import weaviate

# Connection settings are read from the environment so that no secret is
# stored in the notebook. The key that used to be hard-coded here should be
# treated as leaked and rotated.
WEAVIATE_URL = os.environ.get(
    "WEAVIATE_URL", "https://testing-cluster-2qgcoz4q.weaviate.network"
)
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY")
if not WEAVIATE_API_KEY:
    raise RuntimeError(
        "Set the WEAVIATE_API_KEY environment variable before running this cell."
    )

client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY),
)

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


In [None]:
from IPython.display import clear_output

# Rows whose index label is below START_INDEX are skipped.
START_INDEX = 50
# Clear the cell output and print a progress line at every multiple of this.
PROGRESS_EVERY = 50

# rag() uses the global `client` created in the connection cell. Building a
# new client on every iteration (including skipped rows) is unnecessary and
# duplicated the API key in this cell.
# NOTE: `index` is the DataFrame index label, not a row counter. df was
# filtered without resetting its index, so labels can have gaps and the
# progress line only appears when a label happens to be a multiple of
# PROGRESS_EVERY.
for index, row in df.iterrows():
    if index < START_INDEX:
        continue
    if index % PROGRESS_EVERY == 0:
        clear_output(wait=True)
        print(f"Running at iteration {index}")
    rag(row['article'])

Running at iteration 700
Retrieving keywords...
pentagon: 0.3491282676376715
facebook: 0.2493773340269082
attack: 0.2493773340269082
explosion: 0.2493773340269082
news: 0.19950186722152657
post: 0.19950186722152657
22: 0.19950186722152657
near: 0.19950186722152657
said: 0.14962640041614492
smoke: 0.14962640041614492
RAG: Getting new evidence...


03/07/2024 05:34:57 AM - Created a chunk of size 326, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 361, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 366, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 332, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 376, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 320, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 364, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 305, which is longer than the specified 300
03/07/2024 05:34:57 AM - Created a chunk of size 340, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
pandemic: 0.3731033238480316
said: 0.3497843661075296
flu: 0.2798274928860237
children: 0.2798274928860237
bird: 0.1865516619240158
avian: 0.16323270418351382
health: 0.13991374644301185
positive: 0.13991374644301185
tested: 0.13991374644301185
morris: 0.13991374644301185
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
bowman: 0.44992127066584753
killed: 0.22496063533292376
dr: 0.179968508266339
burdock: 0.179968508266339
sebi: 0.179968508266339
facebook: 0.13497638119975425
false: 0.13497638119975425
video: 0.13497638119975425
died: 0.13497638119975425
prison: 0.13497638119975425
RAG: Getting new evidence...


03/07/2024 05:36:29 AM - Created a chunk of size 456, which is longer than the specified 300
03/07/2024 05:36:29 AM - Created a chunk of size 437, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
election: 0.3916103699798974
lake: 0.3916103699798974
signatures: 0.24920659907811654
ballots: 0.23140612771539393
maricopa: 0.1958051849899487
mail: 0.1780047136272261
county: 0.1780047136272261
arizona: 0.1780047136272261
signature: 0.1602042422645035
verification: 0.14240377090178089
RAG: Getting new evidence...


03/07/2024 05:36:59 AM - Created a chunk of size 444, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 336, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 344, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 445, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 331, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 678, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 447, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 394, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 412, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 302, which is longer than the specified 300
03/07/2024 05:36:59 AM - Created a chunk of size 488, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
en: 0.4538485592273606
el: 0.41062488691999294
la: 0.3890130507663091
que: 0.30256570615157374
del: 0.2161183615368384
video: 0.1728946892294707
biden: 0.1728946892294707
peligro: 0.15128285307578687
discurso: 0.12967101692210303
publicación: 0.1080591807684192
RAG: Getting new evidence...


03/07/2024 05:39:42 AM - Created a chunk of size 367, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
men: 0.49761714291127057
labor: 0.29857028574676236
participation: 0.2587609143138607
force: 0.2587609143138607
age: 0.2587609143138607
working: 0.23885622859740988
scott: 0.1791421714480574
people: 0.1592374857316066
decline: 0.1592374857316066
rate: 0.13933280001515577
RAG: Getting new evidence...


03/07/2024 05:39:54 AM - Created a chunk of size 534, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 460, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 676, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 1368, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 341, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 370, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 477, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 387, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 1080, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 1003, which is longer than the specified 300
03/07/2024 05:39:54 AM - Created a chunk of size 841, which is long

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
water: 0.49948427010514984
h3o2: 0.3178536264305499
video: 0.18163064367459994
fruits: 0.18163064367459994
said: 0.18163064367459994
facebook: 0.13622298275594996
body: 0.13622298275594996
drinking: 0.13622298275594996
human: 0.09081532183729997
plain: 0.09081532183729997
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
hiv: 0.37912988830561156
covid: 0.28434741622920867
aids: 0.260651798210108
vaccines: 0.260651798210108
said: 0.23695618019100723
19: 0.23695618019100723
cdc: 0.18956494415280578
post: 0.14217370811460434
increase: 0.14217370811460434
cancers: 0.14217370811460434
RAG: Getting new evidence...


03/07/2024 05:42:07 AM - Created a chunk of size 632, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 436, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 680, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 741, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 780, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 459, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 643, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 381, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 340, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 835, which is longer than the specified 300
03/07/2024 05:42:07 AM - Created a chunk of size 889, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
students: 0.36628971787912823
school: 0.3174510888285778
000: 0.2686124597780274
year: 0.24419314525275215
miami: 0.24419314525275215
new: 0.21977383072747694
suarez: 0.19535451620220173
dade: 0.19535451620220173
public: 0.19535451620220173
number: 0.17093520167692652
RAG: Getting new evidence...
An error occurred while fetching the article: Article `download()` failed with HTTPSConnectionPool(host='www.miamiherald.com', port=443): Read timed out. (read timeout=7) on URL https://news.google.com/rss/articles/CBMiUWh0dHBzOi8vd3d3Lm1pYW1paGVyYWxkLmNvbS9uZXdzL2xvY2FsL2NvbW11bml0eS9taWFtaS1kYWRlL2FydGljbGUyNzY4NjA5MTMuaHRtbNIBUWh0dHBzOi8vYW1wLm1pYW1paGVyYWxkLmNvbS9uZXdzL2xvY2FsL2NvbW11bml0eS9taWFtaS1kYWRlL2FydGljbGUyNzY4NjA5MTMuaHRtbA?oc=5&hl=en-US&gl=US&ceid=US:en


03/07/2024 05:45:32 AM - Created a chunk of size 320, which is longer than the specified 300
03/07/2024 05:45:32 AM - Created a chunk of size 308, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
patients: 0.4607878789932069
pneumonia: 0.373018759184977
covid: 0.3071919193288046
19: 0.26330735942468964
associated: 0.19748051956851723
study: 0.19748051956851723
ventilator: 0.19748051956851723
ventilators: 0.17553823961645976
killed: 0.13165367971234482
nearly: 0.10971139976028735
RAG: Getting new evidence...
An error occurred while fetching the article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/brucelee/2023/06/11/claims-that-ventilators-killed-nearly-all-with-covid-19-are-unfounded/ on URL https://news.google.com/rss/articles/CBMid2h0dHBzOi8vd3d3LmZvcmJlcy5jb20vc2l0ZXMvYnJ1Y2VsZWUvMjAyMy8wNi8xMS9jbGFpbXMtdGhhdC12ZW50aWxhdG9ycy1raWxsZWQtbmVhcmx5LWFsbC13aXRoLWNvdmlkLTE5LWFyZS11bmZvdW5kZWQv0gF7aHR0cHM6Ly93d3cuZm9yYmVzLmNvbS9zaXRlcy9icnVjZWxlZS8yMDIzLzA2LzExL2NsYWltcy10aGF0LXZlbnRpbGF0b3JzLWtpbGxlZC1uZWFybHktYWxsLXdpdGgtY2

03/07/2024 05:47:27 AM - Created a chunk of size 361, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 373, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 387, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 486, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 389, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 340, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 397, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 528, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 433, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 373, which is longer than the specified 300
03/07/2024 05:47:27 AM - Created a chunk of size 457, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
myocarditis: 0.39497143888904995
covid: 0.29622857916678746
19: 0.27154286423622187
cases: 0.27154286423622187
said: 0.2221714343750906
health: 0.2221714343750906
enterovirus: 0.19748571944452498
wales: 0.14811428958339373
infection: 0.14811428958339373
vaccines: 0.14811428958339373
RAG: Getting new evidence...


03/07/2024 05:49:07 AM - Created a chunk of size 428, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 370, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 312, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 331, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 337, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 349, which is longer than the specified 300
03/07/2024 05:49:07 AM - Created a chunk of size 460, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
target: 0.46647781017834217
sizes: 0.24878816542844917
tuck: 0.24878816542844917
friendly: 0.21768964474989302
pride: 0.21768964474989302
walsh: 0.18659112407133688
children: 0.18659112407133688
swimsuit: 0.15549260339278073
kids: 0.15549260339278073
swimwear: 0.15549260339278073
RAG: Getting new evidence...


03/07/2024 05:50:44 AM - Created a chunk of size 324, which is longer than the specified 300
03/07/2024 05:50:44 AM - Created a chunk of size 318, which is longer than the specified 300
03/07/2024 05:50:44 AM - Created a chunk of size 324, which is longer than the specified 300
03/07/2024 05:50:44 AM - Created a chunk of size 323, which is longer than the specified 300
03/07/2024 05:50:44 AM - Created a chunk of size 315, which is longer than the specified 300
03/07/2024 05:50:44 AM - Created a chunk of size 386, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
obama: 0.4330127018922193
facebook: 0.24056261216234406
wife: 0.24056261216234406
michael: 0.24056261216234406
mullen: 0.19245008972987526
claim: 0.19245008972987526
said: 0.19245008972987526
family: 0.14433756729740643
jack: 0.14433756729740643
speech: 0.14433756729740643
RAG: Getting new evidence...
An error occurred while fetching the article: Article `download()` failed with 401 Client Error: HTTP Forbidden for url: https://www.reuters.com/article/idUSL1N37K2FK/ on URL https://news.google.com/rss/articles/CBMiLmh0dHBzOi8vd3d3LnJldXRlcnMuY29tL2FydGljbGUvaWRVU0wxTjM3SzJGSy_SAQA?oc=5&hl=en-US&gl=US&ceid=US:en
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
sexual: 0.30638410206544403
orientation: 0.2785310018776764
said: 0.2506779016899088
based: 0.2506779016899088
state: 0.22282480150214112
behavior: 0.19497170131437347
law: 0.16711860112660584
sex: 0.167118

03/07/2024 05:52:37 AM - Created a chunk of size 873, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 957, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 319, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 2877, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 6227, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 4983, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 1861, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 980, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 488, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 1220, which is longer than the specified 300
03/07/2024 05:52:37 AM - Created a chunk of size 340, which is lo

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
el: 0.47233683211739314
en: 0.3936140267644943
que: 0.3673730916468613
la: 0.3280116889704119
del: 0.23616841605869657
los: 0.2230479484998801
benceno: 0.19680701338224715
las: 0.1705660782646142
al: 0.13120467558816476
aire: 0.11808420802934828
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
debt: 0.5560384374855327
trump: 0.34752402342845795
president: 0.34752402342845795
obama: 0.16217787759994703
american: 0.16217787759994703
federal: 0.16217787759994703
trillion: 0.13900960937138318
year: 0.13900960937138318
jeffries: 0.11584134114281931
said: 0.11584134114281931
RAG: Getting new evidence...


03/07/2024 05:55:59 AM - Created a chunk of size 308, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 398, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 315, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 328, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 341, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 308, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 400, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 356, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 303, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 539, which is longer than the specified 300
03/07/2024 05:55:59 AM - Created a chunk of size 403, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
el: 0.4072954339937378
en: 0.36029980699446035
que: 0.2663085529959055
la: 0.2663085529959055
estados: 0.25064334399614635
unidos: 0.25064334399614635
personas: 0.21931292599662805
para: 0.18798250799710975
los: 0.14098688099783233
estos: 0.14098688099783233
RAG: Getting new evidence...


03/07/2024 05:57:57 AM - Created a chunk of size 339, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 584, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 694, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 669, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 511, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 413, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 314, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 633, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 479, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 575, which is longer than the specified 300
03/07/2024 05:57:57 AM - Created a chunk of size 1161, which is longer

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
housing: 0.358974358974359
black: 0.3076923076923077
homeownership: 0.28205128205128205
said: 0.2564102564102564
homeowners: 0.15384615384615385
act: 0.15384615384615385
rate: 0.15384615384615385
scott: 0.15384615384615385
homes: 0.1282051282051282
home: 0.10256410256410256
RAG: Getting new evidence...


03/07/2024 05:58:32 AM - Created a chunk of size 325, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 304, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 384, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 373, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 435, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 409, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 355, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 406, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 502, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 456, which is longer than the specified 300
03/07/2024 05:58:32 AM - Created a chunk of size 303, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
hiv: 0.45706896310845374
service: 0.28867513459481287
covid: 0.2646188733785785
infections: 0.2646188733785785
members: 0.24056261216234406
diagnosed: 0.21650635094610965
2021: 0.21650635094610965
19: 0.21650635094610965
increase: 0.19245008972987526
vaccines: 0.14433756729740643
RAG: Getting new evidence...


03/07/2024 06:00:12 AM - Created a chunk of size 632, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 436, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 680, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 741, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 780, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 459, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 643, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 456, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 355, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 400, which is longer than the specified 300
03/07/2024 06:00:12 AM - Created a chunk of size 331, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
protein: 0.3211726365433135
mrna: 0.2676438637860946
l17: 0.2141150910288757
videos: 0.2141150910288757
video: 0.2141150910288757
vaccine: 0.2141150910288757
tiktok: 0.16058631827165676
covid: 0.16058631827165676
ribosomal: 0.16058631827165676
street: 0.16058631827165676
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
cnn: 0.4588314677411235
town: 0.2867696673382022
audience: 0.1720618004029213
hall: 0.1720618004029213
social: 0.1720618004029213
live: 0.1720618004029213
media: 0.1720618004029213
network: 0.1720618004029213
meta: 0.11470786693528087
said: 0.11470786693528087
RAG: Getting new evidence...


03/07/2024 06:02:19 AM - Created a chunk of size 477, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 330, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 639, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 739, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 723, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 843, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 1143, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 779, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 377, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 485, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 432, which is longer

03/07/2024 06:02:19 AM - Created a chunk of size 555, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 836, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 523, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 767, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 715, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 609, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 431, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 763, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 394, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 498, which is longer than the specified 300
03/07/2024 06:02:19 AM - Created a chunk of size 584, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
trudeau: 0.4933011327825659
facebook: 0.24665056639128294
news: 0.1849879247934622
kim: 0.1849879247934622
like: 0.1849879247934622
standing: 0.1849879247934622
feet: 0.12332528319564147
minister: 0.12332528319564147
read: 0.12332528319564147
false: 0.12332528319564147
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
chinese: 0.4177502070101985
migrants: 0.3779644730092272
said: 0.3381787390082559
border: 0.278500138006799
china: 0.17903580300437077
video: 0.15914293600388513
tiktok: 0.1392500690033995
southern: 0.1392500690033995
military: 0.11935720200291386
men: 0.11935720200291386
RAG: Getting new evidence...
An error occurred while fetching the article: Article `download()` failed with 401 Client Error: HTTP Forbidden for url: https://www.reuters.com/investigates/special-report/usa-immigration-china/ on URL https://news.google.

03/07/2024 06:10:30 AM - Created a chunk of size 336, which is longer than the specified 300
03/07/2024 06:10:30 AM - Created a chunk of size 373, which is longer than the specified 300
03/07/2024 06:10:30 AM - Created a chunk of size 314, which is longer than the specified 300
03/07/2024 06:10:30 AM - Created a chunk of size 311, which is longer than the specified 300
03/07/2024 06:10:30 AM - Created a chunk of size 334, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
video: 0.4188539082916955
pelosi: 0.3769685174625259
house: 0.3350831266333564
speaker: 0.29319773580418684
mccarthy: 0.20942695414584775
shows: 0.1675415633166782
facebook: 0.1675415633166782
kid: 0.12565617248750865
going: 0.12565617248750865
like: 0.12565617248750865
RAG: Getting new evidence...


03/07/2024 06:11:48 AM - Created a chunk of size 556, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 349, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 376, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 329, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 394, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 490, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 310, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 457, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 665, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 431, which is longer than the specified 300
03/07/2024 06:11:48 AM - Created a chunk of size 410, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
florida: 0.38313051408846055
politifact: 0.25542034272564035
video: 0.25542034272564035
store: 0.22349279988493534
shelves: 0.19156525704423027
posts: 0.19156525704423027
truck: 0.19156525704423027
palmetto: 0.19156525704423027
boycott: 0.15963771420352524
said: 0.15963771420352524
RAG: Getting new evidence...


03/07/2024 06:13:30 AM - Created a chunk of size 301, which is longer than the specified 300
03/07/2024 06:13:30 AM - Created a chunk of size 368, which is longer than the specified 300
03/07/2024 06:13:30 AM - Created a chunk of size 369, which is longer than the specified 300
03/07/2024 06:13:30 AM - Created a chunk of size 456, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
prices: 0.4528272225013865
drugs: 0.3441486891010538
times: 0.25358324460077647
sanders: 0.25358324460077647
insulin: 0.19924397790061008
drug: 0.19924397790061008
countries: 0.19924397790061008
price: 0.1811308890005546
said: 0.16301780010049916
10: 0.14490471120044368
RAG: Getting new evidence...


03/07/2024 06:14:23 AM - Created a chunk of size 315, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 329, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 325, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 651, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 407, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 383, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 328, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 452, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 508, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 557, which is longer than the specified 300
03/07/2024 06:14:23 AM - Created a chunk of size 430, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
que: 0.39764455282427935
la: 0.3408381881350966
en: 0.3408381881350966
salud: 0.2082900038603368
el: 0.2082900038603368
del: 0.18935454896394255
para: 0.1704190940675483
los: 0.15148363917115404
seguro: 0.15148363917115404
cobertura: 0.15148363917115404
RAG: Getting new evidence...


03/07/2024 06:17:19 AM - Created a chunk of size 331, which is longer than the specified 300
03/07/2024 06:17:19 AM - Created a chunk of size 416, which is longer than the specified 300
03/07/2024 06:17:19 AM - Created a chunk of size 341, which is longer than the specified 300
03/07/2024 06:17:19 AM - Created a chunk of size 377, which is longer than the specified 300
03/07/2024 06:17:19 AM - Created a chunk of size 357, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
cuts: 0.28307388759492286
budget: 0.24263476079564816
house: 0.24263476079564816
biden: 0.22241519739601082
said: 0.18197607059673612
debt: 0.18197607059673612
agents: 0.18197607059673612
000: 0.18197607059673612
republicans: 0.18197607059673612
spending: 0.16175650719709878
RAG: Getting new evidence...


03/07/2024 06:17:52 AM - Created a chunk of size 350, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 342, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 336, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 332, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 341, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 305, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 325, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 328, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 338, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 340, which is longer than the specified 300
03/07/2024 06:17:52 AM - Created a chunk of size 356, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
biden: 0.6403450952448875
video: 0.2286946768731741
hansen: 0.2058252091858567
clips: 0.16008627381122187
man: 0.16008627381122187
predator: 0.13721680612390447
instagram: 0.13721680612390447
catch: 0.11434733843658705
cuomo: 0.11434733843658705
news: 0.11434733843658705
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
state: 0.39764968724065797
year: 0.27207610179623964
eligible: 0.23021823998143356
pre: 0.23021823998143356
public: 0.2092893090740305
districts: 0.2092893090740305
said: 0.18836037816662746
budget: 0.1674314472592244
olds: 0.1674314472592244
96: 0.1674314472592244
RAG: Getting new evidence...


03/07/2024 06:20:11 AM - Created a chunk of size 508, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 412, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 385, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 556, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 527, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 427, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 303, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 301, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 350, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 435, which is longer than the specified 300
03/07/2024 06:20:11 AM - Created a chunk of size 466, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
covid: 0.3783870158723217
19: 0.3310886388882815
study: 0.3310886388882815
brain: 0.23649188492020107
vaccines: 0.23649188492020107
vaccine: 0.18919350793616085
tweet: 0.18919350793616085
video: 0.14189513095212064
news: 0.14189513095212064
article: 0.14189513095212064
RAG: Getting new evidence...


03/07/2024 06:24:35 AM - Created a chunk of size 1290, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1913, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 659, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 698, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 717, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1730, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 2232, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1063, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1207, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1120, which is longer than the specified 300
03/07/2024 06:24:35 AM - Created a chunk of size 1455, which is

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
women: 0.392540507864431
tdap: 0.392540507864431
pregnant: 0.3645019001598288
said: 0.22430886163681774
pregnancy: 0.22430886163681774
vaccine: 0.22430886163681774
pertussis: 0.1962702539322155
tetanus: 0.14019303852301107
diphtheria: 0.14019303852301107
post: 0.11215443081840887
RAG: Getting new evidence...


03/07/2024 06:27:02 AM - Created a chunk of size 303, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 457, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 473, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 319, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 342, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 348, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 410, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 487, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 371, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 460, which is longer than the specified 300
03/07/2024 06:27:02 AM - Created a chunk of size 512, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
border: 0.35445877847928337
bridge: 0.20254787341673333
video: 0.20254787341673333
paso: 0.20254787341673333
people: 0.20254787341673333
el: 0.20254787341673333
2023: 0.15191090506255
march: 0.15191090506255
migrants: 0.15191090506255
officials: 0.15191090506255
RAG: Getting new evidence...


03/07/2024 06:29:32 AM - Created a chunk of size 362, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 303, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 305, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 405, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 347, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 309, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 336, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 402, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 402, which is longer than the specified 300
03/07/2024 06:29:32 AM - Created a chunk of size 382, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
fednow: 0.3426990932579271
service: 0.2899761558336306
reserve: 0.2636146871214824
federal: 0.2636146871214824
digital: 0.23725321840933414
currency: 0.23725321840933414
fees: 0.2108917496971859
central: 0.2108917496971859
new: 0.18453028098503765
payment: 0.18453028098503765
RAG: Getting new evidence...


03/07/2024 06:31:07 AM - Created a chunk of size 777, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 474, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 560, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 653, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 795, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 578, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 402, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 331, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 403, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 793, which is longer than the specified 300
03/07/2024 06:31:07 AM - Created a chunk of size 462, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
climate: 0.32852627812475244
years: 0.23466162723196604
change: 0.23466162723196604
000: 0.23466162723196604
post: 0.18772930178557284
warming: 0.18772930178557284
claim: 0.14079697633917962
gases: 0.14079697633917962
degree: 0.14079697633917962
catt: 0.14079697633917962
RAG: Getting new evidence...


03/07/2024 06:32:26 AM - newspaper ignoring lxml node error: Input object is not an XML element: HtmlProcessingInstruction
03/07/2024 06:32:29 AM - Created a chunk of size 336, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 309, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 308, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 329, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 446, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 575, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 360, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 329, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk of size 524, which is longer than the specified 300
03/07/2024 06:32:29 AM - Created a chunk

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
gender: 0.37283929381053577
parents: 0.35090757064521017
law: 0.3070441243145589
children: 0.24124895481858197
care: 0.21931723165325634
state: 0.21931723165325634
washington: 0.17545378532260508
child: 0.17545378532260508
affirming: 0.17545378532260508
health: 0.15352206215727945
RAG: Getting new evidence...


03/07/2024 06:36:22 AM - Created a chunk of size 375, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 302, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 606, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 457, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 477, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 347, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 570, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 491, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 391, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 340, which is longer than the specified 300
03/07/2024 06:36:22 AM - Created a chunk of size 461, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
foxx: 0.5099019513592785
jamie: 0.27456258919345766
claim: 0.23533936216582083
family: 0.23533936216582083
corinne: 0.19611613513818404
posted: 0.15689290811054724
facebook: 0.15689290811054724
support: 0.15689290811054724
media: 0.15689290811054724
instagram: 0.11766968108291041
RAG: Getting new evidence...


03/07/2024 06:39:38 AM - Created a chunk of size 371, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 469, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 414, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 311, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 353, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 302, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 357, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 451, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 411, which is longer than the specified 300
03/07/2024 06:39:38 AM - Created a chunk of size 586, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
food: 0.39214303531439504
human: 0.27148363983304274
calcium: 0.27148363983304274
bones: 0.21115394209236657
remains: 0.21115394209236657
video: 0.21115394209236657
teeth: 0.1809890932220285
phosphate: 0.1809890932220285
hydrolysis: 0.1508242443516904
alkaline: 0.1508242443516904
RAG: Getting new evidence...


03/07/2024 06:41:15 AM - Created a chunk of size 761, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 682, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 399, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 321, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 490, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 328, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 338, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 484, which is longer than the specified 300
03/07/2024 06:41:15 AM - Created a chunk of size 369, which is longer than the specified 300


Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
tax: 0.6342399861119264
desantis: 0.30689031586060955
taxes: 0.2659716070791949
fair: 0.24551225268848764
trump: 0.20459354390707304
said: 0.16367483512565842
sales: 0.16367483512565842
ad: 0.12275612634424382
congress: 0.10229677195353652
act: 0.10229677195353652
RAG: Getting new evidence...
An error occurred while fetching the article: Article `download()` failed with 403 Client Error: Max restarts limit reached for url: https://www.forbes.com/sites/saradorn/2023/05/19/trump-ad-claims-desantis-wants-to-raise-taxes-ron-desalestax/ on URL https://news.google.com/rss/articles/CBMibmh0dHBzOi8vd3d3LmZvcmJlcy5jb20vc2l0ZXMvc2FyYWRvcm4vMjAyMy8wNS8xOS90cnVtcC1hZC1jbGFpbXMtZGVzYW50aXMtd2FudHMtdG8tcmFpc2UtdGF4ZXMtcm9uLWRlc2FsZXN0YXgv0gFyaHR0cHM6Ly93d3cuZm9yYmVzLmNvbS9zaXRlcy9zYXJhZG9ybi8yMDIzLzA1LzE5L3RydW1wLWFkLWNsYWltcy1kZXNhbnRpcy13YW50cy10by1yYWlzZS10YXhlcy1yb24tZGVzYWxlc3RheC9hbXAv?oc=5&hl=en-US&gl=US&c

03/07/2024 06:42:01 AM - Created a chunk of size 330, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 337, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 351, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 349, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 361, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 316, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 313, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 473, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 443, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 372, which is longer than the specified 300
03/07/2024 06:42:01 AM - Created a chunk of size 305, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
que: 0.4528120578009407
la: 0.3937496154790789
el: 0.3149996923832631
en: 0.29531221160930915
los: 0.19687480773953944
frontera: 0.15749984619163154
42: 0.15749984619163154
título: 0.15749984619163154
del: 0.1378123654176776
unidos: 0.1378123654176776
RAG: Getting new evidence...
Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
image: 0.36178730264621084
photo: 0.36178730264621084
ai: 0.28942984211696865
trump: 0.2170723815877265
images: 0.14471492105848432
showed: 0.14471492105848432
hands: 0.14471492105848432
generators: 0.14471492105848432
generated: 0.14471492105848432
october: 0.14471492105848432
RAG: Getting new evidence...


03/07/2024 06:44:22 AM - Created a chunk of size 302, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 433, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 318, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 419, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 516, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 368, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 320, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 318, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 509, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 479, which is longer than the specified 300
03/07/2024 06:44:22 AM - Created a chunk of size 827, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
latino: 0.4130921946615823
businesses: 0.3951316644589048
owned: 0.2514474228374849
report: 0.2155263624321299
venture: 0.2155263624321299
said: 0.1975658322294524
business: 0.1975658322294524
capital: 0.16164477182409742
new: 0.16164477182409742
data: 0.16164477182409742
RAG: Getting new evidence...


03/07/2024 06:46:41 AM - Created a chunk of size 309, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 353, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 307, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 359, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 350, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 425, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 412, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 338, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 388, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 390, which is longer than the specified 300
03/07/2024 06:46:41 AM - Created a chunk of size 362, which is longer 

Chunking the articles
Posting new evidence to vector database...
Retrieving keywords...
biden: 0.464139560816457
child: 0.464139560816457
photo: 0.36099743619057767
shows: 0.154713186938819
inappropriately: 0.154713186938819
altered: 0.154713186938819
october: 0.154713186938819
post: 0.10314212462587934
facebook: 0.10314212462587934
flag: 0.10314212462587934
RAG: Getting new evidence...
