In [None]:
# Install sentence-transformers into the kernel's environment (%pip targets the running kernel).
# NOTE(review): nothing in this notebook imports it directly; presumably it backs the
# cross-encoder scoring done by common.news.filter_by_relevance — confirm.
# NOTE(review): the version is unpinned, so a fresh install may pull a different release;
# consider pinning once the working version is known.
%pip install sentence-transformers

In [2]:
import sys; sys.path.append('..')

import pandas as pd

from common.polymarket import load_market_data
from common.news import MARKET_KEYWORDS, fetch_news_for_period, filter_by_relevance, save_news_to_csv

In [4]:
# Configuration: which market to analyse and where its price history lives.
EVENT_ID = 13551
MARKET_NAME = "ukraine"
DATA_FILE = "trump-wins-ends-ukraine-war-in-90-days-2c482.json"

# Get keywords for this market. This is done before the (slow) market-data load
# so that a missing or mistyped (EVENT_ID, MARKET_NAME) key fails immediately
# instead of silently producing an empty keyword list, which would then drive a
# multi-minute news fetch and a relevance query with no search terms.
keywords = MARKET_KEYWORDS.get((EVENT_ID, MARKET_NAME), [])
if not keywords:
    raise ValueError(
        f"No keywords configured in MARKET_KEYWORDS for {(EVENT_ID, MARKET_NAME)!r}"
    )

# Load market data; the 'event' entry carries the title and the start/end
# datetimes that bound the news-fetch window.
data = load_market_data(filename=DATA_FILE)
event_info = data['event']
start_date = event_info['start_date']
end_date = event_info['end_date']

print(f"Event: {event_info['title']}")
print(f"Period: {start_date.date()} to {end_date.date()}")
print(f"Duration: {(end_date - start_date).days} days")
print(f"Keywords: {keywords}")

Loading times: 100%|██████████| 258658/258658 [00:34<00:00, 7408.16point/s]

Event: Trump ends Ukraine war in first 90 days?
Period: 2024-10-22 to 2025-04-20
Duration: 179 days
Keywords: ['Trump', 'Ukraine', 'war', 'ceasefire', 'peace', 'negotiations', 'Zelensky']





In [3]:
# Pull every article matching the market keywords inside the event window.
# This is the slow, network-bound step of the notebook.
all_articles = fetch_news_for_period(
    keywords,
    start_date,
    end_date,
)
print(f"\nTotal articles fetched: {len(all_articles)}")

Fetching news: 100%|██████████| 286/286 [07:33<00:00,  1.59s/req, articles=29716]


Total articles fetched: 29716





In [5]:
# Relevance filtering with the cross-encoder: the query is the market question
# followed by its keywords, and only articles passing the score threshold are kept.
query_text = f"{event_info['title']} {' '.join(keywords)}"
print(f"Query for relevance: {query_text}")
relevant_articles = filter_by_relevance(
    all_articles,
    query_text,
    threshold=-4.5,
)

Query for relevance: Trump ends Ukraine war in first 90 days? Trump Ukraine war ceasefire peace negotiations Zelensky
Scoring 29716 articles for relevance...


Batches: 100%|██████████| 929/929 [00:18<00:00, 51.46it/s]


Filtered to 1463 relevant articles (threshold=-4.5)


In [None]:
# Persist the filtered articles; the file name encodes both the market name
# and the event id so different markets never overwrite each other.
csv_filename = f"news_{MARKET_NAME}_{EVENT_ID}.csv"
save_news_to_csv(
    relevant_articles,
    csv_filename,
)

### Other markets

In [3]:
# (event id, market name, market-data file) for every additional market.
MARKETS_TO_PROCESS = [
    (903193, "kamala", "presidential-election-winner-2024-Kamala.json"),
    (903193, "trump", "presidential-election-winner-2024-Trump.json"),
    (21257, "israel", "israel-x-hamas-ceasefire-before-july-2025-79c05.json"),
    (12641, "tiktok", "tiktok-banned-in-the-us-before-may-2025-52cc0.json"),
]


def process_market(event_id, market_name, data_file, threshold=-4.5):
    """Fetch, relevance-filter and save the news for a single market.

    Runs the same load -> fetch -> filter -> save pipeline as the
    single-market cells earlier in this notebook. All intermediate values are
    function-local, so the notebook-level `data`, `event_info`, `start_date`,
    `end_date`, `keywords`, `query_text` and `csv_filename` set by those cells
    are no longer overwritten when this cell runs.

    Args:
        event_id: Event id; first half of the MARKET_KEYWORDS key and part of
            the output file name.
        market_name: Market name; second half of the MARKET_KEYWORDS key and
            part of the output file name.
        data_file: File name passed to load_market_data.
        threshold: Relevance threshold forwarded to filter_by_relevance.

    Returns:
        Whatever filter_by_relevance returns for this market (the articles
        that were saved).

    Raises:
        ValueError: If MARKET_KEYWORDS has no (non-empty) entry for
            (event_id, market_name).
    """
    print(f"\n{'='*60}")
    print(f"Processing: {market_name} (event {event_id})")
    print('='*60)

    # Fail before the slow load/fetch rather than searching with no keywords.
    keywords = MARKET_KEYWORDS.get((event_id, market_name), [])
    if not keywords:
        raise ValueError(
            f"No keywords configured in MARKET_KEYWORDS for {(event_id, market_name)!r}"
        )

    event_info = load_market_data(filename=data_file)['event']
    start_date = event_info['start_date']
    end_date = event_info['end_date']
    print(f"Period: {start_date.date()} to {end_date.date()}")
    print(f"Keywords: {keywords}")

    articles = fetch_news_for_period(keywords, start_date, end_date)
    print(f"Total articles fetched: {len(articles)}")

    # Query = market question followed by its keywords.
    query_text = f"{event_info['title']} " + " ".join(keywords)
    relevant = filter_by_relevance(articles, query_text, threshold=threshold)

    save_news_to_csv(relevant, f"news_{market_name}_{event_id}.csv")
    return relevant


# Validate the whole batch up front: each market takes minutes to process, so a
# misconfigured entry should stop the run before any work starts, not after the
# markets listed ahead of it have already been fetched.
_missing_keywords = [
    (eid, name)
    for eid, name, _ in MARKETS_TO_PROCESS
    if not MARKET_KEYWORDS.get((eid, name))
]
if _missing_keywords:
    raise ValueError(
        f"No keywords configured in MARKET_KEYWORDS for: {_missing_keywords}"
    )

for event_id, market_name, data_file in MARKETS_TO_PROCESS:
    process_market(event_id, market_name, data_file)


Processing: kamala (event 903193)


Loading times: 100%|██████████| 439715/439715 [01:01<00:00, 7115.59point/s]


Period: 2024-01-04 to 2024-11-05
Keywords: ['election', 'Democrat', 'Harris', 'Kamala', 'vice president']


Fetching news: 100%|██████████| 484/484 [12:14<00:00,  1.52s/req, articles=57147]


Total articles fetched: 57147
Scoring 57147 articles for relevance...


Batches: 100%|██████████| 1786/1786 [00:29<00:00, 59.69it/s]


Filtered to 1726 relevant articles (threshold=-4.5)


Saving to CSV: 100%|██████████| 1726/1726 [00:00<00:00, 315950.28row/s]


Saved 1726 articles to /Users/imakarov/STUDY/ML-News-Market-Analysis/notebooks/../data/news_kamala_903193.csv

Processing: trump (event 903193)


Loading times: 100%|██████████| 439803/439803 [00:58<00:00, 7454.70point/s]


Period: 2024-01-04 to 2024-11-05
Keywords: ['election', 'Republican', 'Trump', 'Donald', 'MAGA']


Fetching news: 100%|██████████| 484/484 [11:26<00:00,  1.42s/req, articles=43529]


Total articles fetched: 43529
Scoring 43529 articles for relevance...


Batches: 100%|██████████| 1361/1361 [00:22<00:00, 61.78it/s]


Filtered to 1088 relevant articles (threshold=-4.5)


Saving to CSV: 100%|██████████| 1088/1088 [00:00<00:00, 277985.06row/s]


Saved 1088 articles to /Users/imakarov/STUDY/ML-News-Market-Analysis/notebooks/../data/news_trump_903193.csv

Processing: israel (event 21257)


Loading times: 100%|██████████| 147814/147814 [00:20<00:00, 7369.10point/s]


Period: 2025-03-19 to 2025-06-30
Keywords: ['Israel', 'Hamas', 'ceasefire', 'Gaza', 'war', 'truce', 'hostages']


Fetching news: 100%|██████████| 165/165 [03:55<00:00,  1.43s/req, articles=6791]


Total articles fetched: 6791
Scoring 6791 articles for relevance...


Batches: 100%|██████████| 213/213 [00:04<00:00, 53.16it/s]


Filtered to 201 relevant articles (threshold=-4.5)


Saving to CSV: 100%|██████████| 201/201 [00:00<00:00, 76711.11row/s]


Saved 201 articles to /Users/imakarov/STUDY/ML-News-Market-Analysis/notebooks/../data/news_israel_21257.csv

Processing: tiktok (event 12641)


Loading times: 100%|██████████| 181433/181433 [00:24<00:00, 7550.70point/s]


Period: 2024-09-18 to 2025-04-30
Keywords: ['TikTok', 'ByteDance', 'China', 'ban', 'US', 'regulation', 'app']


Fetching news: 363req [07:05,  1.17s/req, articles=13563]                        


Total articles fetched: 13563
Scoring 13563 articles for relevance...


Batches: 100%|██████████| 424/424 [00:08<00:00, 51.91it/s]


Filtered to 247 relevant articles (threshold=-4.5)


Saving to CSV: 100%|██████████| 247/247 [00:00<00:00, 211686.37row/s]

Saved 247 articles to /Users/imakarov/STUDY/ML-News-Market-Analysis/notebooks/../data/news_tiktok_12641.csv



