In [None]:
import torch
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import torch  # For tensor computations and running BERT
import requests  # For fetching news articles via API
import pandas as pd  # For organizing and analyzing article data
from transformers import pipeline  # For summarization and sentiment analysis
from sentence_transformers import SentenceTransformer  # For BERT-based embeddings
from sklearn.cluster import KMeans  # For topic modeling using clustering


# Load models
# Checkpoint identifiers are named once here so they are easy to find and swap.
SUMMARIZER_CHECKPOINT = "sshleifer/distilbart-cnn-12-6"
SENTIMENT_CHECKPOINT = "nlptown/bert-base-multilingual-uncased-sentiment"
EMBEDDING_CHECKPOINT = 'all-MiniLM-L6-v2'

# Summarization pipeline used by summarize_article()
summarizer = pipeline("summarization", model=SUMMARIZER_CHECKPOINT)
# Sentiment pipeline used by analyze_sentiment(); its labels look like '3 stars'
sentiment_analyzer = pipeline("sentiment-analysis", model=SENTIMENT_CHECKPOINT)
# Sentence embedding model used by extract_topics()
embedding_model = SentenceTransformer(EMBEDDING_CHECKPOINT)

# Summarization Function
def summarize_article(content):
    """Return a short summary of ``content``.

    Texts with fewer than 50 whitespace-separated words are returned
    unchanged. Longer texts are passed to the module-level ``summarizer``
    pipeline.

    Args:
        content: Article text to summarize.

    Returns:
        The original text for short input, otherwise the ``summary_text``
        of the first pipeline result.
    """
    if len(content.split()) < 50:  # too short to be worth summarizing
        return content
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the pipeline fail on long articles.
    summary = summarizer(
        content,
        max_length=60,
        min_length=25,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']

# Sentiment Analysis Function
def analyze_sentiment(content):
    """Score the sentiment of ``content`` as an integer star rating.

    The text is classified with the module-level ``sentiment_analyzer``
    pipeline, whose labels look like ``'3 stars'``; the leading number of
    the top label is returned.

    Args:
        content: Text to classify.

    Returns:
        The star count of the first result's label, as an ``int``.
    """
    # truncation=True keeps over-long inputs within the model's maximum
    # sequence length instead of raising inside the pipeline.
    sentiment = sentiment_analyzer(content, truncation=True)
    label = sentiment[0]['label']  # e.g. '3 stars'
    return int(label.split()[0])   # '3 stars' -> 3

# Topic Modeling Function
def extract_topics(contents):
    """Group texts into at most five topics by clustering their embeddings.

    Args:
        contents: List of texts (here: article summaries).

    Returns:
        Dict mapping a cluster index (``int``) to the list of texts assigned
        to that cluster. With zero or one text, everything is returned
        under topic ``0`` and no model is invoked.
    """
    num_samples = len(contents)

    # Never ask for more clusters than there are texts.
    num_clusters = min(5, num_samples)

    if num_clusters <= 1:
        # Nothing to cluster; skip the embedding step entirely.
        return {0: contents}

    embeddings = embedding_model.encode(contents)

    # A fixed seed makes the topic assignment reproducible across runs.
    clustering_model = KMeans(n_clusters=num_clusters, random_state=42)
    cluster_labels = clustering_model.fit(embeddings).labels_

    topics = {i: [] for i in range(num_clusters)}
    for text, label in zip(contents, cluster_labels):
        # Cast to a plain int so lookups do not depend on numpy scalar hashing.
        topics[int(label)].append(text)
    return topics




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
import requests
from datetime import datetime, timedelta

def fetch_location_news(api_key, location, days_back=30, max_pages=None, timeout=30):
    """Fetch recent English-language articles matching ``location``.

    Pages through the ``/v1/news/all`` endpoint and keeps only articles whose
    ``relevance_score`` is greater than 5. Paging stops on the first empty
    page, non-200 response or network error, or after ``max_pages`` pages.

    Args:
        api_key: API token sent as ``api_token``.
        location: Search term, e.g. a state name.
        days_back: Only articles published within this many days are requested.
        max_pages: Upper bound on the number of pages requested. ``None``
            (the default) keeps paging until the API returns no more data.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        List of article dicts as returned by the API (filtered by relevance).
    """
    BASE_URL = "https://api.thenewsapi.com/v1/news/all"
    start_date = (datetime.utcnow() - timedelta(days=days_back)).strftime("%Y-%m-%d")
    all_articles = []
    page = 1

    while max_pages is None or page <= max_pages:
        params = {
            "api_token": api_key,
            "language": "en",
            "search": location,
            "limit": 100,           # requested page size
            "page": page,
            "published_after": start_date
        }

        try:
            # Without a timeout a stalled connection would hang the cell forever.
            response = requests.get(BASE_URL, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Keep what was fetched so far instead of losing it to a traceback.
            print(f"Error: {exc}")
            break

        print(f"Fetching page {page}... status: {response.status_code}")
        if response.status_code != 200:
            print(f"Error: {response.text}")
            break

        data = response.json().get("data", [])
        if not data:
            print("No more articles found.")
            break

        filtered_data = [
            article for article in data
            if article.get("relevance_score") is not None and article["relevance_score"] > 5
        ]

        all_articles.extend(filtered_data)
        page += 1

    print(f"Fetched {len(all_articles)} relevant articles about '{location}' since {start_date}")
    return all_articles


In [None]:
import os

# The API token is a secret: read it from the environment instead of storing
# it in the notebook. A token that was committed earlier should be revoked.
api_key = os.environ.get("THENEWSAPI_TOKEN")
if not api_key:
    raise RuntimeError("Set the THENEWSAPI_TOKEN environment variable to your API token.")
arizona_articles = fetch_location_news(api_key, location="Arizona")

# Preview the results
for article in arizona_articles[:5]:
    print(f"[{article['published_at']}] {article['title']} ({article['source']})")
    print(article['url'])
    print("-" * 60)


Fetching page 1... status: 200
Fetching page 2... status: 200
Fetching page 3... status: 200
Fetching page 4... status: 200
Fetching page 5... status: 200
Fetching page 6... status: 200
Fetching page 7... status: 200
Fetching page 8... status: 200
Fetching page 9... status: 200
Fetching page 10... status: 200
Fetching page 11... status: 200
Fetching page 12... status: 200
Fetching page 13... status: 200
Fetching page 14... status: 200
Fetching page 15... status: 200
Fetching page 16... status: 200
Fetching page 17... status: 200
Fetching page 18... status: 200
Fetching page 19... status: 200
Fetching page 20... status: 200
Fetching page 21... status: 200
Fetching page 22... status: 200
Fetching page 23... status: 200
Fetching page 24... status: 200
Fetching page 25... status: 200
Fetching page 26... status: 200
Fetching page 27... status: 200
Fetching page 28... status: 200
Fetching page 29... status: 200
Fetching page 30... status: 200
Fetching page 31... status: 200
Fetching page 32.

In [None]:
# Show only the first few records; the full list holds hundreds of articles
# and would flood the cell output.
arizona_articles[:5]

[{'uuid': 'fbbba121-2dd1-4584-b62e-351e9adc07ce',
  'title': 'Ranking Arizona: Top 10 Italian restaurants in Arizona for 2025',
  'description': 'Here are the Top 10\xa0Italian restaurants in Arizona, based on public voting for the 2025 edition of Ranking Arizona.',
  'keywords': '',
  'snippet': 'Featured\n\nHere are the Top 10 Italian restaurants in Arizona, based on public voting for the 2025 edition of Ranking Arizona, the state’s biggest and most com...',
  'url': 'https://azbigmedia.com/lifestyle/ranking-arizona-top-10-italian-restaurants-in-arizona-for-2025/',
  'image_url': 'https://azbigmedia.com/wp-content/uploads/2025/03/Italian-restaurants-in-Arizona.png',
  'language': 'en',
  'published_at': '2025-03-22T12:34:16.000000Z',
  'source': 'agriculture.einnews.com',
  'categories': ['food'],
  'relevance_score': 23.638367},
 {'uuid': '151c305c-5f03-47a0-a727-fa4f2f84ed03',
  'title': 'Arizona State faces No. 22 Arizona after Sanon’s 28-point showing',
  'description': 'Breaking

In [None]:
def analyze_news(news_articles):
    """Summarize and score each article, then cluster the summaries.

    Articles whose ``snippet`` is present but empty are skipped; a missing
    ``snippet`` key falls back to the text ``"No content"``.

    Args:
        news_articles: Iterable of article dicts.

    Returns:
        ``(results, topics)`` where ``results`` is a list of dicts with the
        keys ``title``, ``summary`` and ``sentiment``, and ``topics`` is the
        value returned by ``extract_topics`` for the summaries.
    """
    results = []
    for item in news_articles:
        snippet = item.get("snippet", "No content")
        if snippet:
            results.append({
                "title": item.get("title", "No title"),
                "summary": summarize_article(snippet),
                "sentiment": analyze_sentiment(snippet),
            })

    # Topics are derived from the summaries, not from the raw snippets.
    topics = extract_topics([entry['summary'] for entry in results])
    return results, topics


analyzed_articles, topics = analyze_news(arizona_articles)

# Display results: one four-line record per analyzed article
divider = '-' * 80
for article in analyzed_articles:
    print(
        f"Title: {article['title']}\n"
        f"Summary: {article['summary']}\n"
        f"Sentiment: {article['sentiment']}\n"
        f"{divider}"
    )

# Display topics: up to three example texts per cluster
print("\nDetected Topics:")
for topic_id, articles in topics.items():
    print(f"\nTopic {topic_id + 1}:")
    for art in articles[:3]:
        print(f"- {art}")


Title: Ranking Arizona: Top 10 Italian restaurants in Arizona for 2025
Summary: Featured

Here are the Top 10 Italian restaurants in Arizona, based on public voting for the 2025 edition of Ranking Arizona, the state’s biggest and most com...
Sentiment: 5
--------------------------------------------------------------------------------
Title: Arizona State faces No. 22 Arizona after Sanon’s 28-point showing
Summary: Arizona State Sun Devils (13-16, 4-14 Big 12) at Arizona Wildcats (19-10, 13-6 Big 12)

Tucson, Arizona; Tuesday, 11 p.m. EST

BOTTOM LINE: Arizona State visits...
Sentiment: 3
--------------------------------------------------------------------------------
Title: Koa Peat Commits To Arizona
Summary: Koa Peat, one of the highest-ranked uncommitted high school prospects in the country, has committed to Arizona. Peat chose Arizona over Arizona State, Baylor, H...
Sentiment: 1
--------------------------------------------------------------------------------
Title: Henri Veesaar s

In [None]:
import sqlite3

def _pair_raw_with_analysis(raw_articles, analyzed_articles):
    """Pair each analysis result with the raw article it was computed from.

    When both lists have the same length they are paired positionally.
    Otherwise the analysis list is assumed to be an in-order subset of the
    raw list (some raw articles were skipped during analysis), and entries
    are matched by title so that no result is attached to the wrong article.
    """
    if len(raw_articles) == len(analyzed_articles):
        return list(zip(raw_articles, analyzed_articles))

    pairs = []
    remaining = iter(analyzed_articles)
    current = next(remaining, None)
    for raw in raw_articles:
        if current is None:
            break
        # "No title" mirrors the placeholder used for raw articles without a title key.
        if current.get("title") == raw.get("title", "No title"):
            pairs.append((raw, current))
            current = next(remaining, None)
    return pairs


def store_full_analyzed_articles(location_name, raw_articles, analyzed_articles, db_name="location_news_full.db"):
    """Persist raw article metadata together with its analysis in SQLite.

    Rows go into the ``news_analysis`` table, which is created on first use.
    Storing the same article (same ``location`` and ``url``) again replaces
    the earlier row, so re-running a cell does not duplicate data.

    Args:
        location_name: Value written to the ``location`` column.
        raw_articles: Article dicts as returned by the news API.
        analyzed_articles: Dicts providing ``summary`` and ``sentiment``
            (and ``title``, used for matching when some raw articles were
            skipped during analysis).
        db_name: Path of the SQLite database file.

    Returns:
        ``db_name``, unchanged.
    """
    from contextlib import closing  # local import: only sqlite3 is imported in this cell

    raw_articles = list(raw_articles)
    analyzed_articles = list(analyzed_articles)

    rows = []
    urls = []
    for raw, analyzed in _pair_raw_with_analysis(raw_articles, analyzed_articles):
        categories = raw.get("categories")
        urls.append(raw.get("url"))
        rows.append((
            location_name,
            raw.get("title"),
            raw.get("description"),
            raw.get("snippet"),
            raw.get("url"),
            raw.get("image_url"),
            raw.get("language"),
            raw.get("published_at"),
            raw.get("source"),
            ", ".join(categories) if categories else None,
            raw.get("relevance_score"),
            analyzed.get("summary"),
            analyzed.get("sentiment"),
        ))

    # closing() guarantees the connection is released even if a statement
    # fails; the inner `with conn` commits on success and rolls back on error.
    with closing(sqlite3.connect(db_name)) as conn:
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS news_analysis (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    location TEXT,
                    title TEXT,
                    description TEXT,
                    snippet TEXT,
                    url TEXT,
                    image_url TEXT,
                    language TEXT,
                    published_at TEXT,
                    source TEXT,
                    categories TEXT,
                    relevance_score REAL,
                    summary TEXT,
                    sentiment TEXT
                )
            ''')

            # Drop earlier copies of the same article for this location so
            # that repeated runs stay idempotent.
            conn.executemany(
                "DELETE FROM news_analysis WHERE location = ? AND url = ?",
                [(location_name, url) for url in urls if url is not None],
            )

            conn.executemany('''
                INSERT INTO news_analysis (
                    location, title, description, snippet, url, image_url, language,
                    published_at, source, categories, relevance_score, summary, sentiment
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', rows)

    return db_name


In [None]:
db_file_path = store_full_analyzed_articles("Arizona", arizona_articles, analyzed_articles)
print(f"Saved to: {db_file_path}")

import pandas as pd
from contextlib import closing

# Read the table back from the file that was just written, and close the
# connection afterwards instead of leaving it open for the kernel's lifetime.
with closing(sqlite3.connect(db_file_path)) as conn:
    df = pd.read_sql("SELECT * FROM news_analysis", conn)
df

Saved to: location_news_full.db


Unnamed: 0,id,location,title,description,snippet,url,image_url,language,published_at,source,categories,relevance_score,summary,sentiment
0,1,Arizona,Ranking Arizona: Top 10 Italian restaurants in...,Here are the Top 10 Italian restaurants in Ari...,Featured\n\nHere are the Top 10 Italian restau...,https://azbigmedia.com/lifestyle/ranking-arizo...,https://azbigmedia.com/wp-content/uploads/2025...,en,2025-03-22T12:34:16.000000Z,agriculture.einnews.com,food,23.638367,Featured\n\nHere are the Top 10 Italian restau...,5 stars
1,2,Arizona,Arizona State faces No. 22 Arizona after Sanon...,"Breaking News, Sports, Manitoba, Canada","Arizona State Sun Devils (13-16, 4-14 Big 12) ...",https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-03T08:45:47.000000Z,winnipegfreepress.com,general,23.514057,"Arizona State Sun Devils (13-16, 4-14 Big 12) ...",3 stars
2,3,Arizona,Koa Peat Commits To Arizona,Koa Peat Commits To Arizona - RealGM Wiretap,"Koa Peat, one of the highest-ranked uncommitte...",https://basketball.realgm.com/wiretap/279797/K...,https://basketball.realgm.com/images/nba/4.2/w...,en,2025-03-27T18:51:35.000000Z,basketball.realgm.com,sports,22.228500,"Koa Peat, one of the highest-ranked uncommitte...",1 star
3,4,Arizona,"Henri Veesaar scores 22 points, No. 24 Arizona...","Breaking News, Sports, Manitoba, Canada","TUCSON, Ariz. (AP) — Henri Veesaar scored 22 p...",https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-05T06:36:09.000000Z,winnipegfreepress.com,,22.208956,"TUCSON, Ariz. (AP) — Henri Veesaar scored 22 p...",1 star
4,5,Arizona,"Arizona vs. Arizona State odds, prediction, ti...",SportsLine's model just revealed its college b...,The No. 24 Arizona Wildcats look to clinch a t...,https://www.cbssports.com/college-basketball/n...,https://sportshub.cbsistatic.com/i/r/2025/02/0...,en,2025-03-04T15:30:12.000000Z,cbssports.com,sports,22.017650,The No. 24 Arizona Wildcats look to clinch a t...,4 stars
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,596,Arizona,"USWNT star Sophia Wilson and husband, Arizona ...",A timetable for the forward's return to NWSL a...,U.S. women's national team striker and Portlan...,https://www.cbssports.com/soccer/news/uswnt-st...,https://sportshub.cbsistatic.com/i/r/2025/03/0...,en,2025-03-05T17:48:00.000000Z,cbssports.com,sports,13.868391,U.S. women's national team striker and Portlan...,4 stars
596,597,Arizona,Judge to allow camera in Lori Vallow Daybell's...,Maricopa County Superior Court Judge Justin Be...,Estimated read time: 2-3 minutes\n\nPHOENIX — ...,https://www.ksl.com/article/51273085/judge-to-...,https://img.ksl.com/slc/3046/304618/30461899.j...,en,2025-03-12T00:18:00.000000Z,ksl.com,,13.868391,Estimated read time: 2-3 minutes\n\nPHOENIX — ...,3 stars
597,598,Arizona,Minnesota’s singing McDonald’s drive-thru star...,Daniel Marshall gained fans during COVID by si...,Daniel Marshall gained fans during COVID singi...,https://www.kare11.com/article/news/local/land...,https://media.kare11.com/assets/KARE/images/df...,en,2025-03-18T02:00:00.000000Z,kare11.com,general,13.856299,Daniel Marshall gained fans during COVID singi...,3 stars
598,599,Arizona,Duke gets No. 1 regional seed for March Madnes...,"Breaking News, Sports, Manitoba, Canada",Duke headlines the NCAA Tournament’s East Regi...,https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-16T22:36:23.000000Z,winnipegfreepress.com,general,13.810841,Duke headlines the NCAA Tournament’s East Regi...,5 stars


In [None]:
# calculate state score

def compute_state_sentiment(analyzed_articles):
    """Average the per-article sentiment scores.

    Args:
        analyzed_articles: Iterable of dicts carrying a numeric
            ``sentiment`` value; entries without one are ignored.

    Returns:
        The mean sentiment rounded to two decimals, or ``None`` when there
        is nothing to average. ``None`` (rather than a tuple) keeps the
        return type uniform for callers that store the score directly.
    """
    scores = [
        article.get('sentiment')
        for article in analyzed_articles
        if article.get('sentiment') is not None
    ]

    if not scores:
        return None

    return round(sum(scores) / len(scores), 2)


In [None]:
# Average star-rating sentiment over the analyzed Arizona articles.
compute_state_sentiment(analyzed_articles)

2.61

In [None]:
# Generalize it to all the states

In [None]:
import pandas as pd

In [None]:
# Load the trial-location table (one row per NCT ID / facility, judging by the preview).
# NOTE(review): zip codes such as 7066 appear without a leading zero in the
# preview; if they matter downstream, consider dtype={"Zip Code": str} - confirm.
depo_location = pd.read_csv("Regeneron_cleaned_triallocation.csv")
depo_location

Unnamed: 0,NCT ID,Title,Status,Lead Sponsor,Collaborators,Facility,State,Country,Zip Code
0,NCT01507831,Long-term Safety and Tolerability of Alirocuma...,COMPLETED,Sanofi,Regeneron Pharmaceuticals,Investigational Site Number 840159,Alabama,United States,35801
1,NCT01507831,Long-term Safety and Tolerability of Alirocuma...,COMPLETED,Sanofi,Regeneron Pharmaceuticals,Investigational Site Number 840028,Arizona,United States,85295
2,NCT01507831,Long-term Safety and Tolerability of Alirocuma...,COMPLETED,Sanofi,Regeneron Pharmaceuticals,Investigational Site Number 840035,Arizona,United States,85635
3,NCT01507831,Long-term Safety and Tolerability of Alirocuma...,COMPLETED,Sanofi,Regeneron Pharmaceuticals,Investigational Site Number 840052,Arizona,United States,85282
4,NCT01507831,Long-term Safety and Tolerability of Alirocuma...,COMPLETED,Sanofi,Regeneron Pharmaceuticals,Investigational Site Number 840065,Arizona,United States,85282
...,...,...,...,...,...,...,...,...,...
16378,NCT01773954,Repeated Eye Injections of Aflibercept for Tre...,COMPLETED,MidAtlantic Retina,Regeneron Pharmaceuticals,Mid Atlantic Retina- Wills Eye Institute,Pennsylvania,United States,19107
16379,NCT05036733,Effects of Interleukin (IL)- 4R-alpha Inhibiti...,COMPLETED,University of Michigan,Regeneron Pharmaceuticals,University of Michigan,Michigan,United States,48109
16380,NCT06439654,Atlantic Lipid Lowering Treatment Optimization...,NOT_YET_RECRUITING,Atlantic Health System,Regeneron Pharmaceuticals,Atlantic Medical Group,New Jersey,United States,7066
16381,NCT06439654,Atlantic Lipid Lowering Treatment Optimization...,NOT_YET_RECRUITING,Atlantic Health System,Regeneron Pharmaceuticals,Atlantic Medical Group,New Jersey,United States,7081


In [None]:
# Display the trial-location table again.
depo_location

In [None]:
# Distinct non-null values of the 'State' column in sorted order.
# The result stays an array; the count includes non-US regions because no
# country filter is applied here.
unique_states = depo_location['State'].dropna().sort_values().unique()
len(unique_states)

496

In [None]:
# Sorted distinct states restricted to rows whose Country is 'United States'.
is_us_site = depo_location['Country'] == 'United States'
us_states = depo_location.loc[is_us_site, 'State'].dropna().sort_values().unique()
len(us_states)

49

In [None]:
import os

# The API token is a secret: read it from the environment instead of storing
# it in the notebook. A token that was committed earlier should be revoked.
api_key = os.environ.get("THENEWSAPI_TOKEN")
if not api_key:
    raise RuntimeError("Set the THENEWSAPI_TOKEN environment variable to your API token.")

In [None]:
# limited version

def fetch_location_news(api_key, location="Arizona", days_back=30, max_pages=2, timeout=30):
    """Fetch a bounded number of result pages of articles matching ``location``.

    Requests English-language articles from the ``/v1/news/all`` endpoint and
    keeps only those whose ``relevance_score`` is greater than 5. Paging
    stops after ``max_pages`` pages, or earlier on an empty page, a non-200
    response or a network error.

    Args:
        api_key: API token sent as ``api_token``.
        location: Search term, e.g. a state name.
        days_back: Only articles published within this many days are requested.
        max_pages: Maximum number of pages to request; ``None`` means no limit.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        List of article dicts as returned by the API (filtered by relevance).
    """
    BASE_URL = "https://api.thenewsapi.com/v1/news/all"
    start_date = (datetime.utcnow() - timedelta(days=days_back)).strftime("%Y-%m-%d")
    all_articles = []
    page = 1

    while max_pages is None or page <= max_pages:
        params = {
            "api_token": api_key,
            "language": "en",
            "search": location,
            "limit": 100,
            "page": page,
            "published_after": start_date
        }

        try:
            # Without a timeout a stalled connection would hang the whole run.
            response = requests.get(BASE_URL, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Keep the pages fetched so far rather than aborting with a traceback.
            print(f"[{location}] Page {page} - Error: {exc}")
            break

        if response.status_code != 200:
            print(f"[{location}] Page {page} - Error: {response.text}")
            break

        data = response.json().get("data", [])
        if not data:
            break

        filtered_data = [
            article for article in data
            if article.get("relevance_score") and article["relevance_score"] > 5
        ]

        all_articles.extend(filtered_data)
        page += 1

    return all_articles


In [None]:
def analyze_all_states(api_key, db_name="location_news_full.db", states=None):
    """Fetch, analyze and store news for each state and collect the scores.

    Args:
        api_key: API token forwarded to ``fetch_location_news``.
        db_name: SQLite file forwarded to ``store_full_analyzed_articles``.
        states: Iterable of state names to process. Defaults to the
            notebook-level ``us_states`` so existing calls keep working.

    Returns:
        Dict mapping each state name to the value returned by
        ``compute_state_sentiment``, or ``None`` when no articles were
        fetched for that state.
    """
    if states is None:
        # Fall back to the global list built earlier in the notebook.
        states = us_states

    sentiment_scores = {}

    for state in states:
        print(f"\nProcessing: {state}")
        raw_articles = fetch_location_news(api_key, state)

        if not raw_articles:
            print(f"No articles found for {state}")
            sentiment_scores[state] = None
            continue

        # Topics are not needed for the per-state score.
        analyzed_articles, _ = analyze_news(raw_articles)

        # Store into SQLite
        store_full_analyzed_articles(state, raw_articles, analyzed_articles, db_name)

        # Compute sentiment
        score = compute_state_sentiment(analyzed_articles)
        sentiment_scores[state] = score
        print(f"{state} sentiment score: {score}")

    return sentiment_scores


In [None]:
# Run the fetch -> analyze -> store pipeline for every state and keep the scores.
sentiment_scores = analyze_all_states(api_key, db_name="location_news_full.db")


📰 Processing: Alabama
✅ Alabama sentiment score: 3.83

📰 Processing: Arizona
✅ Arizona sentiment score: 2.5

📰 Processing: Arkansas
✅ Arkansas sentiment score: 3.17

📰 Processing: California
✅ California sentiment score: 2.67

📰 Processing: Colorado
✅ Colorado sentiment score: 2.83

📰 Processing: Connecticut
✅ Connecticut sentiment score: 1.67

📰 Processing: Delaware
✅ Delaware sentiment score: 2.67

📰 Processing: District of Columbia
✅ District of Columbia sentiment score: 1.5

📰 Processing: Florida
✅ Florida sentiment score: 2.67

📰 Processing: Georgia
✅ Georgia sentiment score: 2.67

📰 Processing: Hawaii
✅ Hawaii sentiment score: 3.0

📰 Processing: Idaho
✅ Idaho sentiment score: 2.0

📰 Processing: Illinois
✅ Illinois sentiment score: 3.17

📰 Processing: Indiana
✅ Indiana sentiment score: 3.17

📰 Processing: Iowa
✅ Iowa sentiment score: 2.67

📰 Processing: Kansas
✅ Kansas sentiment score: 3.67

📰 Processing: Kentucky
✅ Kentucky sentiment score: 2.0

📰 Processing: Louisiana
✅ Louisia

In [None]:
# Show the per-state sentiment scores returned by analyze_all_states.
sentiment_scores

{'Alabama': 3.83,
 'Arizona': 2.5,
 'Arkansas': 3.17,
 'California': 2.67,
 'Colorado': 2.83,
 'Connecticut': 1.67,
 'Delaware': 2.67,
 'District of Columbia': 1.5,
 'Florida': 2.67,
 'Georgia': 2.67,
 'Hawaii': 3.0,
 'Idaho': 2.0,
 'Illinois': 3.17,
 'Indiana': 3.17,
 'Iowa': 2.67,
 'Kansas': 3.67,
 'Kentucky': 2.0,
 'Louisiana': 2.67,
 'Maine': 2.5,
 'Maryland': 3.33,
 'Massachusetts': 1.83,
 'Michigan': 1.67,
 'Minnesota': 2.5,
 'Mississippi': 2.5,
 'Missouri': 3.17,
 'Montana': 2.67,
 'Nebraska': 2.5,
 'Nevada': 4.67,
 'New Hampshire': 2.17,
 'New Jersey': 3.0,
 'New Mexico': 3.0,
 'New York': 3.5,
 'North Carolina': 1.83,
 'North Dakota': 1.67,
 'Ohio': 3.83,
 'Oklahoma': 3.33,
 'Oregon': 3.67,
 'Pennsylvania': 2.67,
 'Rhode Island': 3.5,
 'South Carolina': 2.17,
 'South Dakota': 1.0,
 'Tennessee': 3.0,
 'Texas': 3.83,
 'Utah': 3.0,
 'Vermont': 4.33,
 'Virginia': 2.5,
 'Washington': 3.17,
 'West Virginia': 2.5,
 'Wisconsin': 4.0}

In [None]:

import csv

# One record per state; a score of None is written by the csv module as an empty cell.
sentiment_data = [{"State": state, "Sentiment Score": score} for state, score in sentiment_scores.items()]

# Single source of truth for the output path: the file is written next to the
# notebook, and the same name is used for opening and for the message.
csv_filename = "state_sentiment_scores.csv"

with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["State", "Sentiment Score"])
    writer.writeheader()
    writer.writerows(sentiment_data)

print(f"✅ Saved to {csv_filename}")

✅ Saved to state_sentiment_scores.csv


In [None]:
# NOTE: this repeats the earlier Arizona store call; every call writes the
# Arizona rows again unless the store function de-duplicates them.
db_file_path = store_full_analyzed_articles("Arizona", arizona_articles, analyzed_articles)
print(f"Saved to: {db_file_path}")

import pandas as pd
from contextlib import closing

# Read from the file that was just written and close the connection once the
# frame is loaded instead of leaving it open.
with closing(sqlite3.connect(db_file_path)) as conn:
    df = pd.read_sql("SELECT * FROM news_analysis", conn)
df

Saved to: location_news_full.db


Unnamed: 0,id,location,title,description,snippet,url,image_url,language,published_at,source,categories,relevance_score,summary,sentiment
0,1,Arizona,Ranking Arizona: Top 10 Italian restaurants in...,Here are the Top 10 Italian restaurants in Ari...,Featured\n\nHere are the Top 10 Italian restau...,https://azbigmedia.com/lifestyle/ranking-arizo...,https://azbigmedia.com/wp-content/uploads/2025...,en,2025-03-22T12:34:16.000000Z,agriculture.einnews.com,food,23.638367,Featured\n\nHere are the Top 10 Italian restau...,5 stars
1,2,Arizona,Arizona State faces No. 22 Arizona after Sanon...,"Breaking News, Sports, Manitoba, Canada","Arizona State Sun Devils (13-16, 4-14 Big 12) ...",https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-03T08:45:47.000000Z,winnipegfreepress.com,general,23.514057,"Arizona State Sun Devils (13-16, 4-14 Big 12) ...",3 stars
2,3,Arizona,Koa Peat Commits To Arizona,Koa Peat Commits To Arizona - RealGM Wiretap,"Koa Peat, one of the highest-ranked uncommitte...",https://basketball.realgm.com/wiretap/279797/K...,https://basketball.realgm.com/images/nba/4.2/w...,en,2025-03-27T18:51:35.000000Z,basketball.realgm.com,sports,22.228500,"Koa Peat, one of the highest-ranked uncommitte...",1 star
3,4,Arizona,"Henri Veesaar scores 22 points, No. 24 Arizona...","Breaking News, Sports, Manitoba, Canada","TUCSON, Ariz. (AP) — Henri Veesaar scored 22 p...",https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-05T06:36:09.000000Z,winnipegfreepress.com,,22.208956,"TUCSON, Ariz. (AP) — Henri Veesaar scored 22 p...",1 star
4,5,Arizona,"Arizona vs. Arizona State odds, prediction, ti...",SportsLine's model just revealed its college b...,The No. 24 Arizona Wildcats look to clinch a t...,https://www.cbssports.com/college-basketball/n...,https://sportshub.cbsistatic.com/i/r/2025/02/0...,en,2025-03-04T15:30:12.000000Z,cbssports.com,sports,22.017650,The No. 24 Arizona Wildcats look to clinch a t...,4 stars
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,1196,Arizona,"USWNT star Sophia Wilson and husband, Arizona ...",A timetable for the forward's return to NWSL a...,U.S. women's national team striker and Portlan...,https://www.cbssports.com/soccer/news/uswnt-st...,https://sportshub.cbsistatic.com/i/r/2025/03/0...,en,2025-03-05T17:48:00.000000Z,cbssports.com,sports,13.868391,U.S. women's national team striker and Portlan...,4
1196,1197,Arizona,Judge to allow camera in Lori Vallow Daybell's...,Maricopa County Superior Court Judge Justin Be...,Estimated read time: 2-3 minutes\n\nPHOENIX — ...,https://www.ksl.com/article/51273085/judge-to-...,https://img.ksl.com/slc/3046/304618/30461899.j...,en,2025-03-12T00:18:00.000000Z,ksl.com,,13.868391,Estimated read time: 2-3 minutes\n\nPHOENIX — ...,3
1197,1198,Arizona,Minnesota’s singing McDonald’s drive-thru star...,Daniel Marshall gained fans during COVID by si...,Daniel Marshall gained fans during COVID singi...,https://www.kare11.com/article/news/local/land...,https://media.kare11.com/assets/KARE/images/df...,en,2025-03-18T02:00:00.000000Z,kare11.com,general,13.856299,Daniel Marshall gained fans during COVID singi...,3
1198,1199,Arizona,Duke gets No. 1 regional seed for March Madnes...,"Breaking News, Sports, Manitoba, Canada",Duke headlines the NCAA Tournament’s East Regi...,https://www.winnipegfreepress.com/uncategorize...,https://www.winnipegfreepress.com/wp-content/u...,en,2025-03-16T22:36:23.000000Z,winnipegfreepress.com,general,13.810841,Duke headlines the NCAA Tournament’s East Regi...,5


In [None]:
# Load active_sites_filtered.csv and collect the distinct non-null countries.
# Note that `df` previously held the news_analysis table and is rebound here.
df = pd.read_csv("active_sites_filtered.csv")
countries = pd.unique(df["Country"].dropna()).tolist()

In [None]:
# Number of distinct countries found in the file.
len(countries)

25