<a href="https://colab.research.google.com/github/tharunkumardeveloper/AI-INTEL/blob/main/AI_INTEL_API_FULL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 📘 AI_INTEL: Methods & API Documentation

This notebook contains the **actual working methods** from AI_INTEL, combined with FastAPI endpoints.


In [2]:
# Quick environment check
!python --version
import sys
print("python sys.version:", sys.version.splitlines()[0])


Python 3.12.11
python sys.version: 3.12.11 (main, Jun  4 2025, 08:56:18) [GCC 11.4.0]


In [None]:
## --- Extracted from original AI_INTEL.ipynb ---
#Installing Dependencies
!pip -q install streamlit==1.37.1 pandas numpy requests praw feedparser scikit-learn transformers torch nltk wordcloud matplotlib==3.8.4
!pip -q install newsapi-python==0.2.7
!npm -g install localtunnel >/dev/null



In [None]:
## --- Extracted from original AI_INTEL.ipynb ---
!pip install "watchdog>=6,<7"
!pip uninstall -y google-adk


In [5]:
## --- Extracted from original AI_INTEL.ipynb ---
import os

# ======== STEP 1: News API key (NewsAPI.org) ========
# Create a key and paste here:
os.environ["NEWSAPI_KEY"] = "fcfc712a42a14f208e6e41d98895c0cc"   # <-- required for NewsAPI

# ======== STEP 2: Reddit app creds (https://www.reddit.com/prefs/apps) ========
# Create a "script" app, then paste:
os.environ["REDDIT_CLIENT_ID"] = "kz-_pL7PBgKUF5l6V4eJbg"
os.environ["REDDIT_CLIENT_SECRET"] = "CeJYetQTYUSmg044UgfDiXQ7GhTtdQ"
os.environ["REDDIT_USER_AGENT"] = "TharunKumarC/0.1 by Charming_Risk7231"

# STEP 3 YOUTUBE DATA API:
os.environ["YOUTUBE_API_KEY"] = "AIzaSyCs4ZMmv7dlmOWMlISFO9sHiQztjSpbqgQ"

# STEP 4: TWITTER API KEY:
os.environ["TWITTER_BEARER_TOKEN"] = "AAAAAAAAAAAAAAAAAAAAAP8U3wEAAAAAwVRaPf0WVt3iDjqf5Jd7o8AYa20%3DJuGnz9OlzMtTqIG3BFtVBq903QR5Zxk2zVlCFpms2CWZkwx9ju"

In [6]:
## --- Extracted from original AI_INTEL.ipynb ---
#SETTING UP COLLECTORS
import time, re, json, requests, math, feedparser, pandas as pd
from datetime import datetime, timedelta, timezone
import praw

# ------------------- Shared Settings -------------------
QUERY = '("artificial intelligence" OR "machine learning" OR "deep learning" OR "neural networks" OR "AI research" OR "AI chip" OR "AI model" OR "AI startup" OR "AI tool" OR "AI ethics" OR "AI governance" OR "LLM" OR "ChatGPT" OR "OpenAI" OR "Anthropic" OR "DeepMind")'
TODAY = datetime.now(timezone.utc)
FROM = (TODAY - timedelta(days=3)).strftime("%Y-%m-%d")  # last 3 days

def norm(s):
    return None if s is None else str(s).strip()

def base_row():
    # Standard schema for all sources
    return {
        "source": "",
        "title": "",
        "text": "",
        "url": "",
        "author": "",
        "published_at": "",
        "score": None,
        "raw": {}
    }

# ---------- NewsAPI ----------
def fetch_newsapi(query=QUERY, page_size=100, pages=2):
    key = os.getenv("NEWSAPI_KEY", "")
    if not key:
        print("NewsAPI key not set — skipping.")
        return []
    rows=[]
    for page in range(1, pages+1):
        url="https://newsapi.org/v2/everything"
        params={
            "q": query, "language":"en", "from": FROM,
            "sortBy":"publishedAt", "pageSize": page_size, "page": page, "apiKey": key
        }
        r = requests.get(url, params=params, timeout=30)
        if r.status_code!=200:
            print("NewsAPI err:", r.text[:200]); break
        data=r.json()
        arts=data.get("articles", [])
        for a in arts:
            row=base_row()
            row.update({
                "source":"News",
                "title": norm(a.get("title")),
                "text": norm(a.get("description") or a.get("content")),
                "url": norm(a.get("url")),
                "author": norm((a.get("source") or {}).get("name")),
                "published_at": norm(a.get("publishedAt")),
                "raw": a
            })
            rows.append(row)
        if len(arts)<page_size: break
        time.sleep(0.5)
    return rows

# ---------- Reddit ----------
def fetch_reddit(query=QUERY, limit=200):
    cid=os.getenv("REDDIT_CLIENT_ID","")
    csec=os.getenv("REDDIT_CLIENT_SECRET","")
    ua=os.getenv("REDDIT_USER_AGENT","")
    if not (cid and csec and ua):
        print("Reddit creds not set — skipping.")
        return []
    reddit=praw.Reddit(client_id=cid, client_secret=csec, user_agent=ua)
    subs = ["MachineLearning", "artificial", "DeepLearning", "OpenAI", "LocalLLaMA", "ChatGPT", "datascience", "computervision", "reinforcementlearning"]
    rows=[]
    for s in subs:
        for p in reddit.subreddit(s).search(query, sort="new", time_filter="week", limit=math.ceil(limit/len(subs))):
            row=base_row()
            row.update({
                "source":"Reddit",
                "title": norm(p.title),
                "text": norm(p.selftext),
                "url": f"https://www.reddit.com{p.permalink}",
                "author": norm(str(p.author)),
                "published_at": datetime.fromtimestamp(p.created_utc, tz=timezone.utc).isoformat(),
                "score": int(p.score),
                "raw": {"subreddit":s, "id":p.id}
            })
            rows.append(row)
    return rows

# ---------- Hacker News ----------
def fetch_hn(query=QUERY, hits_per_page=100, pages=2):
    rows=[]
    for p in range(pages):
        r=requests.get(
            "https://hn.algolia.com/api/v1/search_by_date",
            params={"query":query, "tags":"story", "hitsPerPage":hits_per_page, "page":p},
            timeout=30
        )
        if r.status_code!=200:
            print("HN err:", r.text[:200]); break
        for h in r.json().get("hits", []):
            row=base_row()
            row.update({
                "source":"HackerNews",
                "title": norm(h.get("title")),
                "text": norm(h.get("story_text")),
                "url": norm(h.get("url") or f"https://news.ycombinator.com/item?id={h.get('objectID')}"),
                "author": norm(h.get("author")),
                "published_at": datetime.fromtimestamp(h.get("created_at_i"), tz=timezone.utc).isoformat(),
                "score": h.get("points"),
                "raw": h
            })
            rows.append(row)
        time.sleep(0.3)
    return rows

# ---------- arXiv ----------
def fetch_arxiv(max_results=200):
    search = 'cat:cs.AI+OR+cat:cs.CL+OR+cat:cs.LG+OR+cat:stat.ML'
    url = f"http://export.arxiv.org/api/query?search_query={search}&sortBy=submittedDate&sortOrder=descending&start=0&max_results={max_results}"
    feed = feedparser.parse(url)
    rows=[]
    for e in feed.entries:
        row=base_row()
        row.update({
            "source":"arXiv",
            "title": norm(e.get("title")),
            "text": norm(e.get("summary")),
            "url": norm(e.get("link")),
            "author": norm(", ".join([a.get("name","") for a in e.get("authors",[])])),
            "published_at": norm(e.get("published")),
            "raw": {"id": e.get("id")}
        })
        rows.append(row)
    return rows

# ---------- YouTube ----------
def fetch_youtube(query='"AI" OR "artificial intelligence" OR "machine learning" OR "deep learning" OR "neural networks" OR "chatgpt" OR "openai" ‘"AI" OR "artificial intelligence" OR "machine learning" OR "deep learning" OR "neural networks" OR "chatgpt" OR "openai” OR "ai"OR"artificial intelligence"OR"machine learning"OR"deep learning"OR"neural network"OR"generative ai"OR"transformers"OR"neural architecture"OR "chatgpt"OR"gpt"OR"llm"OR"large language model"OR"stablediffusion"OR"midjourney"OR"claude"OR"mistral"OR"falcon"OR"bert"OR"roberta"OR"t5"OR"vision transformer"OR "reinforcement learning"OR"self-supervised learning"OR"unsupervised learning"OR"openai"OR"anthropic"OR"deepmind"OR"hugging face"OR"nvidia"OR"google ai"OR"meta ai"OR "microsoft research"OR"ibm watson"OR"stability ai"OR"autonomous driving"OR"self-driving"OR"robotics"OR"computer vision"OR"speech recognition"OR"nlp"OR"natural language processing"OR"ai ethics"OR"ai governance"OR"ai safety"OR"recommendation system"OR"chatbot"OR"diffusion model"OR"deepfake"OR"autoML"OR"vector database"OR"rag"OR"retrieval augmented generation"’', max_results=50):
    key = os.getenv("YOUTUBE_API_KEY", "")
    if not key:
        print("YouTube API key not set — skipping.")
        return []
    url="https://www.googleapis.com/youtube/v3/search"
    params={
        "part":"snippet",
        "q": query,
        "type":"video",
        "maxResults": max_results,
        "order":"date",
        "publishedAfter": (TODAY - timedelta(days=3)).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "key": key
    }
    r=requests.get(url, params=params, timeout=30)
    if r.status_code!=200:
        print("YouTube err:", r.text[:200]); return []
    data=r.json()
    rows=[]
    for item in data.get("items", []):
        sn=item["snippet"]
        row=base_row()
        row.update({
            "source":"YouTube",
            "title": norm(sn.get("title")),
            "text": norm(sn.get("description")),
            "url": f"https://www.youtube.com/watch?v={item['id']['videoId']}",
            "author": norm(sn.get("channelTitle")),
            "published_at": norm(sn.get("publishedAt")),
            "raw": item
        })
        rows.append(row)
    return rows

# ---------- Twitter v2 ----------
def fetch_twitter(query=QUERY, max_results=50):
    token=os.getenv("TWITTER_BEARER_TOKEN","")
    if not token:
        print("Twitter bearer token not set — skipping.")
        return []
    url="https://api.twitter.com/2/tweets/search/recent"
    headers={"Authorization": f"Bearer {token}"}
    params={
        "query": '("artificial intelligence" OR "machine learning" OR "deep learning" OR "neural network" OR "LLM" OR "chatgpt" OR "openai") lang:en -is:retweet -is:reply',
        "tweet.fields":"created_at,author_id,text,public_metrics,lang",
        "max_results": max_results
    }
    r=requests.get(url, headers=headers, params=params, timeout=30)
    if r.status_code!=200:
        print("Twitter err:", r.text[:200]); return []
    data=r.json()
    rows=[]
    for t in data.get("data", []):
        row=base_row()
        row.update({
            "source":"Twitter",
            "title": norm(t.get("text")[:80]+"..."),  # short preview
            "text": norm(t.get("text")),
            "url": f"https://twitter.com/i/web/status/{t.get('id')}",
            "author": norm(t.get("author_id")),
            "published_at": norm(t.get("created_at")),
            "score": t.get("public_metrics",{}).get("like_count",0),
            "raw": t
        })
        rows.append(row)
    return rows


In [7]:
## --- Extracted from original AI_INTEL.ipynb ---
# Run all collectors and combine results

# --- Collect from each source ---
news_rows   = fetch_newsapi(pages=5)        # ~300 news articles
reddit_rows = fetch_reddit(limit=200)       # ~200 reddit posts
hn_rows     = fetch_hn(pages=30)             # ~200 HN stories
arxiv_rows  = fetch_arxiv(max_results=150)  # latest 150 papers
yt_rows     = fetch_youtube(query='"AI" OR "artificial intelligence" OR "machine learning" OR "deep learning" OR "neural networks" OR "chatgpt" OR "openai" ‘"AI" OR "artificial intelligence" OR "machine learning" OR "deep learning" OR "neural networks" OR "chatgpt" OR "openai” OR "ai"OR"artificial intelligence"OR"machine learning"OR"deep learning"OR"neural network"OR"generative ai"OR"transformers"OR"neural architecture"OR "chatgpt"OR"gpt"OR"llm"OR"large language model"OR"stablediffusion"OR"midjourney"OR"claude"OR"mistral"OR"falcon"OR"bert"OR"roberta"OR"t5"OR"vision transformer"OR "reinforcement learning"OR"self-supervised learning"OR"unsupervised learning"OR"openai"OR"anthropic"OR"deepmind"OR"hugging face"OR"nvidia"OR"google ai"OR"meta ai"OR "microsoft research"OR"ibm watson"OR"stability ai"OR"autonomous driving"OR"self-driving"OR"robotics"OR"computer vision"OR"speech recognition"OR"nlp"OR"natural language processing"OR"ai ethics"OR"ai governance"OR"ai safety"OR"recommendation system"OR"chatbot"OR"diffusion model"OR"deepfake"OR"autoML"OR"vector database"OR"rag"OR"retrieval augmented generation"’', max_results=50)  # latest 25 YouTube videos
tw_rows     = fetch_twitter(max_results=50) # latest 50 tweets

# --- Merge everything ---
rows = news_rows + reddit_rows + hn_rows + arxiv_rows + yt_rows + tw_rows

# Show how many items per source
len(rows), {r["source"] for r in rows}


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

Twitter err: {"account_id":1960145033568378881,"product_name":"standard-basic","title":"UsageCapExceeded","period":"Monthly","scope":"Product","detail":"Usage cap exceeded: Monthly product cap","type":"https://api


(463, {'News', 'Reddit', 'YouTube', 'arXiv'})

In [8]:
## --- Extracted from original AI_INTEL.ipynb ---
# ==========================
# Clean, Sentiment & AI Filtering
# ==========================
import pandas as pd
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from sentence_transformers import SentenceTransformer, util
import spacy

nltk.download("vader_lexicon")

# --- Put rows into DataFrame ---
df = pd.DataFrame(rows)

# --- Clean text: remove URLs, trim spaces ---
def clean_text(txt):
    if not isinstance(txt, str):
        return ""
    txt = re.sub(r"http\S+", "", txt)  # remove links
    txt = re.sub(r"\s+", " ", txt).strip()
    return txt

df["text"] = df["text"].apply(clean_text)
df["title"] = df["title"].apply(clean_text)

# --- Sentiment Analysis with VADER ---
sid = SentimentIntensityAnalyzer()
df["sentiment_score"] = df["text"].apply(lambda x: sid.polarity_scores(x)["compound"])
df["sentiment"] = df["sentiment_score"].apply(
    lambda s: "positive" if s > 0.2 else ("negative" if s < -0.2 else "neutral")
)

# ========================
# 🔹 AI Relevance Filtering
# ========================
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
nlp = spacy.load("en_core_web_sm")

ai_keywords = set([
    "ai","artificial intelligence","machine learning","deep learning","neural network",
    "generative ai","transformers","neural architecture",
    "chatgpt","gpt","llm","large language model","stable diffusion","midjourney",
    "claude","mistral","falcon","bert","roberta","t5","vision transformer",
    "reinforcement learning","self-supervised learning","unsupervised learning",
    "openai","anthropic","deepmind","hugging face","nvidia","google ai","meta ai",
    "microsoft research","ibm watson","stability ai",
    "autonomous driving","self-driving","robotics","computer vision","speech recognition",
    "nlp","natural language processing","ai ethics","ai governance","ai safety",
    "recommendation system","chatbot",
    "diffusion model","deepfake","autoML","vector database","rag","retrieval augmented generation"
])
ai_keywords = {kw.lower() for kw in ai_keywords}

def is_ai_related(text, hashtags=[], title=""):
    if not text and not title:
        return False

    combined = (title + " " + text).lower()

    # --- Rule 1: Keyword check ---
    if any(kw in combined for kw in ai_keywords):
        return True

    # --- Rule 2: Semantic similarity ---
    query = "This text is about artificial intelligence, machine learning, neural networks, or AI research."
    emb_text = embed_model.encode(combined, convert_to_tensor=True)
    emb_query = embed_model.encode(query, convert_to_tensor=True)
    sim = util.cos_sim(emb_text, emb_query).item()

    # Lenient if keyword exists
    if any(kw in combined for kw in ai_keywords) and sim >= 0.55:
        return True

    # Stricter if only hashtags indicate AI
    if hashtags and any(h.lower() == "ai" for h in hashtags):
        if sim >= 0.70 and len(combined.split()) > 8:
            return True
        else:
            return False

    # --- Rule 3: NER-based check ---
    doc = nlp(combined)
    for ent in doc.ents:
        if ent.text.lower() in ai_keywords:
            return True

    # --- Rule 4: Length filter ---
    if len(combined.split()) < 5:
        return False

    return sim >= 0.55

# If hashtags column missing, add blank
if "hashtags" not in df.columns:
    df["hashtags"] = [[] for _ in range(len(df))]

# Apply AI filter
df["is_ai"] = df.apply(lambda row: is_ai_related(row["text"], row["hashtags"], row["title"]), axis=1)
df = df[df["is_ai"]].reset_index(drop=True)

# ========================
# 🔹 Extra Cleanup / Junk Removal
# ========================
def is_junk(row):
    txt = str(row["text"]).lower()
    title = str(row["title"]).lower()

    # 1. Very short / no polarity
    if len(txt) < 40 and row["sentiment_score"] == 0.0:
        return True

    # 2. Pure hashtag / link bait
    if txt.startswith("http") or txt.startswith("https"):
        return True
    if title.count("#") > 3:
        return True

    # 3. Meme / entertainment
    junk_terms = [
        "funny","😂","😭","😊","viral","shorts",
        "comedy","song","dance","motivation","facts",
        "podcast","trending","boy","girl"
    ]
    if any(term in txt for term in junk_terms) or any(term in title for term in junk_terms):
        return True

    # 4. Non-English noise unless AI keyword present
    if re.search(r"[ぁ-んァ-ン一-龥]", title) and not any(kw in txt for kw in ai_keywords):
        return True

    return False

df = df[~df.apply(is_junk, axis=1)].reset_index(drop=True)

# Remove duplicates
df = df.drop_duplicates(subset=["title"]).reset_index(drop=True)

# --- Preview ---
df.groupby("source").head(3)[["source","title","sentiment","sentiment_score"]]


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Unnamed: 0,source,title,sentiment,sentiment_score
0,News,ZenaTech Signs Offer to Acquire a UK Telecom S...,positive,0.3182
1,News,Parents Sue ChatGPT Over Their 16-Year-Old Son...,positive,0.2846
2,News,The US really is unlike other rich countries w...,neutral,0.128
71,Reddit,[R] Computational power needs for Machine Lear...,positive,0.939
72,Reddit,"[P] Building a CartPole agent from scratch, in...",positive,0.9546
73,Reddit,[R] What makes active learning or self learnin...,negative,-0.8835
191,arXiv,Model Context Protocols in Adaptive Transport ...,positive,0.5719
192,arXiv,StepWiser: Stepwise Generative Judges for Wise...,positive,0.9752
193,arXiv,Predicting the Order of Upcoming Tokens Improv...,positive,0.3712
329,YouTube,트로트가수 김의영 혼자걷지마세요 트롯노래모음,neutral,0.0


In [9]:
## --- Extracted from original AI_INTEL.ipynb ---
# ==========================
# STEP: Finalize dataset & export for Streamlit
# ==========================
# What this cell does:
# 1) Builds a 'clean' field (title + text) used by the dashboard search/keywords
# 2) Normalizes timestamps to UTC and derives a date column for charts
# 3) Standardizes types and fills missing values
# 4) De-duplicates items by content + URL
# 5) Saves "ai_intel_clean.csv" for the Streamlit app to consume
# 6) Prints a tiny preview so you know it worked

import pandas as pd
import numpy as np

# 1) Build a 'clean' field for search & keyword extraction
df["clean"] = (
    df["title"].fillna("") + ". " + df["text"].fillna("")
).str.strip().str.replace(r"\s+", " ", regex=True)

# 2) Normalize timestamps (UTC) and derive a 'date' column
def to_ts(x):
    try:
        return pd.to_datetime(x, utc=True, errors="coerce")
    except Exception:
        return pd.NaT

df["published_at"] = df["published_at"].apply(to_ts)

# If any timestamps are missing, fill with "now" (UTC naive)
df["published_at"] = df["published_at"].fillna(pd.Timestamp.utcnow())

# Normalize tz: localize if naive, convert if already tz-aware
df["published_at"] = df["published_at"].apply(
    lambda x: x.tz_localize("UTC") if x.tzinfo is None else x.tz_convert("UTC")
)

# Derive date
df["date"] = df["published_at"].dt.date

# 3) Standardize columns/types and fill missing
df["source"] = df["source"].fillna("Unknown")
df["author"] = df["author"].fillna("")
df["url"] = df["url"].fillna("")
df["sentiment"] = df["sentiment"].fillna("neutral")
df["sentiment_score"] = pd.to_numeric(df["sentiment_score"], errors="coerce")
if "score" not in df.columns:
    df["score"] = np.nan
df["score"] = pd.to_numeric(df["score"], errors="coerce")

# 4) De-duplicate on content + URL to keep freshest unique items
dedup_key = df["clean"].str.lower().str.slice(0, 220) + "|" + df["url"]
df = df.loc[~dedup_key.duplicated()].reset_index(drop=True)

# Ensure 'raw' is JSON-serializable so CSV write doesn't choke later processing
df["raw"] = df["raw"].apply(lambda x: x if isinstance(x, (dict, list)) else {})

# 5) Save to CSV for Streamlit
cols = ["source","published_at","date","title","text","clean","url","author",
        "sentiment","sentiment_score","score","raw"]
df_to_save = df[[c for c in cols if c in df.columns]].copy()
df_to_save.to_csv("ai_intel_clean.csv", index=False)

# 6) Tiny preview
print("✅ Saved ai_intel_clean.csv with", df_to_save.shape[0], "rows.")
preview_cols = [c for c in ["source","published_at","title","sentiment","sentiment_score","url"] if c in df_to_save.columns]
display(df_to_save[preview_cols].head(5))

✅ Saved ai_intel_clean.csv with 333 rows.


Unnamed: 0,source,published_at,title,sentiment,sentiment_score,url
0,News,2025-08-26 12:15:00+00:00,ZenaTech Signs Offer to Acquire a UK Telecom S...,positive,0.3182,https://www.globenewswire.com/news-release/202...
1,News,2025-08-26 12:15:00+00:00,Parents Sue ChatGPT Over Their 16-Year-Old Son...,positive,0.2846,https://biztoc.com/x/1c26303c93ffdd70
2,News,2025-08-26 12:13:03+00:00,The US really is unlike other rich countries w...,neutral,0.128,https://theconversation.com/the-us-really-is-u...
3,News,2025-08-26 12:10:46+00:00,Earnings live: MongoDB stock soars on AI tailw...,positive,0.5574,https://finance.yahoo.com/news/live/earnings-l...
4,News,2025-08-26 12:03:51+00:00,"Kylin upgrades operating system with AI, suppo...",positive,0.4019,https://biztoc.com/x/bc8faf105ced4e9c


In [10]:
2## --- Extracted from original AI_INTEL.ipynb ---
from google.colab import drive
drive.mount('/content/drive')  # Mount Google Drive

# Save to a folder in Drive (change path as needed)
csv_path = '/content/drive/MyDrive/ai_intel_clean.csv'
df_to_save.to_csv(csv_path, index=False)

print("✅ Saved ai_intel_clean.csv to Google Drive:", csv_path)


Mounted at /content/drive
✅ Saved ai_intel_clean.csv to Google Drive: /content/drive/MyDrive/ai_intel_clean.csv







## ✅ Notes


- All methods are pulled directly from the original notebook.
- Each snippet is preserved as working codeskjkpal Headers have been reformatted for clarity.
