<a href="https://colab.research.google.com/github/karensheng/API-use-case-demo/blob/main/Trending_tweets_mentioning_a_stock_ticker_Aisa_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Search for tweets that mention a company or its stock ticker; filtering out retweets; with a minimum of 5K likes

In [None]:
# for constructing queries of Advanced Search, please reference https://github.com/igorbrigadir/twitter-advanced-search?tab=readme-ov-file

In [None]:
import os

# Load API key from environment variable
API_KEY = os.getenv("AISA_API_KEY", "YOUR AISA API KEY")

In [None]:
import requests
from urllib.parse import urlencode
import json

BASE_URL = "https://api.aisa.one/apis/v1/twitter/tweet/advanced_search"

QUERY_TYPE = "Latest"

QUERY_STRING = (
    "(NVDA or Nvidia) " #put company name or stock ticker
    "-filter:nativeretweets "
    "min_faves:1000" #minimum count of likes
)

# Define the maximum number of tweets to fetch
MAX_TWEETS_TO_FETCH = 50

INPUT_FILE = "response" # Define INPUT_FILE here

def fetch_tweets(next_cursor=None):
    PARAMS = {
        "queryType": QUERY_TYPE,
        "query": QUERY_STRING,
    }

    if next_cursor:
        PARAMS["nextCursor"] = next_cursor

    HEADERS = {
        "Authorization": f"Bearer {API_KEY}"
    }

    response = requests.get(
        BASE_URL,
        headers=HEADERS,
        params=PARAMS,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    all_tweets = []
    next_cursor = None

    while True:
        print(f"Fetching tweets (current count: {len(all_tweets)})...")
        response_data = fetch_tweets(next_cursor)
        current_page_tweets = response_data.get("tweets", [])
        all_tweets.extend(current_page_tweets)

        next_cursor = response_data.get("next_cursor")
        has_next_page = response_data.get("has_next_page", False)

        if not has_next_page or not next_cursor or len(all_tweets) >= MAX_TWEETS_TO_FETCH:
            if len(all_tweets) >= MAX_TWEETS_TO_FETCH:
                print(f"Reached MAX_TWEETS_TO_FETCH ({MAX_TWEETS_TO_FETCH}). Stopping pagination.")
            elif not has_next_page or not next_cursor:
                print("No more pages or next_cursor available. Stopping pagination.")
            break

    data = {"tweets": all_tweets}
    # Save raw API response to INPUT_FILE
    with open(INPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"Total tweets returned: {len(all_tweets)}")

Fetching tweets (current count: 0)...
Fetching tweets (current count: 20)...
Fetching tweets (current count: 40)...
Reached MAX_TWEETS_TO_FETCH (50). Stopping pagination.
Total tweets returned: 60


In [None]:
import json
from datetime import datetime
from typing import Any, Dict, List, Optional

INPUT_FILE = "response"          # change to "response.json" if needed
OUTPUT_FILE = "normalized_tweets.json"


# ---------- helpers ----------

def parse_datetime(dt: Optional[str]) -> Optional[str]:
    if not dt:
        return None
    try:
        return datetime.strptime(
            dt, "%a %b %d %H:%M:%S %z %Y"
        ).isoformat()
    except Exception:
        return None


def extract_author(author: Dict[str, Any]) -> Dict[str, Any]:
    return {
        "id": author.get("id"),
        "handle": author.get("userName"),
        "name": author.get("name"),
        "followers": author.get("followers"),
        "following": author.get("following"),
        "verified_type": author.get("verifiedType"),
        "is_blue_verified": author.get("isBlueVerified"),
        "location": author.get("location"),
        "profile_picture": author.get("profilePicture"),
        "url": author.get("url") or author.get("twitterUrl"),
    }


def extract_urls(tweet: Dict[str, Any]) -> List[str]:
    return [
        u.get("expanded_url")
        for u in tweet.get("entities", {}).get("urls", [])
        if u.get("expanded_url")
    ]


def extract_symbols(tweet: Dict[str, Any]) -> List[str]:
    return [
        s.get("text")
        for s in tweet.get("entities", {}).get("symbols", [])
        if s.get("text")
    ]


def extract_hashtags(tweet: Dict[str, Any]) -> List[str]:
    return [
        h.get("text")
        for h in tweet.get("entities", {}).get("hashtags", [])
        if h.get("text")
    ]


def extract_media(tweet: Dict[str, Any]) -> List[Dict[str, Any]]:
    media_items = []
    for m in tweet.get("extendedEntities", {}).get("media", []):
        item = {
            "type": m.get("type"),
            "url": m.get("media_url_https"),
        }
        if m.get("type") == "video":
            vi = m.get("video_info", {})
            item["duration_ms"] = vi.get("duration_millis")
            item["variants"] = [
                {
                    "content_type": v.get("content_type"),
                    "bitrate": v.get("bitrate"),
                    "url": v.get("url"),
                }
                for v in vi.get("variants", [])
            ]
        media_items.append(item)
    return media_items


def extract_card(card: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    if not card:
        return None

    def get_val(key):
        for kv in card.get("binding_values", []):
            if kv.get("key") == key:
                return kv.get("value", {}).get("string_value")
        return None

    return {
        "type": card.get("name"),
        "title": get_val("title"),
        "description": get_val("description"),
        "domain": get_val("domain"),
        "url": get_val("card_url"),
    }


# ---------- normalization ----------

def normalize_tweet(tweet: Dict[str, Any]) -> Dict[str, Any]:
    return {
        "id": tweet.get("id"),
        "conversation_id": tweet.get("conversationId"),
        "created_at": parse_datetime(tweet.get("createdAt")),
        "lang": tweet.get("lang"),
        "text": tweet.get("text"),
        "like_count": tweet.get("likeCount"),
    }

In [None]:
with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

tweets = raw_data.get('tweets', [])

normalized_tweets = []
for tweet in tweets:
    normalized_tweets.append(normalize_tweet(tweet))

with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
    json.dump(normalized_tweets, f, ensure_ascii=False, indent=2)

print(f"Normalized {len(normalized_tweets)} tweets and saved to '{OUTPUT_FILE}'.")

Normalized 60 tweets and saved to 'normalized_tweets.json'.


In [None]:
import pandas as pd
import json

with open('normalized_tweets.json', 'r', encoding='utf-8') as f:
    normalized_tweets = json.load(f)

df = pd.DataFrame(normalized_tweets)

# Sort by like_count in descending order
df_sorted = df.sort_values(by='like_count', ascending=False)
display(df_sorted.head())

Unnamed: 0,id,conversation_id,created_at,lang,text,like_count
12,2008578333777572185,2008578333777572185,2026-01-06T16:36:07+00:00,en,ðŸ‡ºðŸ‡¸ $4.6 TRILLION NVIDIA CEO JUST SAID THEY ARE...,4255
32,2008578333777572185,2008578333777572185,2026-01-06T16:36:07+00:00,en,ðŸ‡ºðŸ‡¸ $4.6 TRILLION NVIDIA CEO JUST SAID THEY ARE...,4255
52,2008578333777572185,2008578333777572185,2026-01-06T16:36:07+00:00,en,ðŸ‡ºðŸ‡¸ $4.6 TRILLION NVIDIA CEO JUST SAID THEY ARE...,4255
15,2008547388731125961,2008547388731125961,2026-01-06T14:33:09+00:00,en,yeah man check out these cool videos with 0 pu...,3864
35,2008547388731125961,2008547388731125961,2026-01-06T14:33:09+00:00,en,yeah man check out these cool videos with 0 pu...,3864


In [None]:
initial_tweet_count = len(df_sorted)
df_unique = df_sorted.drop_duplicates(subset=['id'], keep='first')
duplicates_removed = initial_tweet_count - len(df_unique)

print(f"Initial number of tweets: {initial_tweet_count}")
print(f"Number of duplicate tweets removed: {duplicates_removed}")
print(f"Number of unique tweets remaining: {len(df_unique)}")
display(df_unique.head(20))

Initial number of tweets: 60
Number of duplicate tweets removed: 40
Number of unique tweets remaining: 20


Unnamed: 0,id,conversation_id,created_at,lang,text,like_count
12,2008578333777572185,2008578333777572185,2026-01-06T16:36:07+00:00,en,ðŸ‡ºðŸ‡¸ $4.6 TRILLION NVIDIA CEO JUST SAID THEY ARE...,4255
15,2008547388731125961,2008547388731125961,2026-01-06T14:33:09+00:00,en,yeah man check out these cool videos with 0 pu...,3864
47,2008630412080205864,2008630412080205864,2026-01-06T20:03:03+00:00,en,Nvidia CEO Jensen Huang in new interview today...,2966
19,2008505440896962567,2008505440896962567,2026-01-06T11:46:27+00:00,tr,Nvidia da hep 725 mb'lÄ±k gÃ¼ncelleme yapÄ±yor. B...,2824
49,2008581922977636776,2008581922977636776,2026-01-06T16:50:22+00:00,en,"â‚¿REAKING: NVIDIA CEO Jensen Huang says, â€œBitco...",2710
26,2008631034099675472,2008631034099675472,2026-01-06T20:05:31+00:00,en,BREAKING: Elon Musk's xAI announces it has rai...,2480
57,2008520320865685709,2008520320865685709,2026-01-06T12:45:35+00:00,pt,novas cenas de gameplay de Resident Evil Requi...,2271
11,2008581445976289680,2008581445976289680,2026-01-06T16:48:29+00:00,en,JUST IN: $5 TRILLION NVIDIA CEO JUST SAID IT I...,2179
53,2008577044276511140,2008577044276511140,2026-01-06T16:30:59+00:00,en,Welcome to 2026 where &gt; $100B is erased fro...,2028
44,2008640419819925699,2008640419819925699,2026-01-06T20:42:49+00:00,en,Jensen Huang says he isn't concerned with Cali...,2028
