## Dependencies

In [None]:
!pip install macrocosmos dotenv pandas

## Setup

In [14]:
import pandas as pd

from datetime import datetime, timezone, timedelta
from dotenv import load_dotenv
import os
import macrocosmos

# Load environment variables via python-dotenv before reading the API key.
load_dotenv()

# Async SN13 client, authenticated with the MACROCOSMOS_API_KEY environment variable.
client = macrocosmos.AsyncSn13Client(
    api_key=os.getenv("MACROCOSMOS_API_KEY"),
)

In [28]:
# Query window: the seven days ending now, as timezone-aware UTC datetimes.
end_dt = datetime.now(tz=timezone.utc)
start_dt = end_dt - timedelta(weeks=1)  # up to minute precision

# Display both bounds as ISO-8601 strings.
(start_dt.isoformat(), end_dt.isoformat())

('2025-09-17T09:55:40.506139+00:00', '2025-09-24T09:55:40.506139+00:00')

## Pull Tweets

In [31]:
resp = await client.sn13.OnDemandData(
    source='x', # or 'reddit', 'youtube' -- more examples on following sections
    usernames=None, # None or username(s) ex. ['elonmusk'] or ['elonmusk', 'MikeTyson']
    keywords=["#bitcoin"], # keyword (ex. 'ai'), hashtag (ex. '#bittensor'), or cashtag ('$AAPL')
    start_date=start_dt.date().isoformat(),
    end_date=end_dt.date().isoformat(),  # up to minute precision
    limit=10, # up to 1000
)

f"Response Status: {resp['status']}", f"Data Available: {not resp['meta'].get('no_data_available', False)}"

('Response Status: success', 'Data Available: True')

In [33]:
# Take the returned records (empty list when the key is absent) and flatten them into a DataFrame.
data = resp.get("data", [])
df = pd.json_normalize(data)

# Preview the first three rows.
df.head(n=3)

Unnamed: 0,text,datetime,uri,source,content_size_bytes,label,tweet.quote_count,tweet.id,tweet.quoted_tweet_id,tweet.reply_count,...,user.verified,user.id,user.user_location,user.following_count,user.display_name,user.followers_count,user.user_description,user.user_blue_verified,user.profile_image_url,user.username
0,"Spring loaded \n$128,000 next \n#bitcoin",2025-09-23T23:59:57+00:00,https://x.com/NetHogger/status/197063930410504...,X,1090.0,#bitcoin,1.0,1970639304105042187,,0.0,...,False,1434219190525726720,"California, USA",8.0,NetHogger,956.0,Comics | Technical Analyst 🇸🇻🇬🇹| \nE...,False,https://pbs.twimg.com/profile_images/195401515...,NetHogger
1,So an influencer faked @Dennis a headline and...,2025-09-23T23:59:49+00:00,https://x.com/CapnBit/status/1970639271917994155,X,1162.0,#bitcoin,0.0,1970639271917994155,,0.0,...,True,3403230149,,754.0,CapnBit💎,520.0,Sharing cutting-edge news 📡 wealth insights 💎,True,https://pbs.twimg.com/profile_images/193387493...,CapnBit
2,"現在のBitcoin価格: ¥16,553,957 ($112,010) 1ドル147.79...",2025-09-23T23:59:40+00:00,https://x.com/Bitcoin784388/status/19706392342...,X,1772.0,#bitcoin,0.0,1970639234274074740,,0.0,...,False,1944954455038025732,Japan,288.0,リアルタイムBTC価格トラッカー,121.0,ビットコインの最新価格を85分ごとに自動更新でお届けするBOTアカウントです。円とドルでのB...,False,https://pbs.twimg.com/profile_images/194503286...,Bitcoin784388


In [35]:
# Show the dtype pandas assigned to each column of the tweets frame.
df.dtypes

text                           object
datetime                       object
uri                            object
source                         object
content_size_bytes            float64
label                          object
tweet.quote_count             float64
tweet.id                       object
tweet.quoted_tweet_id          object
tweet.reply_count             float64
tweet.like_count              float64
tweet.conversation_id          object
tweet.hashtags                 object
tweet.retweet_count           float64
tweet.is_quote                   bool
tweet.bookmark_count          float64
tweet.in_reply_to_user_id      object
tweet.language                 object
tweet.is_retweet                 bool
tweet.is_reply                   bool
tweet.view_count              float64
tweet.in_reply_to              object
tweet.in_reply_to_username     object
user.cover_picture_url         object
user.verified                    bool
user.id                        object
user.user_lo

### Most Influential Users by Followers

In [43]:
print("\nTop 5 users by followers:")

# Work on a copy of the user columns so the shared tweets frame `df` is not
# modified and the cell gives the same result when re-run.
user_cols = ['user.id', 'user.display_name', 'user.username', 'user.followers_count']
users = df[user_cols].copy()

# Coerce follower counts to numbers; unparseable or missing values count as 0.
users['user.followers_count'] = pd.to_numeric(
    users['user.followers_count'], errors='coerce'
).fillna(0)

# One row per username (rows with a missing username are excluded by dropna=True),
# keeping the last non-null display name and the largest follower count seen.
grouped = (
    users.groupby('user.username', dropna=True)
         .agg({'user.display_name': 'last', 'user.followers_count': 'max'})
         .reset_index()
)

top_users = grouped.sort_values('user.followers_count', ascending=False).head(5)

for i, row in top_users.reset_index(drop=True).iterrows():
    name = row['user.display_name']
    uname = row['user.username']
    followers = int(row['user.followers_count'])
    # Fall back to the bare username when no display name is available,
    # instead of printing "nan (username)".
    label = f"{name} ({uname})" if pd.notna(name) and name else str(uname)
    print(f"{i+1}. 👤 {label} — {followers} followers")


Top 5 users by followers:
1. 👤 Mythical (themythicalart) — 2350 followers
2. 👤 Bpay News (bpaynews) — 1868 followers
3. 👤 NetHogger (NetHogger) — 956 followers
4. 👤 Margarida Barbosa (margaridaB_9) — 770 followers
5. 👤 SHIBAINU MX (j_vander_berg) — 737 followers


### Most Liked Tweets

In [45]:
print("\nMost 5 liked tweets:")

# Rank tweets by like count. The counts are coerced to numbers on a derived
# frame (missing/unparseable -> 0) so that int() below cannot fail on NaN
# when fewer than five tweets carry a like count; `df` itself is unchanged.
top_liked = (
    df.assign(**{
        'tweet.like_count': pd.to_numeric(df['tweet.like_count'], errors='coerce').fillna(0)
    })
      .sort_values('tweet.like_count', ascending=False)
      .loc[:, ['tweet.like_count', 'text', 'user.display_name', 'user.username', 'uri']]
      .head(5)
)

# Pretty print: rank, like count and author, then the text and link.
for i, row in top_liked.reset_index(drop=True).iterrows():
    print(f"{i+1}. ❤️ {int(row['tweet.like_count'])} Likes | {row['user.display_name']} (@{row['user.username']})")
    print(f"   {row['text']}")
    print(f"   {row['uri']}")

    print()
    print('=====')
    print()


Most 5 liked tweets:
1. ❤️ 1 Likes | Margarida Barbosa (@margaridaB_9)
   $AEVO 
LONG
LEV 50-75x
ENTRY: 0.11692
               
TP:
0.1202 ✅
Join our telegram community.
Click below ⬇️#bitcoin  #crypto #btc #eth #altcoin #trading #signal #memecoin #forex #xau #investing #gold #blockchain #AEVOUSDT
   https://x.com/margaridaB_9/status/1970639066413580748

=====

2. ❤️ 1 Likes | Tech & Finance Pulse NG (@TechNaijaWire)
   Outflows like this show how sensitive crypto ETFs are to Fed moves, traders take profits fast. But long-term, these dips often reset leverage and clear the way for stronger inflows when the dust settles. #Bitcoin #Ethereum #ETFs
   https://x.com/TechNaijaWire/status/1970638644529770880

=====

3. ❤️ 1 Likes | Lola Coin (@lolacoinews)
   Cloud mining platforms offer new passive income opportunities for investors --&gt;#crypto #btc #bitcoin #cryptocurrency #blockchain #trading #cryptonews #cryptomarket #cryptotrading #cryptocurrencies #cryptomining #ethereum #mining #reg

## Scrape Reddit Posts + Comments

In [52]:
resp = await client.sn13.OnDemandData(
    source='reddit',
    usernames=None,
    keywords=["r/Bitcoin"], # to combine subreddit and keywords/hashtags, you can pass a list ex. ['r/Bitcoin', 'ethereum']
    start_date=start_dt.date().isoformat(),
    end_date=end_dt.date().isoformat(),
    limit=100, # up to 1000
)

f"Response Status: {resp['status']}", f"Data Available: {not resp['meta'].get('no_data_available', False)}"

('Response Status: success', 'Data Available: True')

In [53]:
# Build a DataFrame from the returned Reddit records (empty list when the key is absent).
data = resp.get("data", [])
df = pd.json_normalize(data)

# Preview the first three rows.
df.head(n=3)

Unnamed: 0,id,communityName,upvote_ratio,url,media,label,num_comments,datetime,uri,source,dataType,content_size_bytes,createdAt,title,score,parentId,body,username,is_nsfw
0,t3_1noxi4q,r/Bitcoin,0.72,https://www.reddit.com/r/Bitcoin/comments/1nox...,[https://i.redd.it/3eddjkq360rf1.jpeg],r/bitcoin,14.0,2025-09-23T23:59:17+00:00,https://www.reddit.com/r/Bitcoin/comments/1nox...,REDDIT,post,557.0,2025-09-23T23:59:00+00:00,How many years to freedom? Depends on your wee...,21.0,,note: this chart factors in Bitcoin’s price ap...,Kazgarth_,False
1,t3_1noxan7,r/Bitcoin,0.67,https://www.reddit.com/r/Bitcoin/comments/1nox...,"[https://v.redd.it/5jy92phw40rf1, https://exte...",r/bitcoin,0.0,2025-09-23T23:49:47+00:00,https://www.reddit.com/r/Bitcoin/comments/1nox...,REDDIT,post,518.0,2025-09-23T23:49:00+00:00,Just shipped bitcoin gift tracker,1.0,,,sbounmy,False
2,t3_1nowsku,r/Bitcoin,0.26,https://www.reddit.com/r/Bitcoin/comments/1now...,"[https://i.redd.it/7bfi0tjx00rf1.jpg, https://...",r/bitcoin,9.0,2025-09-23T23:27:11+00:00,https://www.reddit.com/r/Bitcoin/comments/1now...,REDDIT,post,835.0,2025-09-23T23:27:00+00:00,Look at this clown,0.0,,Context: i asked the bitcoin community what th...,Abberate96,False


In [51]:
# Show the dtype pandas assigned to each column of the Reddit frame.
df.dtypes

id                     object
url                    object
upvote_ratio          float64
communityName          object
media                  object
label                  object
num_comments          float64
datetime               object
uri                    object
source                 object
dataType               object
content_size_bytes    float64
title                  object
createdAt              object
score                 float64
parentId               object
body                   object
username               object
is_nsfw                  bool
dtype: object

### Most Liked Posts

In [61]:
# Keep only top-level posts; the copy lets us adjust columns without touching `df`.
posts = df[df['dataType'] == 'post'].copy()

# Coerce scores to numbers (missing/unparseable -> 0) so that int() below
# cannot fail on NaN when fewer than five posts carry a score.
posts['score'] = pd.to_numeric(posts['score'], errors='coerce').fillna(0)

top_posts = (
    posts.sort_values('score', ascending=False)
            .loc[:, ['score','title','url','uri','username','createdAt','datetime','communityName','num_comments','upvote_ratio', 'id']]
            .head(5)
)
print("\nMost liked posts:")
for i, r in top_posts.reset_index(drop=True).iterrows():
    # Fall back to alternative fields when the preferred one is missing.
    title = r['title'] if pd.notna(r['title']) else '(no title)'
    link = r['url'] if pd.notna(r['url']) else r['uri']
    ts = r['createdAt'] if pd.notna(r['createdAt']) else r['datetime']
    print(f"{i+1}. ❤️ {int(r['score'])} | {title} — by {r['username']} | {ts} | {r['communityName']}")
    print(f"   {link}")

    print()
    print('=====')
    print()


Most liked posts:
1. ❤️ 1071 | I was about to reset an older phone, and there was still a tab open... — by LazyLifeguard | 2025-09-23T14:38:00+00:00 | r/Bitcoin
   https://www.reddit.com/r/Bitcoin/comments/1noj60v/i_was_about_to_reset_an_older_phone_and_there_was/

=====

2. ❤️ 909 | How Bitcoin actually evolved over the years — by cjpogi1118 | 2025-09-23T17:51:00+00:00 | r/Bitcoin
   https://www.reddit.com/r/Bitcoin/comments/1nooat6/how_bitcoin_actually_evolved_over_the_years/

=====

3. ❤️ 568 | Bitcoin is money — by Fit_Negotiation_1207 | 2025-09-23T21:12:00+00:00 | r/Bitcoin
   https://www.reddit.com/r/Bitcoin/comments/1notkpx/bitcoin_is_money/

=====

4. ❤️ 477 | I bought 0.87 bitcoins today 🤣 — by Boring-Might-8058 | 2025-09-23T20:35:00+00:00 | r/Bitcoin
   https://www.reddit.com/r/Bitcoin/comments/1nosmij/i_bought_087_bitcoins_today/

=====

5. ❤️ 412 | The power of conviction and innovation, but only when you believe — by News-Principal-160 | 2025-09-23T12:25:00+00:00 | r/Bitc

### Most Active Users

In [57]:
# Per-user activity: number of items (posts and comments) and summed score,
# ranked by item count with total score as the tie-breaker; top ten kept.
user_activity = (
    df.assign(item_count=1)
      .groupby("username", dropna=True)
      .agg(
          total_items=("item_count", "sum"),
          total_score=("score", "sum"),
      )
      .reset_index()
      .sort_values(by=["total_items", "total_score"], ascending=False)
      .head(10)
)

print("\nMost users (by activity; tie-break by total score):")
for rank, entry in enumerate(user_activity.itertuples(index=False), start=1):
    item_total = int(entry.total_items)
    score_total = int(entry.total_score)
    print(f"{rank}. 👤 {entry.username} — {item_total} items, {score_total} total score")


Most users (by activity; tie-break by total score):
1. 👤 Immediate-Coat-9162 — 3 items, 262 total score
2. 👤 Koji-c3w — 2 items, 194 total score
3. 👤 unthocks — 2 items, 18 total score
4. 👤 grzeszu82 — 2 items, 5 total score
5. 👤 LazyLifeguard — 1 items, 1071 total score
6. 👤 cjpogi1118 — 1 items, 909 total score
7. 👤 Fit_Negotiation_1207 — 1 items, 568 total score
8. 👤 Boring-Might-8058 — 1 items, 477 total score
9. 👤 News-Principal-160 — 1 items, 412 total score
10. 👤 QUESTION_NERD — 1 items, 386 total score


## Scrape YouTube Videos + Transcripts (Beta)

In [88]:
# Show the date-only (YYYY-MM-DD) bounds of the query window.
start_dt.date().isoformat(), end_dt.date().isoformat()

('2025-09-17', '2025-09-24')

In [87]:
resp = await client.sn13.OnDemandData(
    source='youtube',
    usernames=None,
    keywords=None,
    start_date=start_dt.date().isoformat(),
    end_date=end_dt.date().isoformat(),
    limit=100, # up to 1000
)

f"Response Status: {resp['status']}", f"Data Available: {not resp['meta'].get('no_data_available', False)}"

('Response Status: success', 'Data Available: False')

In [74]:
# Build a DataFrame from the returned YouTube records (empty list when the key is absent).
data = resp.get("data", [])
df = pd.json_normalize(data)

# Preview the first three rows.
df.head(n=3)

In [None]:
# Show the dtype pandas assigned to each column of the YouTube frame.
df.dtypes