# Twitter WebScraper

### Necessary Imports

In [56]:
#!pip install snscrape

In [57]:
import pandas as pd
from tqdm.notebook import tqdm
import snscrape.modules.twitter as sntwitter

### Working with a single tweet

In [58]:
# Define and initiate twitter search scraper
# Looking for recent tweets with #python
scraper = sntwitter.TwitterSearchScraper('#python')

In [59]:
# run method to get single tweet example (first retrieved)
for tweet in scraper.get_items():
    break

In [60]:
# Print all tweet information
tweet

Tweet(url='https://twitter.com/themis277/status/1597662900298317824', date=datetime.datetime(2022, 11, 29, 18, 44, 53, tzinfo=datetime.timezone.utc), content='تعلم أساسيات لغة آر (مادة تعليمية مصغرة)\nhttps://t.co/RPc04nmTfu\n#research #DataScience #MachineLearning #data #Rstats #Python #100DaysOfCode #coding #udemyfree #Java #javascript #Developers #DeepLearning #Analytics #bravome #برافو_مي #البيانات #علم_البيانات #التعلم_الآلة', renderedContent='تعلم أساسيات لغة آر (مادة تعليمية مصغرة)\npxl.to/r4ds-ar-unihan…\n#research #DataScience #MachineLearning #data #Rstats #Python #100DaysOfCode #coding #udemyfree #Java #javascript #Developers #DeepLearning #Analytics #bravome #برافو_مي #البيانات #علم_البيانات #التعلم_الآلة', id=1597662900298317824, user=User(username='themis277', id=1094022475736903687, displayname='Themis277', description='( Management Information Systems)!\nAn educational Platform Explains Topics about #datascience and MIS topics ..etc\nin Arabic and English\nBy: @fahadmre

In [61]:
type(tweet)

snscrape.modules.twitter.Tweet

In [62]:
tweet.date

datetime.datetime(2022, 11, 29, 18, 44, 53, tzinfo=datetime.timezone.utc)

In [63]:
tweet.id

1597662900298317824

In [64]:
tweet.content

'تعلم أساسيات لغة آر (مادة تعليمية مصغرة)\nhttps://t.co/RPc04nmTfu\n#research #DataScience #MachineLearning #data #Rstats #Python #100DaysOfCode #coding #udemyfree #Java #javascript #Developers #DeepLearning #Analytics #bravome #برافو_مي #البيانات #علم_البيانات #التعلم_الآلة'

In [65]:
tweet.user.username

'themis277'

In [66]:
tweet.likeCount

0

In [67]:
tweet.retweetCount

0

In [68]:
data = [
    tweet.date,
    tweet.id,
    tweet.content,
    tweet.user.username,
    tweet.likeCount,
    tweet.retweetCount
]

data

[datetime.datetime(2022, 11, 29, 18, 44, 53, tzinfo=datetime.timezone.utc),
 1597662900298317824,
 'تعلم أساسيات لغة آر (مادة تعليمية مصغرة)\nhttps://t.co/RPc04nmTfu\n#research #DataScience #MachineLearning #data #Rstats #Python #100DaysOfCode #coding #udemyfree #Java #javascript #Developers #DeepLearning #Analytics #bravome #برافو_مي #البيانات #علم_البيانات #التعلم_الآلة',
 'themis277',
 0,
 0]

### Working with multiple tweets

In [69]:
scraper = sntwitter.TwitterSearchScraper('#python')

tweets = []
n_tweets = 1000
for i, tweet in tqdm(enumerate(scraper.get_items()),total=n_tweets):
    
    data = [
        tweet.date,
        tweet.id,
        tweet.content,
        tweet.user.username,
        tweet.likeCount,
        tweet.retweetCount
        ]
    tweets.append(data)
    if i > n_tweets:
        break

  0%|          | 0/1000 [00:00<?, ?it/s]

In [70]:
len(tweets)

1002

In [71]:
tweet_df = pd.DataFrame(
    tweets, columns=['date', 'id', 'content', 'username', 'like_count', 'retweet_count']
)
tweet_df.head()

Unnamed: 0,date,id,content,username,like_count,retweet_count
0,2022-11-29 18:44:53+00:00,1597662900298317824,تعلم أساسيات لغة آر (مادة تعليمية مصغرة)\nhttp...,themis277,0,0
1,2022-11-29 18:44:50+00:00,1597662887048548353,Sa wakas. Day 12 aka number guessing game done...,debbiesoria,1,0
2,2022-11-29 18:44:46+00:00,1597662873870012416,Management Information Systems Student’s Journ...,themis277,0,0
3,2022-11-29 18:44:30+00:00,1597662803254726656,SOLANA PRICE PREDICTION\n\nPRICE : RISE⤴ in 60...,cryptojohnsmith,0,0
4,2022-11-29 18:44:29+00:00,1597662799630831616,@driscollis @SaveToNotion #thread #python,Adarssha,0,0


In [72]:
# Save to csv
# tweet_df.to_csv('python_tweets.csv')

### Scrape Elon Musks Tweets

#### Scrape Elon Musk's previous 1000 tweets

In [73]:
scraper = sntwitter.TwitterSearchScraper('from:elonmusk')
tweets = []
n_tweets = 1000
for i, tweet in tqdm(enumerate(scraper.get_items()),total=n_tweets):
    
    data = [
        tweet.date,
        tweet.id,
        tweet.user.username,
        tweet.content,
        tweet.likeCount,
        tweet.retweetCount
        ]
    tweets.append(data)
    if i > n_tweets:
        break

  0%|          | 0/1000 [00:00<?, ?it/s]

In [74]:
elon_tweet_df = pd.DataFrame(
    tweets, columns=['date', 'id', 'username', 'content', 'like_count', 'retweet_count']
)
elon_tweet_df.head()

Unnamed: 0,date,id,username,content,like_count,retweet_count
0,2022-11-29 18:43:30+00:00,1597662553194127360,elonmusk,The people have spoken …,1780,136
1,2022-11-29 18:34:00+00:00,1597660162487029760,elonmusk,"If Community Notes can correct me, then obviou...",6296,379
2,2022-11-29 18:25:12+00:00,1597657945978073088,elonmusk,@TheQuartering I think I have been very specific,1854,119
3,2022-11-29 16:33:32+00:00,1597629846611726336,elonmusk,@DavidSacks It’s a real problem. Apple and Goo...,87196,9893
4,2022-11-29 16:28:19+00:00,1597628531294769152,elonmusk,@cb_doge @stillgray @ezralevant @latimes That ...,4171,285


#### Scrape all of Elon Musk's tweets

In [75]:
scraper = sntwitter.TwitterSearchScraper('from:elonmusk')
tweets = []
for i, tweet in tqdm(enumerate(scraper.get_items())):
    
    data = [
        tweet.date,
        tweet.id,
        tweet.user.username,
        tweet.content,
        tweet.likeCount,
        tweet.retweetCount
        ]
    tweets.append(data)

0it [00:00, ?it/s]

In [76]:
len(tweets)

18387

In [79]:
elon_tweet_df = pd.DataFrame(
    tweets, columns=['date', 'id', 'username', 'content', 'like_count', 'retweet_count']
)
elon_tweet_df

Unnamed: 0,date,id,username,content,like_count,retweet_count
0,2022-11-29 18:43:30+00:00,1597662553194127360,elonmusk,The people have spoken …,4967,346
1,2022-11-29 18:34:00+00:00,1597660162487029760,elonmusk,"If Community Notes can correct me, then obviou...",7140,412
2,2022-11-29 18:25:12+00:00,1597657945978073088,elonmusk,@TheQuartering I think I have been very specific,1952,125
3,2022-11-29 16:33:32+00:00,1597629846611726336,elonmusk,@DavidSacks It’s a real problem. Apple and Goo...,87913,9970
4,2022-11-29 16:28:19+00:00,1597628531294769152,elonmusk,@cb_doge @stillgray @ezralevant @latimes That ...,4194,290
...,...,...,...,...,...,...
18382,2011-12-03 08:22:07+00:00,142881284019060736,elonmusk,That was a total non sequitur btw,142,15
18383,2011-12-03 08:20:28+00:00,142880871391838208,elonmusk,"Great Voltaire quote, arguably better than Twa...",84,33
18384,2011-12-01 10:29:04+00:00,142188458125963264,elonmusk,I made the volume on the Model S http://t.co/w...,76,13
18385,2011-12-01 09:55:11+00:00,142179928203460608,elonmusk,Went to Iceland on Sat to ride bumper cars on ...,190,22


In [80]:
# Save to csv
elon_tweet_df.to_csv('elon_tweets.csv')