# Scraping Twitter Using snscrape's Python Wrapper
<br>Package Github: https://github.com/JustAnotherArchivist/snscrape

<b>Dependencies: </b> 
- <b>Python</b> version must be <b>3.8</b> or higher.
- <b>Development version of snscrape</b>
- <b>Pandas</b>

In [34]:
# Uncomment the line below to install the development version of snscrape from GitHub.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install git+https://github.com/JustAnotherArchivist/snscrape.git

In [35]:
# Imports
import snscrape.modules.twitter as sntwitter
import pandas as pd

# Query by Username
**Description**: The code will scrape Twitter for 250 tweets for the given username. These tweets are exported to a CSV file with Pandas.

In [36]:
# Maximum number of tweets to collect for the user
maxTweets = 250

# Rows of [date, id, text, username], one row per scraped tweet
tweets_list1 = []

# Username to scrape tweets from
user_name = 'tim_cook'

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(f'from:{user_name}').get_items()):
    # enumerate() is zero-based, so index maxTweets is already item number
    # maxTweets + 1; stop with >= so exactly maxTweets tweets are kept.
    if i >= maxTweets:
        break
    tweets_list1.append([tweet.date, tweet.id, tweet.rawContent, tweet.user.username])

In [37]:
# Turn the collected rows into a labelled dataframe, one column per scraped field
tweets_df1 = pd.DataFrame(
    data=tweets_list1,
    columns=['Date_Time', 'Tweet Id', 'Text', 'Username'],
)

# Preview the first five rows
tweets_df1.head(n=5)

Unnamed: 0,Date_Time,Tweet Id,Text,Username
0,2023-06-09 15:21:52+00:00,1667190281886912512,What an incredible WWDC! Thank you to develope...,tim_cook
1,2023-06-08 00:27:44+00:00,1666602875701837824,@JuanpaZurita Great seeing you at Apple Park!,tim_cook
2,2023-06-06 15:08:55+00:00,1666099857621385216,The new MacBook Air is the world’s best — and ...,tim_cook
3,2023-06-05 19:43:37+00:00,1665806601436160000,https://t.co/7JIDCKC5Ux,tim_cook
4,2023-06-05 19:43:37+00:00,1665806600261763072,Welcome to the era of spatial computing with A...,tim_cook


In [38]:
# Write the username results to a comma-separated file, leaving out the row index
tweets_df1.to_csv(
    path_or_buf='username-tweets.csv',
    sep=',',
    index=False,
)

# Query by Text Search
**Description**: The code will scrape Twitter for 100 tweets matching the given text search, within the time range from January 1st, 2020 to June 11th, 2023. These tweets are then exported to a CSV file with Pandas.

In [39]:
# Maximum number of tweets to collect for the search
maxTweets = 100

# Rows of [date, id, text, username], one row per scraped tweet
tweets_list2 = []

# Search text followed by the since:/until: date operators handed to the scraper
search_query = 'its the elephants since:2020-01-01 until:2023-06-11'

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search_query).get_items()):
    # enumerate() is zero-based, so index maxTweets is already item number
    # maxTweets + 1; stop with >= so exactly maxTweets tweets are kept.
    if i >= maxTweets:
        break
    tweets_list2.append([tweet.date, tweet.id, tweet.rawContent, tweet.user.username])

print("Scrapping Completed")

Scrapping Completed


In [40]:
# Turn the collected rows into a labelled dataframe, one column per scraped field
tweets_df2 = pd.DataFrame(
    data=tweets_list2,
    columns=['Date_Time', 'Tweet Id', 'Text', 'Username'],
)

# Preview the first five rows
tweets_df2.head(n=5)

Unnamed: 0,Date_Time,Tweet Id,Text,Username
0,2023-06-10 18:05:01+00:00,1667593725252362246,"""The Elephants"" is one of Salvador Dali's most...",clarkehimself
1,2023-06-10 12:06:32+00:00,1667503510877765633,@Suparnastar @UKinMexico @LauraAlvarezJC @jere...,ElephantsRule
2,2023-06-10 02:30:39+00:00,1667358585230348288,@jemma16176693 @alexsaltertv @ava_marie_v @M_C...,AustinFarle
3,2023-06-10 02:23:19+00:00,1667356741716312064,"If trophy hunting is banned, what is your plan...",AustinFarle
4,2023-06-09 15:55:06+00:00,1667198645383340034,"@ianfoltan Sure, Australian onions might have ...",Ric4rdoTM


In [41]:
# Write the text-search results to a comma-separated file, leaving out the row index
tweets_df2.to_csv(
    path_or_buf='text-query-tweets.csv',
    sep=',',
    index=False,
)