# Extracting Unique Tweets from Twitter using Developer API
###### Author: Abdullah Altammami; Jing Gao; Nathanael George;
###### Weill Cornell Graduate School of Medical Sciences, New York, United States

## Import the Tweepy package and set up the user API

In [None]:
import os

import tweepy

In [None]:
# Read the credentials from environment variables so that real secrets never
# have to be saved inside the notebook. The placeholder strings are only used
# when the corresponding variable is not set.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "same as api key")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "same as api secret")
access_key = os.environ.get("TWITTER_ACCESS_KEY", "Your access key")
access_secret = os.environ.get("TWITTER_ACCESS_SECRET", "Your access secret")

# Not required:
# bearer_token = "Your bearer token"

In [None]:
# Twitter authentication: consumer key/secret identify the app, the access
# token/secret identify the user on whose behalf requests are made.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)

# Creating an API object.
# wait_on_rate_limit=True makes Tweepy wait for the rate-limit window to reset
# instead of raising, so a long paginated search is not aborted part-way.
api = tweepy.API(auth, wait_on_rate_limit=True)

## Extract the unique tweets
* Conditions:
  * Query: "Mask" and "NYC"
  * Language: English
  * Tweet mode: "extended" (returns the full text of each tweet, so long tweets are not truncated)
  * `items(3000)`: Extract up to 3000 tweets (fewer if the search returns fewer results)

* Filter the result:
  * `if "RT @" not in text`: exclude all retweets (any tweet whose text contains "RT @"), since they might not be objective enough to represent public opinion towards the policies


In [None]:
# Create 4 empty lists to store the measurements (one entry per kept tweet).
# They are re-created here, so re-running this cell does not append duplicates.

text_list = []
fav_count = []
ret_count = []
time_created = []

# Tweepy 4 renamed API.search to API.search_tweets; use whichever one this
# installation provides so the cell runs on both major versions.
search_method = getattr(api, "search_tweets", None) or api.search

# Extract the tweets into one single variable.
# The query must be a single string: the standard search treats
# space-separated terms as AND, so this matches tweets containing both words.

hashtag_tweets = tweepy.Cursor(
    search_method,
    q="Mask NYC",
    lang="en",
    tweet_mode="extended",  # full, untruncated text is returned as `full_text`
).items(3000)

# Filter the tweets

for i, tweet in enumerate(hashtag_tweets):
    text = tweet.full_text
    if "RT @" not in text:
        # `i` is the position in the unfiltered result stream, so printed
        # indices skip over the tweets that were dropped.
        print(i)
        print(text)
        print(tweet.favorite_count)
        print(tweet.retweet_count)
        print(tweet.created_at)
        print("-------------------")

        # Keep the four lists in lock-step: one append each per kept tweet.
        text_list.append(text)
        fav_count.append(tweet.favorite_count)
        ret_count.append(tweet.retweet_count)
        time_created.append(tweet.created_at)



In [None]:
# Check the length of the text list, i.e. how many tweets passed the "RT @" filter

len(text_list)

319

In [None]:
# Check the length of the favorite count (should be same as text list).
# All four lists are appended to together, so they must have equal lengths;
# fail loudly here rather than when the DataFrame is built.

assert len(fav_count) == len(text_list) == len(ret_count) == len(time_created), \
    "measurement lists have different lengths"

len(fav_count)

319

## Export the data

In [None]:
# Assemble the four parallel lists into a single table:
# one row per kept tweet, one column per measurement.

import pandas as pd

data = pd.DataFrame.from_dict(
    {
        "text": text_list,
        "fav_count": fav_count,
        "ret_count": ret_count,
        "time_created": time_created,
    }
)


In [None]:
# Display the DataFrame built from the collected tweets
data

Unnamed: 0,text,fav_count,ret_count,time_created
0,"This is the spirit of NYC, @NYCMayor! Take a w...",1,0,2022-03-31 00:49:21
1,@safeschoolsny @RepBillFoster @RepPerlmutter @...,0,0,2022-03-31 00:46:12
2,"@graciefacelove Yes, there is. And the vaccina...",1,0,2022-03-31 00:27:07
3,@patricklsimpson Not surprised Nassau county i...,0,0,2022-03-30 23:19:54
4,@NYCComptroller @CMShahanaHanif @CabanD22 We h...,1,0,2022-03-30 23:12:03
...,...,...,...,...
314,@KLM_UK Heyyyy. Long time happy customer here....,0,0,2022-03-26 11:54:16
315,@HughThunkIt TWO WEEKS AFTER THEY DROPPED MASK...,13,1,2022-03-26 11:24:58
316,Is now really the time to do away with mask ma...,2,0,2022-03-26 11:20:38
317,@Noam_NYC @BtSIsrael It’s time to pull the mas...,0,0,2022-03-26 11:07:06


In [None]:
# Convert the dataframe into Excel file
# You can also convert the data directly into csv format using
# data.to_csv("data.csv", index=False)

# Excel cannot store timezone-aware datetimes and pandas raises a ValueError
# when asked to write them. If the timestamps carry a timezone, drop it
# (keeping the same wall-clock values) on a copy, so `data` itself is untouched.
export_data = data.copy()
if isinstance(export_data["time_created"].dtype, pd.DatetimeTZDtype):
    export_data["time_created"] = export_data["time_created"].dt.tz_localize(None)

export_data.to_excel("data.xlsx", index=False)