In [1]:
from __future__ import unicode_literals
import json 
import requests
import pandas 
import os
import youtube_dl
import numpy as np

In [2]:
# Read the API credentials stored in a JSON key file
def loadKeys(key_file:str):
    """Load the five API credentials from a JSON file.

    Args:
        key_file: Path of a JSON file whose top-level object holds the keys
            ``api_key``, ``api_secret``, ``bearer_token``, ``token`` and
            ``token_secret``.

    Returns:
        The tuple ``(api_key, api_secret, bearer_token, token, token_secret)``.

    Raises:
        KeyError: If one of the five keys is missing from the file.
    """
    wanted = ('api_key', 'api_secret', 'bearer_token', 'token', 'token_secret')
    with open(key_file) as handle:
        credentials = json.load(handle)
    # Looked up in the same order as the returned tuple
    return tuple(credentials[name] for name in wanted)

In [94]:
# Creating a url for the api to search from
def create_url(query="(cat) -is:retweet has:media -has:videos lang:en", max_results=100):
    """Build the Twitter API v2 recent-search URL.

    Args:
        query: Search query in Twitter's operator syntax. It is
            percent-encoded before being placed in the URL, so characters
            such as ``#``, ``&`` or ``+`` cannot break the query string.
        max_results: Number of tweets requested per page (the endpoint
            documents a 10-100 range).

    Returns:
        The full request URL as a string.
    """
    # Local import: only this function needs it
    from urllib.parse import quote

    # Media fields are adjustable; `type` and `url` are requested for every
    # attached media object.
    media_fields = "media.fields=type,url"
    # Ask for the media keys of each tweet's attachments
    expansions = "expansions=attachments.media_keys"
    return "https://api.twitter.com/2/tweets/search/recent?query={}&{}&{}&max_results={}".format(
        quote(query, safe=""), media_fields, expansions, max_results
    )


In [95]:
# Build the authorization header for the request
def create_headers(bearer_token):
    """Return the HTTP headers that carry the bearer token.

    Args:
        bearer_token: Token placed after the ``Bearer`` scheme.

    Returns:
        A dict with the single key ``Authorization``.
    """
    return {"Authorization": f"Bearer {bearer_token}"}

In [96]:
# Connecting to the api url with the token headers
def connect_to_endpoint(url, headers, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Request URL.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        Exception: With args ``(status_code, response_text)`` when the
            status code is not 200.
        requests.exceptions.RequestException: Raised by ``requests`` for
            connection problems, including an expired timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Echo the status so the cell output shows whether the call succeeded
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [97]:
# Write a response object to a json file
def create_json(filename, json_response):
    """Serialize ``json_response`` into ``filename`` as indented JSON.

    Args:
        filename: Path of the file to create or overwrite.
        json_response: JSON-serializable object to store.
    """
    with open(filename, mode="w") as out_handle:
        # Four-space indentation keeps the stored file readable
        json.dump(json_response, fp=out_handle, indent=4)

In [98]:
# Loading the data of a json file
def load_json(filename):
    """Read ``filename`` and return its parsed JSON content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load``.

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails
    with open(filename) as file:
        return json.load(file)

In [99]:
# Downloading gif file with given url
def downloadGIF(url, filename):
    """Download the media behind ``url`` to ``gifs/<filename>.mp4``.

    Args:
        url: Address handed to youtube_dl.
        filename: File name, without extension, used inside ``gifs/``.

    Returns:
        True when youtube_dl finished without raising, False otherwise.
    """
    ydl_opts = {
        'outtmpl': 'gifs/{}.mp4'.format(filename)
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as error:
        # Catch Exception only, so Ctrl-C (KeyboardInterrupt) and SystemExit
        # still stop the notebook; report why the download failed.
        print("Could Not Download File: {}".format(error))
        return False
    return True

In [100]:
# Class TweetData holds the mediaKey, tweetURL, text and tweetID for one data object in a json file
class TweetData:
    """Media key, link, text and id taken from one tweet object."""

    def __init__(self, jsonData):
        """Extract the fields from one tweet dictionary.

        Args:
            jsonData: Dict with the keys ``id`` and ``text`` and, when the
                tweet has media, ``attachments`` -> ``media_keys``.

        Raises:
            KeyError: If ``id`` or ``text`` is missing.
        """
        # First media key of the tweet; "" when the tweet has no attachments
        attachments = jsonData.get('attachments') or {}
        mediaKeys = attachments.get('media_keys') or []
        self.mediaKey = mediaKeys[0] if mediaKeys else ""
        # Split at the last "https://" into the leading text and the link
        fullText = jsonData['text']
        leadingText, scheme, remainder = fullText.rpartition("https://")
        if scheme:
            # Link that closes the tweet text
            self.tweetURL = scheme + remainder
            # Tweet text that precedes the link
            self.text = leadingText
        else:
            # Without a link rpartition returns ('', '', fullText); keep the
            # text intact instead of treating it as the URL.
            self.tweetURL = ""
            self.text = fullText
        # Tweet id as given in the json
        self.tweetID = jsonData['id']

In [104]:
# Create the CSV of tweet data and download the GIFs
def createCSVandGIFS(data:dict, curFileCounter:int, csvFile:str='twitterData.csv'):
    """Download the GIF of every eligible tweet and save the metadata as CSV.

    A tweet is eligible when its media key starts with ``16_`` (the check the
    notebook uses to recognise GIFs) and its download succeeds.

    Args:
        data: Search response; the tweets are read from its ``data`` list.
            A response without that key is treated as containing no tweets.
        curFileCounter: Number used for the first ``GIF_File_<n>`` name; it
            increases by one after each successful download.
        csvFile: Path of the CSV file that is written.

    Returns:
        A pandas DataFrame with one row per downloaded GIF and the columns
        ``Tweet ID``, ``Media Key``, ``Tweet URL``, ``Tweet Text`` and
        ``GIF Title``.
    """
    # Headers for the data
    headers = ["Tweet ID", "Media Key", "Tweet URL", "Tweet Text", "GIF Title"]
    # Rows are collected in a list and turned into a DataFrame once, instead
    # of re-allocating an array for every tweet
    rows = []
    # Iterate over all of the collected tweets
    for tweetJson in data.get('data', []):
        tweet = TweetData(tweetJson)
        # Only media keys with the GIF prefix are kept
        if not tweet.mediaKey.startswith("16_"):
            continue
        gifTitle = "GIF_File_{}".format(curFileCounter)
        # A tweet whose GIF cannot be downloaded is left out of the table
        if not downloadGIF(tweet.tweetURL, gifTitle):
            continue
        curFileCounter += 1
        # Values are stored as text, as in the earlier string-array version
        rows.append([
            str(tweet.tweetID),
            str(tweet.mediaKey),
            str(tweet.tweetURL),
            str(tweet.text),
            gifTitle
        ])
    # Create csv of the data compiled
    tweetDF = pandas.DataFrame(rows, columns=headers)
    tweetDF.to_csv(csvFile, index=False)
    return tweetDF

In [103]:
# Main pipeline: load the credentials, query the API, store the response and build the CSV/GIF dataset
# loadKeys returns five credentials; only the bearer token is used below
api_key, api_secret, bear_token, token, token_secret = loadKeys("keys.json")
bearer_token = bear_token
# Build the search URL and the authorization headers
url = create_url()
headers = create_headers(bearer_token)
# Run the search (connect_to_endpoint prints the HTTP status code)
json_response = connect_to_endpoint(url, headers)
# Save the raw response to disk, then read it back from that file
create_json("data_file.json", json_response)
data = load_json("data_file.json")
# Download the GIFs and write the CSV, numbering the GIF files from 0
tweetDF = createCSVandGIFS(data, 0)


200
[generic] E1r4RSWvmz: Requesting header
[generic] E1r4RSWvmz: Downloading webpage
[generic] E1r4RSWvmz: Extracting information
[redirect] Following redirect to https://twitter.com/pitt_geoff/status/1369801813378015236/photo/1
[twitter] 1369801813378015236: Downloading guest token
[twitter] 1369801813378015236: Downloading JSON metadata
[download] Destination: gifs\GIF_File_0.mp4
[download] 100% of 48.76KiB in 00:00                   
[generic] 1SbgbltG6F: Requesting header
[generic] 1SbgbltG6F: Downloading webpage
[generic] 1SbgbltG6F: Extracting information
[redirect] Following redirect to https://twitter.com/stabhergently/status/1369801743991767042/photo/1
[twitter] 1369801743991767042: Downloading guest token
[twitter] 1369801743991767042: Downloading JSON metadata
[download] Destination: gifs\GIF_File_1.mp4
[download] 100% of 178.33KiB in 00:00                  
[generic] WglztiEXMT: Requesting header
[generic] WglztiEXMT: Downloading webpage
[generic] WglztiEXMT: Extracting in

In [105]:
# Display the DataFrame returned by createCSVandGIFS
tweetDF

Unnamed: 0,Tweet ID,Media Key,Tweet URL,Tweet Text,GIF Title
0,1369801813378015236,16_1369801808130891777,https://t.co/E1r4RSWvmz,@gun_control_ca Voting PPC won’t elect the Lib...,GIF_File_0
1,1369801743991767042,16_1369801736119009281,https://t.co/1SbgbltG6F,Buying your cat a new bed just to watch her si...,GIF_File_1
2,1369801690497572866,16_1369801683040169984,https://t.co/WglztiEXMT,@B3thyVA this is that saddest cat i ever saw,GIF_File_2
3,1369801492442451968,16_1369801485937123330,https://t.co/BnvU8O4Eqk,@RWATXRI @essentialyonce4 All I keep hearing i...,GIF_File_3
4,1369800901658042376,16_1369800894611681284,https://t.co/33eINyKLaK,@BUZZNET Meg is the cat....,GIF_File_4
5,1369800251356381187,16_1369800245639528453,https://t.co/jcFefuXwSu,@UncleSnotty I’m in Herd “Grumpy Old Fart” and...,GIF_File_5
6,1369800194276003841,16_1369800186017452032,https://t.co/gOlJeIF1u2,@so_treu Remember that cat who told me about #...,GIF_File_6
7,1369800174801977344,16_1369800167403192325,https://t.co/ymJ9cmQfeT,@jillycp ⚠️ Disclaimer: Fairy wishes are subje...,GIF_File_7
8,1369800059643179008,16_1369800049115402241,https://t.co/XzVPIzIrL9,@jasonzada The wise old cat understands.,GIF_File_8
