# Get Old Tweets

In [1]:
import datetime
import re
import sqlite3
import sys

import pandas as pd

import got3 as got

`validate` function (checks that a date string is in YYYY-MM-DD format)

In [2]:
def validate(date_text):
    """Check that ``date_text`` parses as a date written as YYYY-MM-DD.

    Returns None when the text parses. Raises ``ValueError`` with a fixed
    message when ``strptime`` rejects it.
    """
    expected_format = '%Y-%m-%d'
    try:
        datetime.datetime.strptime(date_text, expected_format)
    except ValueError:
        raise ValueError("Incorrect data format, should be YYYY-MM-DD")
    return None

`project_setup` function

In [18]:
def project_setup():
    """Interactively collect the search settings and store them as globals.

    Prompts on stdin and sets these module-level names, which the other
    functions in this notebook read:

    - ``dbname``: the project name with ".db" appended.
    - ``keywords``: the raw comma-separated search-term string (may be empty).
    - ``usernames``: list of the whitespace-separated usernames (may be empty).
    - ``since`` / ``until``: date strings, each checked with ``validate``.
    - ``toptweets``: True when the answer is "y" or "yes" (any case),
      otherwise False.
    - ``maxtweets``: the entered limit as an int.

    Raises:
        ValueError: from ``validate`` when a date is not YYYY-MM-DD.
        SystemExit: after printing a message, when the max-tweets answer is
            not a whole non-negative number.
    """
    global dbname, keywords, usernames, since, until, toptweets, maxtweets

    projname = input("Project name? ")
    dbname = projname + ".db"

    print("Searches can by done by search term(s), by username(s), or by both in combination")

    # QUERYSEARCH
    print("")
    print("Search terms, one or several separated by comma")
    print("Leave empty to only search by username")
    keywords = input('e.g. monkey,"time for bananas",#ape2020,"donkey kong": ')

    # USERNAMES
    print("")
    print("Usernames, one or several separated by space")
    print("Leave empty to only search by terms")
    usernames = input('e.g. @nintendo @jupyter: ').split()

    # DATES
    print("")
    # The prompt must name the same format that validate() enforces.
    print("Enter date range for search in YYYY-MM-DD format")
    since = input("start date UTC (included in search): ")
    validate(since)
    until = input("end date UTC (excluded from search): ")
    validate(until)

    # TOPTWEETS
    print("")
    print("Do you want to get only the Top Tweets?")
    top_t = input("y/n? ")
    toptweets = top_t.strip().lower() in ("y", "yes")

    # MAXTWEETS
    print("")
    print("\nEnter maximum number of tweets to get per keyword, or set 0 to get all possible tweets")
    max_text = input("max tweets ").strip()
    # isdecimal() only accepts characters int() can parse; isnumeric() also
    # accepts e.g. fractions and superscripts, which make int() raise.
    if not max_text.isdecimal():
        print("You did not enter a numeric value")
        sys.exit()
    maxtweets = int(max_text)

`create_database` function

In [4]:
def create_database():
    """Create the SQLite database named by the global ``dbname``.

    Opens (creating if necessary) the file ``dbname`` and creates an empty
    ``tweets`` table in it. The connection is always closed before returning.

    Raises:
        SystemExit: after printing a message, when SQLite reports an
            operational error -- typically because the ``tweets`` table
            already exists in that file. Other exceptions propagate
            unchanged instead of being reported as "already exists".
    """
    conn = None
    try:
        conn = sqlite3.connect(dbname)
        conn.execute("""CREATE TABLE tweets (
        tweet_id TEXT,
        author TEXT,
        in_reply_to TEXT,
        tweet TEXT,
        date TEXT,
        retweets INT,
        favourites INT,
        mentions TEXT,
        hashtags TEXT,
        geo TEXT)
        """)
        conn.commit()
    except sqlite3.OperationalError as err:
        if "already exists" in str(err):
            print("A database with this name already exists")
        else:
            # Keep the real cause visible (e.g. the file cannot be opened).
            print("Could not create the database:", err)
        sys.exit()
    finally:
        if conn is not None:
            conn.close()

`run_search` function

In [27]:
def run_search():
    """Run one search per search term and store the results in ``tweets``.

    Reads the globals set by ``project_setup`` (``keywords``, ``usernames``,
    ``since``, ``until``, ``toptweets``, ``maxtweets``, ``dbname``).
    ``keywords`` is split on commas; each term is stripped of surrounding
    whitespace and empty terms are dropped. When no term remains, a single
    search without a query string is run (username-only search).

    One database connection is used for the whole run; rows are committed
    after each term and the connection is closed even if a search fails.

    Note: a comma inside a quoted phrase is still treated as a separator.
    """
    insert_sql = (
        "INSERT INTO tweets (tweet_id, author, in_reply_to, tweet, date, "
        "retweets, favourites, mentions, hashtags, geo) "
        "VALUES (?,?,?,?,?,?,?,?,?,?)"
    )

    terms = [kw.strip() for kw in keywords.split(",")]
    terms = [kw for kw in terms if kw]
    if not terms:
        # No search terms given: run once without a query string.
        terms = [None]

    conn = sqlite3.connect(dbname)
    try:
        for kw in terms:
            tweetCriteria = got.manager.TweetCriteria()

            # Set the search parameters that we always set
            tweetCriteria.setMaxTweets(maxtweets)
            tweetCriteria.setSince(since)
            tweetCriteria.setUntil(until)
            tweetCriteria.setTopTweets(toptweets)
            tweetCriteria.setEmoji("unicode")

            if kw is not None:
                tweetCriteria.setQuerySearch(kw)
            if usernames:
                tweetCriteria.setUsername(usernames)

            tweets = got.manager.TweetManager.getTweets(tweetCriteria)
            rows = [
                (t.id, t.username, t.to, t.text, t.date,
                 t.retweets, t.favorites, t.mentions, t.hashtags, t.geo)
                for t in tweets
            ]
            conn.executemany(insert_sql, rows)
            conn.commit()
    finally:
        conn.close()

`remove_duplicates` function

In [6]:
def remove_duplicates():
    """Collapse fully identical rows of the ``tweets`` table into one row each.

    Works on the database named by the global ``dbname``. The distinct rows
    are copied into a connection-local TEMP table, ``tweets`` is emptied and
    refilled from it, and the TEMP table is dropped, so nothing is left
    behind and the function can be called repeatedly.

    The delete and re-insert run in one transaction: on any error the
    transaction is rolled back and ``tweets`` keeps its previous contents.
    The connection is always closed.
    """
    conn = sqlite3.connect(dbname)
    try:
        # "with conn" commits on success and rolls back on an exception.
        with conn:
            conn.execute(
                "CREATE TEMP TABLE dedup_tweets AS SELECT DISTINCT * FROM tweets")
            conn.execute("DELETE FROM tweets")
            conn.execute("INSERT INTO tweets SELECT * FROM dedup_tweets")
            conn.execute("DROP TABLE dedup_tweets")
    finally:
        conn.close()

`preview_data` function

In [7]:
def preview_data(n=20):
    """Print the first rows of the ``tweets`` table.

    Args:
        n: maximum number of rows to fetch and print (20 by default).

    Reads from the database named by the global ``dbname``. Only ``n`` rows
    are fetched rather than the whole table, and the connection is closed
    before printing. Returns None.
    """
    conn = sqlite3.connect(dbname)
    try:
        df = pd.read_sql_query("SELECT * FROM tweets LIMIT ?", conn, params=(n,))
    finally:
        conn.close()
    print(df)

In [23]:
def main():
    """Run the collection steps once, in order.

    Asks for the settings, creates the database, runs the searches and
    finally removes duplicate rows.
    """
    # preview_data() is not part of this sequence.
    pipeline = (project_setup, create_database, run_search, remove_duplicates)
    for step in pipeline:
        step()

In [28]:
# Start the interactive run: main() prompts for the settings, then creates
# the database, runs the searches and removes duplicate rows.
main()

Project name?  asfj0jå4


Searches can by done by search term(s), by username(s), or by both in combination

Search terms, one or several separated by comma
Leave empty to only search by username


e.g. monkey,"time for bananas",#ape2020,"donkey kong":  monkey,"love this",#metoo



Usernames, one or several separated by space
Leave empty to only search by terms


e.g. @nintendo @jupyter:  



Enter date range for search in YYYY-NN-DD format


start date UTC (included in search):  2015-01-01
end date UTC (excluded from search):  2020-01-01



Do you want to get only the Top Tweets?


y/n?  y




Enter maximum number of tweets to get per keyword, or set 0 to get all possible tweets


max tweets  10


               tweet_id          author      in_reply_to  \
0   1212159343929393152   VickieLFisher  VictorF05184461   
1   1212158659565932551     homechekker     TheJasonPugh   
2   1212155716670234626     ultimatejjp             None   
3   1212155477817094146   SinbadTattCat             None   
4   1212154384349646850    Monkey_Pants             None   
5   1212153904525455360     sams_monkey             None   
6   1212150199814098945      wildstersx             None   
7   1212149867276980224     PostCultRev             None   
8   1212149077137215488  claricecsorcha             None   
9   1212146709507084288     evangerules             None   
10  1212161306683305984       ElyseFitz             None   
11  1212160838414618625    iamwillsteel             None   
12  1212160705601904640     DEEJAYGARNA             None   
13  1212160283118096384   thewritertype             None   
14  1212160206718865414    badniggafela             None   
15  1212159827008339969  lizzysawrusrex 