In [1]:
import GetOldTweets3 as got
# Remember, if you do not have GOT3 installe, you first need to do it using !pip command

The cell below contains simplified version of the script you can use to verify if the output is correct.
In this cell you have to remember to manually change the following:
- setMaxTweets(9999) --> limitation on number of tweets to be scrapped. If you get an error, use smaller number.
- setSince('2015-04-16') --> use the yyyy-mm-dd format
- setUntil('2015-04-17') --> unless the city is small and with few tweets, trying to scrape more than one day will result in an error (421: too many requests)
- setNear('52.52, 13.40') --> use decimal notation; to double-check you can switch latitude with longitute to verify the output
- setWithin("50km") --> you can increase or decrease the radius, with setNear coordinates as a centre of the circle

In [2]:
# It creates, and later overwrites 'output.csv' file; Remember to force encoding to utf-8 to correctly represent diacritical marks
with open('output.csv', 'w', encoding="utf-8") as f:
    
    #This is the basic GetOldTweets3 script with the variables in ()
    tweetCriteria = got.manager.TweetCriteria().setMaxTweets(10).setSince('2014-04-20').setUntil('2014-04-21').setNear('48.13, 11.58').setWithin("55km");
    tweets = got.manager.TweetManager.getTweets(tweetCriteria);
    
    #Verification if the tweet.text is long enough (10 characters by default - you may change this)
    for tweet in tweets:
        if len(tweet.text) > 10:
            #Format of the data saved in the .csv file #Remove all commas from tweet text so csv file will read with corrent number of columns
            f.write(f"{tweet.id},{tweet.date},{tweet.username},{tweet.text.replace(',', '')}'\n'")

The script in the next cells allows you to see the number of tweets scrapped to the file and the tweets without opening the file.

In [3]:
# We ignore errors (non-unicode signs, like some emojis)
with open('output.csv', errors='ignore', encoding="utf-8") as f:
    row_count = sum(1 for row in f)
    print(row_count)

11


In [4]:
with open('output.csv', errors='ignore', encoding="utf-8") as f:
    print(f.read())

458031211999277056,2014-04-20 23:55:25+00:00,MunichDE,clear -> mostly cloudy temperature down 12°C -> 7°C humidity up 66% -> 87% wind 11km/h -> 6km/h pressure 1006mb falling'
'458031118990974976,2014-04-20 23:55:03+00:00,kartenquizde,Wo liegt Miesbach? http://www.kartenquiz.de/Miesbach #Miesbach #quiz'
'458030731894460416,2014-04-20 23:53:30+00:00,EthubWerner,@andreasdotorg @das_heilige Nicht verschlucken'
'458030701905195008,2014-04-20 23:53:23+00:00,lucianayuka,Dyed my hair reddish brown yesterday and I still can't get used to it every time I look at myself in the mirror!!'
'458030279391997952,2014-04-20 23:51:43+00:00,EthubWerner,@das_heilige Gut gekontert.Weiter so :-) @andreasdotorg'
'458030109602357248,2014-04-20 23:51:02+00:00,DigitalVeteran_,#Robocop ist visionär: Da wurde 1987 die Macht von privaten Sicherheitsfirmen und deren Einfluss auf staatliche Organisationen prophezeit.'
'458029972805144577,2014-04-20 23:50:29+00:00,maan_daa,@itsdudamartins mas eu não sou o inferno'
'45

And now the GOT3 function I created:
- by passing the values in the function () you can run the script for a given date and location,
- it will create separate .csv file,
- if you get an error, change the 'setMaxTweets(9999)' value in the function script
- you can also change the default radius of 50km, if the city is bigger or smaller
- the other variable that may need to be changed is the 'len(tweet.text)'

In [6]:
def GOTcity(start_date, stop_date, location, file_name):
    """Dates in 'yyyy-mm-dd' format; Location in decimals '48.86, 2.35' radius set at 55km, filename in 'Paris20140420.csv' format"""
    with open(file_name, 'w', encoding="utf-8") as f:
        tweetCriteria = got.manager.TweetCriteria().setMaxTweets(9999).setSince(start_date).setUntil(stop_date).setNear(location).setWithin("55km");
        tweets = got.manager.TweetManager.getTweets(tweetCriteria);
        for tweet in tweets:
            if len(tweet.text) > 10:
                f.write(f"{tweet.id},{tweet.date},{tweet.username},{tweet.text.replace(',', '')}'\n'")

## Munich

In [85]:
#20140420 
GOTcity('2014-04-20', '2014-04-21', '48.13, 11.58', 'Munich20140420.csv')

In [86]:
#20140720 
GOTcity('2014-07-20', '2014-07-21', '48.13, 11.58', 'Munich20140720.csv')

In [87]:
#20141020 
GOTcity('2014-10-20', '2014-10-21', '48.13, 11.58', 'Munich20141020.csv')

In [88]:
#20150120 
GOTcity('2015-01-20', '2015-01-21', '48.13, 11.58', 'Munich20150120.csv')

In [89]:
#20150420 
GOTcity('2015-04-20', '2015-04-21', '48.13, 11.58', 'Munich20150420.csv')

In [91]:
#20150720 
GOTcity('2015-07-20', '2015-07-21', '48.13, 11.58', 'Munich20150720.csv')

In [92]:
#20151020 
GOTcity('2015-10-20', '2015-10-21', '48.13, 11.58', 'Munich20151020.csv')

In [93]:
#20160120 
GOTcity('2016-01-20', '2016-01-21', '48.13, 11.58', 'Munich20160120.csv')

In [94]:
#20160420 
GOTcity('2016-04-20', '2016-04-21', '48.13, 11.58', 'Munich20160420.csv')

In [95]:
#20160720 
GOTcity('2016-07-20', '2016-07-21', '48.13, 11.58', 'Munich20160720.csv')

In [96]:
#20161020 
GOTcity('2016-10-20', '2016-10-21', '48.13, 11.58', 'Munich20161020.csv')

In [97]:
#20170120 
GOTcity('2017-01-20', '2017-01-21', '48.13, 11.58', 'Munich20170120.csv')

In [98]:
#20170420 
GOTcity('2017-04-20', '2017-04-21', '48.13, 11.58', 'Munich20170420.csv')

In [99]:
#20170720 
GOTcity('2017-07-20', '2017-07-21', '48.13, 11.58', 'Munich20170720.csv')

In [100]:
#20171020 
GOTcity('2017-10-20', '2017-10-21', '48.13, 11.58', 'Munich20171020.csv')

In [101]:
#20180120 
GOTcity('2018-01-20', '2018-01-21', '48.13, 11.58', 'Munich20180120.csv')

In [102]:
#20180420 
GOTcity('2018-04-20', '2018-04-21', '48.13, 11.58', 'Munich20180420.csv')

In [103]:
#20180720 
GOTcity('2018-07-20', '2018-07-21', '48.13, 11.58', 'Munich20180720.csv')

In [104]:
#20181020 
GOTcity('2018-10-20', '2018-10-21', '48.13, 11.58', 'Munich20181020.csv')

In [105]:
#20190120 
GOTcity('2019-01-20', '2019-01-21', '48.13, 11.58', 'Munich20190120.csv')

In [106]:
#20190420 
GOTcity('2019-04-20', '2019-04-21', '48.13, 11.58', 'Munich20190420.csv')

In [107]:
#20190720 
GOTcity('2019-07-20', '2019-07-21', '48.13, 11.58', 'Munich20190720.csv')

In [108]:
#20191020 
GOTcity('2019-10-20', '2019-10-21', '48.13, 11.58', 'Munich20191020.csv')

In [109]:
#20200120 
GOTcity('2020-01-20', '2020-01-21', '48.13, 11.58', 'Munich20200120.csv')

In [113]:
#20200420 
GOTcity('2020-04-20', '2020-04-21', '48.13, 11.58', 'Munich20200420.csv')

In [8]:
with open('Munich20140420.csv', errors='ignore', encoding="utf-8") as f:
    print(f.read())

458031211999277056,2014-04-20 23:55:25+00:00,MunichDE,clear -> mostly cloudy temperature down 12°C -> 7°C humidity up 66% -> 87% wind 11km/h -> 6km/h pressure 1006mb falling'
'458031118990974976,2014-04-20 23:55:03+00:00,kartenquizde,Wo liegt Miesbach? http://www.kartenquiz.de/Miesbach #Miesbach #quiz'
'458030731894460416,2014-04-20 23:53:30+00:00,EthubWerner,@andreasdotorg @das_heilige Nicht verschlucken'
'458030701905195008,2014-04-20 23:53:23+00:00,lucianayuka,Dyed my hair reddish brown yesterday and I still can't get used to it every time I look at myself in the mirror!!'
'458030279391997952,2014-04-20 23:51:43+00:00,EthubWerner,@das_heilige Gut gekontert.Weiter so :-) @andreasdotorg'
'458030109602357248,2014-04-20 23:51:02+00:00,DigitalVeteran_,#Robocop ist visionär: Da wurde 1987 die Macht von privaten Sicherheitsfirmen und deren Einfluss auf staatliche Organisationen prophezeit.'
'458029972805144577,2014-04-20 23:50:29+00:00,maan_daa,@itsdudamartins mas eu não sou o inferno'
'45

## Paris

In [7]:
#20140420 
GOTcity('2014-04-20', '2014-04-21', '48.8566, 2.3522', 'Paris20140420.csv')
#20140720 
GOTcity('2014-07-20', '2014-07-21', '48.8566, 2.3522', 'Paris20140720.csv')
#20141020 
GOTcity('2014-10-20', '2014-10-21', '48.8566, 2.3522', 'Paris20141020.csv')

In [None]:
#20150120 
GOTcity('2015-01-20', '2015-01-21', '48.8566, 2.3522', 'Paris20150120.csv')
#20150420 
GOTcity('2015-04-20', '2015-04-21', '48.8566, 2.3522', 'Paris20150420.csv')
#20150720 
GOTcity('2015-07-20', '2015-07-21', '48.8566, 2.3522', 'Paris20150720.csv')
#20151020 
GOTcity('2015-10-20', '2015-10-21', '48.8566, 2.3522', 'Paris20151020.csv')


In [None]:
#20160120 
GOTcity('2016-01-20', '2016-01-21', '48.8566, 2.3522', 'Paris20160120.csv')
#20160420 
GOTcity('2016-04-20', '2016-04-21', '48.8566, 2.3522', 'Paris20160420.csv')
#20160720 
GOTcity('2016-07-20', '2016-07-21', '48.8566, 2.3522', 'Paris20160720.csv')
#20161020 
GOTcity('2016-10-20', '2016-10-21', '48.8566, 2.3522', 'Paris20161020.csv')


In [None]:
#20170120 
GOTcity('2017-01-20', '2017-01-21', '48.8566, 2.3522', 'Paris20170120.csv')
#20170420 
GOTcity('2017-04-20', '2017-04-21', '48.8566, 2.3522', 'Paris20170420.csv')
#20170720 
GOTcity('2017-07-20', '2017-07-21', '48.8566, 2.3522', 'Paris20170720.csv')
#20171020 
GOTcity('2017-10-20', '2017-10-21', '48.8566, 2.3522', 'Paris20171020.csv')


In [None]:
#20180120 
GOTcity('2018-01-20', '2018-01-21', '48.8566, 2.3522', 'Paris20180120.csv')
#20180420 
GOTcity('2018-04-20', '2018-04-21', '48.8566, 2.3522', 'Paris20180420.csv')
#20180720 
GOTcity('2018-07-20', '2018-07-21', '48.8566, 2.3522', 'Paris20180720.csv')
#20181020 
GOTcity('2018-10-20', '2018-10-21', '48.8566, 2.3522', 'Paris20181020.csv')


In [None]:
#20190120 
GOTcity('2019-01-20', '2019-01-21', '48.8566, 2.3522', 'Paris20190120.csv')
#20190420 
GOTcity('2019-04-20', '2019-04-21', '48.8566, 2.3522', 'Paris20190420.csv')
#20190720 
GOTcity('2019-07-20', '2019-07-21', '48.8566, 2.3522', 'Paris20190720.csv')
#20191020 
GOTcity('2019-10-20', '2019-10-21', '48.8566, 2.3522', 'Paris20191020.csv')
#20200120 
GOTcity('2020-01-20', '2020-01-21', '48.8566, 2.3522', 'Paris20200120.csv')
#20200420 
GOTcity('2020-04-20', '2020-04-21', '48.8566, 2.3522', 'Paris20200420.csv')