In [1]:
# Notebook setup: imports, configuration, MongoDB client and Twitter API client.

# Ignore DeprecationWarning from here on; the filter is installed before the
# remaining imports are executed.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import pandas as pd
from openpyxl import load_workbook

# Connection settings and credentials are read from config.ini (sections
# DEFAULT and TWITTER) instead of being hardcoded in the notebook.
import configparser 
config = configparser.ConfigParser()
config.read('config.ini')
ip = config['DEFAULT']['IP']
port = config['DEFAULT']['MongoDB-Port']

from pymongo import MongoClient
# The port comes out of the config file as text, hence the int() conversion.
client = MongoClient(ip, int(port))

import twitter

# Twitter API credentials: consumer key/secret and access token key/secret.
consumer_key = config['TWITTER']['CK']
consumer_secret = config['TWITTER']['CS']
access_token_key = config['TWITTER']['ATK']
access_token_secret = config['TWITTER']['ATS']

# NOTE(review): tweet_mode='extended' is presumably what makes the `full_text`
# attribute read later from api.GetStatus(...) available -- confirm against
# the python-twitter documentation.
api = twitter.Api(consumer_key=consumer_key,
                  consumer_secret=consumer_secret,
                  access_token_key=access_token_key,
                  access_token_secret=access_token_secret,
                  tweet_mode= 'extended')


In [2]:
# Handle to the "Twitter" database on the MongoDB client created above.
db_twitter = client["Twitter"]

In [3]:
# Terms, phrases and account names that the stored tweets are searched for.
# One entry per line so additions and removals show up cleanly in diffs.
search_terms = [
    'notallmen',
    'feminazi',
    'vaw',
    'family violence',
    'domestic violence',
    'toxic masculinity',
    'Rosie Batty',
    'OurWatchAus',
    'freefromviolence',
    'mrbenjaminlaw',
    'howiwillchange',
]

In [4]:
# Run one text search per term against the "twitter-temp" collection and keep
# the matching documents in memory, keyed by the term that found them.
tweet_collection = db_twitter["twitter-temp"]
tweets_by_term = {}
for term in search_terms:
    # The term is wrapped in double quotes inside the $search string.
    text_query = {"$text": {"$search": '"{}"'.format(term), "$caseSensitive": False}}
    matches = list(tweet_collection.find(text_query))
    tweets_by_term[term] = matches
    print(term, ':', len(matches))

notallmen : 106
feminazi : 146
vaw : 839
family violence : 3501
domestic violence : 3469
toxic masculinity : 475
Rosie Batty : 123
OurWatchAus : 419
freefromviolence : 91
mrbenjaminlaw : 4071
howiwillchange : 96


In [12]:
# get full text if possible
# Tweets whose stored document is flagged as truncated are fetched again
# through the Twitter API and their stored text is replaced by `full_text`.
# Only the first MAX_LOOKUPS_PER_TERM tweets of each term are examined.
MAX_LOOKUPS_PER_TERM = 10

print('Text extended:')
for term in search_terms:
    count = 0   # tweets whose text was replaced
    failed = 0  # API lookups that raised
    for tweet in tweets_by_term[term][:MAX_LOOKUPS_PER_TERM]:
        if not tweet['doc']['truncated']:
            continue
        try:
            full_text = api.GetStatus(tweet['id']).full_text
        except Exception:
            # Still best effort: a failed lookup leaves the stored text as it
            # is. Catching Exception rather than using a bare `except` lets
            # KeyboardInterrupt/SystemExit stop the cell, and the failure is
            # counted instead of vanishing silently.
            failed += 1
            continue
        tweet['doc']['text'] = full_text
        count += 1
    print(term, ':', count)
    if failed:
        print('  lookups failed for', term, ':', failed)

Text extended:
notallmen : 0
feminazi : 0
vaw : 2
family violence : 2
domestic violence : 3
toxic masculinity : 0
Rosie Batty : 0
OurWatchAus : 3
freefromviolence : 0
mrbenjaminlaw : 0
howiwillchange : 0


In [13]:
# add link field
# Every tweet document gets a 'link_to_tweet' entry built from the author's
# screen name and the tweet id.
print('Link added:')
for term in search_terms:
    term_tweets = tweets_by_term[term]
    for tweet in term_tweets:
        tweet_doc = tweet['doc']
        link = '/'.join(('twitter.com', tweet_doc['user']['screen_name'], 'status', tweet['id']))
        tweet_doc['link_to_tweet'] = link
    # Every tweet of the term is processed, so the count is the list length.
    count = len(term_tweets)
    print(term, ':', count)

Link added:
notallmen : 106
feminazi : 146
vaw : 839
family violence : 3501
domestic violence : 3469
toxic masculinity : 475
Rosie Batty : 123
OurWatchAus : 419
freefromviolence : 91
mrbenjaminlaw : 4071
howiwillchange : 96


In [43]:
# aggregate by month
# Nested container: month number (6 through 12) -> search term -> list of
# tweet documents. Every list starts out empty.
tweets_by_month_term = {
    month: {term: [] for term in search_terms}
    for month in range(6, 13)
}

In [44]:
# File every tweet document under its month and search term.
# NOTE: the lists are appended to, so running this cell twice without
# re-running the initialisation of tweets_by_month_term duplicates the entries.
for term in search_terms:
    for tweet in tweets_by_term[term]:
        # Position 2 of the tweet's 'key' is used as the month bucket.
        # NOTE(review): assumes this value is one of the months pre-created in
        # tweets_by_month_term -- confirm against the layout of 'key'.
        m = tweet['key'][2]
        # Shallow copy: the three fields below are blanked on the copy only, so
        # the documents held in tweets_by_term keep their original content and
        # earlier cells (e.g. the one reading doc['user']) can be re-run.
        doc = dict(tweet['doc'])
        # Nested fields are replaced by 0 before the documents are collected
        # (presumably to keep them out of the tabular export -- confirm).
        doc['entities'] = 0
        doc['metadata'] = 0
        doc['user'] = 0
        tweets_by_month_term[m][term].append(doc)

In [45]:
# output to file
# One workbook per month ('output/2017-<month>.xlsx'), one sheet per search
# term. The workbook must already exist: it is opened with load_workbook and
# the sheets are written into it.
for m in range(6, 13):
    path = 'output/2017-' + str(m) + '.xlsx'

    # Load, wrap and save each workbook once per month rather than once per
    # term; the per-term reload/rewrite cycle produced the same file at many
    # times the I/O cost.
    book = load_workbook(path)
    writer = pd.ExcelWriter(path, engine='openpyxl')
    # NOTE(review): assigning writer.book / writer.sheets and calling
    # writer.save() follow the older pandas ExcelWriter API; newer pandas
    # releases provide ExcelWriter(..., mode='a') for appending -- confirm
    # against the installed pandas version before upgrading.
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    for term in search_terms:
        df = pd.DataFrame(tweets_by_month_term[m][term])
        # sheet_name is passed by keyword so the call does not depend on
        # positional-argument order.
        df.to_excel(writer, sheet_name=term)

    writer.save()

print('Save to Excel files complete')

Save to Excel files complete
