In [1]:
from pprint import pprint
import json
import csv
import tweepy
import yaml
import time

from pathlib import Path
from time import sleep
import progressbar
import re

In [2]:
# Set up cache directories
tweet_cache_dir = Path('/Users/kallewesterling/_twitter_cache/tweets/')
user_cache_dir = Path('/Users/kallewesterling/_twitter_cache/users/')

if not tweet_cache_dir.is_dir(): tweet_cache_dir.mkdir(parents=True)
if not user_cache_dir.is_dir(): user_cache_dir.mkdir(parents=True)

In [3]:
# Set up tweepy
class _TwitterCredentials():

    def __init__(self):
        with open('../conference-documentation/credentials.yml') as f: self._ = yaml.load(f)

    def __getitem__(self, i):
        return self._[i]

twitter_credentials = _TwitterCredentials()

auth = tweepy.OAuthHandler(twitter_credentials['consumer_key'], twitter_credentials['consumer_secret'])
auth.set_access_token(twitter_credentials['access_token'], twitter_credentials['access_token_secret'])
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [19]:
def expand_tags_tsv(file, _filter=False, filter_field=None, filter_value=None, done=[]):
    if Path(file).is_file(): _ = [Path(file)]
    elif Path(file).is_dir(): _ = Path(file).glob(pattern="*.tsv")
    else:
        raise RuntimeError(f"Cannot interpret passed argument: {file}.")
    for file in _:
        if file.name not in done:
            empty_rows, unable_to_interpret, saved_from_twitter, saved_from_tags = 0, 0, 0, 0

            with Path(file).open("r") as f:
                _len = len(f.readlines())
            with Path(file).open("r") as f:
                print(f"Reading {Path(file).name}...")
                time.sleep(1)
                bar = progressbar.ProgressBar(max_value=_len).start()
                reader = csv.DictReader(f, delimiter='\t')
                for i, rows in enumerate(reader):
                    stop = False

                    bar.update(i)
                    if not rows['created_at'] and not rows['from_user'] and not rows['text']:
                        pass
                    try:
                        id_int = int(rows['id_str'])
                        _dict = tags_to_dict(rows)
                        if _filter: 
                            try:
                                if filter_value in _dict[filter_field]: stop = False
                                else: stop = True
                            except TypeError:
                                stop = True

                        if not stop:
                            tweet_cache = tweet_cache_dir / _dict["id_str"]
                            if not tweet_cache.is_file():
                                ## First, check twitter for tweet here... 
                                try:
                                    live_tweet = api.get_status(_dict['id_str'], tweet_mode="extended")
                                    _json = live_tweet._json
                                    _json['json_source'] = 'Twitter'

                                    _json_user = _json['user']
                                    _json['user'] = _json_user['id']

                                    with Path(tweet_cache).open("w+") as f:
                                        json.dump(_json, f)

                                    user_cache = user_cache_dir / str(_json_user["id"])
                                    if not user_cache.is_file():
                                        with Path(user_cache).open("w+") as f:
                                            _json_user['json_source'] = 'Twitter'
                                            json.dump(_json_user, f)

                                    saved_from_twitter += 1

                                except tweepy.TweepError as e:
                                    ## No tweet available:
                                    with Path(tweet_cache).open("w+") as f:
                                        _dict['error'] = str(e)
                                        json.dump(_dict, f)
                                    saved_from_tags += 1

                                    try:
                                        user_cache = user_cache_dir / str(_dict["user"]["id_str"])
                                        if not user_cache.is_file():
                                            live_user = api.get_user(_dict["user"]["id_str"])
                                            _json_user = live_user._json

                                            # remove the user's latest status
                                            try: del _json_user['status']
                                            except KeyError: pass

                                            _json_user['json_source'] = 'Twitter'

                                            with Path(user_cache).open("w+") as f:
                                                json.dump(_json_user, f)

                                    except tweepy.TweepError as e:
                                        if not user_cache.is_file():
                                            _json_user = {"error": str(e)}
                                            with Path(user_cache).open("w+") as f:
                                                json.dump(_json_user, f)


                    except ValueError:
                        if rows['id_str'] == "":
                            empty_rows += 1
                        else:
                            try:
                                # print(f"Warning: Could not interpret row {i} in file {Path(file).name} due to unintelligibility but trying to find tweet ID...")
                                _ = re.search(pattern="\/statuses\/([0-9]+)'\)", string=str(rows))
                                g = _.groups()
                                # print(f"Good news! Found ID: {g}")
                                tweet_cache = tweet_cache_dir / g[0]
                                if not tweet_cache.is_file():
                                    ## First, check twitter for tweet here... 
                                    try:
                                        if _filter: 
                                            try:
                                                if filter_value in _dict[filter_field]: stop = False
                                                else: stop = True
                                            except TypeError:
                                                stop = True

                                        if not stop:
                                            live_tweet = api.get_status(g[0], tweet_mode="extended")
                                            _json = live_tweet._json
                                            _json['json_source'] = 'Twitter'

                                            _json_user = _json['user']
                                            _json['user'] = _json_user['id']

                                            with Path(tweet_cache).open("w+") as f:
                                                json.dump(_json, f)

                                            user_cache = user_cache_dir / str(_json_user["id"])
                                            if not user_cache.is_file():
                                                with Path(user_cache).open("w+") as f:
                                                    _json_user['json_source'] = 'Twitter'
                                                    json.dump(_json_user, f)
                                    except tweepy.TweepError as e:
                                        ## No tweet available:
                                        with Path(tweet_cache).open("w+") as f:
                                            _dict['error'] = str(e)
                                            json.dump(_dict, f)
                                    except:
                                        print(f"An error occurred in file {f}")
                            except:
                                unable_to_interpret += 1
                bar.finish()
                print(f"---- done processing {Path(file).name} - report: -----")
                print(f"- {empty_rows} empty rows encountered.")
                print(f"- Unable to interpret {unable_to_interpret} rows.")
                print("\n")
                print(f"- {saved_from_twitter} tweets saved from Twitter.")
                print(f"- {saved_from_tags} tweets saved from TAGS.")
                print("\n")

def tags_to_dict(rows):
    if not rows['id_str']:
        print("STOP")
        pprint(rows)
        exit()
    else:
        _dict = {
            'created_at': rows['created_at'],
            'id': int(rows['id_str']),
            'id_str': rows['id_str'],
                            'from_user': rows['from_user'],
            'full_text': rows['text'],
            'geo_coordinates': rows['geo_coordinates'],
            'lang': rows['user_lang'],
            'in_reply_to_user_id_str': rows['in_reply_to_user_id_str'],
            'in_reply_to_screen_name': rows['in_reply_to_screen_name'],
            'user': {
                'id_str': rows['from_user_id_str'],
                'followers_count': rows['user_followers_count'],
                'friends_count': rows['user_friends_count'],
            },
            'in_reply_to_status_id_str': rows['in_reply_to_status_id_str'],
            'source': rows['source'],
            'profile_image_url': rows['profile_image_url'],
            'entities_str': rows['entities_str'],
            'json_source': "TAGS"
        }
    return(_dict)

In [20]:
# The expand_tags_tsv function accepts whole directories or, if you prefer, individual TAGS archive sheets, saved as .tsv files
'''
expand_tags_tsv('../../datasets/tags-tsv/burlesque/TAGS - burlesque 1 - Archive.tsv', _filter=True, filter_field="full_text", filter_value="burlesque") # done
'''

'\nexpand_tags_tsv(\'../../datasets/tags-tsv/burlesque/TAGS - burlesque 1 - Archive.tsv\', _filter=True, filter_field="full_text", filter_value="burlesque") # done\n'

In [21]:
'''
expand_tags_tsv('../../datasets/tags-tsv/burlesque/TAGS - burlesque 10 - Archive.tsv', _filter=True, filter_field="full_text", filter_value="burlesque")
'''

'\nexpand_tags_tsv(\'../../datasets/tags-tsv/burlesque/TAGS - burlesque 10 - Archive.tsv\', _filter=True, filter_field="full_text", filter_value="burlesque")\n'

In [25]:
expand_tags_tsv('../../datasets/tags-tsv/burlesque/', _filter=True, filter_field="full_text", filter_value="burlesque", done=['TAGS - burlesque 60 - Archive.tsv'])

Reading TAGS - burlesque 49 - Archive.tsv...


100% (44922 of 44922) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 49 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 17 - Archive.tsv...


100% (47675 of 47675) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 17 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 39 - Archive.tsv...


100% (44997 of 44997) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


---- done processing TAGS - burlesque 39 - Archive.tsv - report: -----
- 123 empty rows encountered.
- Unable to interpret 131 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 13 - Archive.tsv...


100% (56163 of 56163) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 13 - Archive.tsv - report: -----
- 68 empty rows encountered.
- Unable to interpret 3 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 14 - Archive.tsv...


100% (48854 of 48854) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


---- done processing TAGS - burlesque 14 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 34 (1) - Archive.tsv...


100% (50410 of 50410) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


---- done processing TAGS - burlesque 34 (1) - Archive.tsv - report: -----
- 73 empty rows encountered.
- Unable to interpret 3 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 10 - Archive.tsv...




100% (97144 of 97144) |##################| Elapsed Time: 0:00:13 Time:  0:00:13


---- done processing TAGS - burlesque 10 - Archive.tsv - report: -----
- 222 empty rows encountered.
- Unable to interpret 7 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 29 - Archive.tsv...


100% (51810 of 51810) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 29 - Archive.tsv - report: -----
- 69 empty rows encountered.
- Unable to interpret 1 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 8 - Archive.tsv...


100% (117096 of 117096) |################| Elapsed Time: 0:00:11 Time:  0:00:11


---- done processing TAGS - burlesque 8 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 33 (2) - Archive.tsv...


100% (49323 of 49323) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 33 (2) - Archive.tsv - report: -----
- 75 empty rows encountered.
- Unable to interpret 10 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 59 - Archive.tsv...










100% (148133 of 148133) |################| Elapsed Time: 0:00:18 Time:  0:00:18


---- done processing TAGS - burlesque 59 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 38 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 2 - Archive.tsv...




100% (117684 of 117684) |################| Elapsed Time: 0:00:13 Time:  0:00:13


---- done processing TAGS - burlesque 2 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 22 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 44 - Archive.tsv...


100% (43580 of 43580) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 44 - Archive.tsv - report: -----
- 74 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 53 - Archive.tsv...




100% (59842 of 59842) |##################| Elapsed Time: 0:00:08 Time:  0:00:08


---- done processing TAGS - burlesque 53 - Archive.tsv - report: -----
- 117 empty rows encountered.
- Unable to interpret 69 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 6 - Archive.tsv...


100% (117661 of 117661) |################| Elapsed Time: 0:00:10 Time:  0:00:10


---- done processing TAGS - burlesque 6 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 7 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 57 - Archive.tsv...


100% (53927 of 53927) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 57 - Archive.tsv - report: -----
- 45 empty rows encountered.
- Unable to interpret 66 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 40 - Archive.tsv...


100% (44846 of 44846) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 40 - Archive.tsv - report: -----
- 69 empty rows encountered.
- Unable to interpret 204 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 23 - Archive.tsv...




100% (50623 of 50623) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 23 - Archive.tsv - report: -----
- 46 empty rows encountered.
- Unable to interpret 11 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 30 - Archive.tsv...


100% (53126 of 53126) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 30 - Archive.tsv - report: -----
- 82 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 27 - Archive.tsv...


100% (52794 of 52794) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 27 - Archive.tsv - report: -----
- 37 empty rows encountered.
- Unable to interpret 1 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 19 - Archive.tsv...


100% (50265 of 50265) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 19 - Archive.tsv - report: -----
- 39 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 37 - Archive.tsv...


100% (47124 of 47124) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 37 - Archive.tsv - report: -----
- 99 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 20 - Archive.tsv...


100% (45062 of 45062) |##################| Elapsed Time: 0:00:03 Time:  0:00:03


---- done processing TAGS - burlesque 20 - Archive.tsv - report: -----
- 1 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 24 - Archive.tsv...


100% (50305 of 50305) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 24 - Archive.tsv - report: -----
- 82 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 33 - Archive.tsv...




100% (63836 of 63836) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 33 - Archive.tsv - report: -----
- 10395 empty rows encountered.
- Unable to interpret 8 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 34 (2) - Archive.tsv...


100% (54244 of 54244) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 34 (2) - Archive.tsv - report: -----
- 98 empty rows encountered.
- Unable to interpret 6 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 1 - Archive.tsv...






100% (84122 of 84122) |##################| Elapsed Time: 0:00:12 Time:  0:00:12


---- done processing TAGS - burlesque 1 - Archive.tsv - report: -----
- 346 empty rows encountered.
- Unable to interpret 27 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 50 - Archive.tsv...


100% (39771 of 39771) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 50 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 46 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 47 - Archive.tsv...




100% (43037 of 43037) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 47 - Archive.tsv - report: -----
- 48 empty rows encountered.
- Unable to interpret 174 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 5 - Archive.tsv...








100% (117559 of 117559) |################| Elapsed Time: 0:00:11 Time:  0:00:11


---- done processing TAGS - burlesque 5 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 17 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 43 - Archive.tsv...


100% (44820 of 44820) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 43 - Archive.tsv - report: -----
- 44 empty rows encountered.
- Unable to interpret 56 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 54 - Archive.tsv...


100% (55478 of 55478) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 54 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 116 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 32 - Archive.tsv...


100% (59200 of 59200) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 32 - Archive.tsv - report: -----
- 342 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 25 - Archive.tsv...


100% (49488 of 49488) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 25 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 10 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 21 - Archive.tsv...


100% (53083 of 53083) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 21 - Archive.tsv - report: -----
- 109 empty rows encountered.
- Unable to interpret 3 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 36 - Archive.tsv...


100% (57805 of 57805) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 36 - Archive.tsv - report: -----
- 128 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 18 - Archive.tsv...




100% (48175 of 48175) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 18 - Archive.tsv - report: -----
- 56 empty rows encountered.
- Unable to interpret 2 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 55 - Archive.tsv...


100% (53581 of 53581) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 55 - Archive.tsv - report: -----
- 50 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 42 - Archive.tsv...


100% (45291 of 45291) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 42 - Archive.tsv - report: -----
- 80 empty rows encountered.
- Unable to interpret 87 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 4 - Archive.tsv...








100% (117617 of 117617) |################| Elapsed Time: 0:00:11 Time:  0:00:11


---- done processing TAGS - burlesque 4 - Archive.tsv - report: -----
- 370 empty rows encountered.
- Unable to interpret 18 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 46 - Archive.tsv...


100% (43045 of 43045) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 46 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 42 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 51 - Archive.tsv...




100% (41059 of 41059) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 51 - Archive.tsv - report: -----
- 90 empty rows encountered.
- Unable to interpret 47 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 41 - Archive.tsv...


100% (45423 of 45423) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 41 - Archive.tsv - report: -----
- 93 empty rows encountered.
- Unable to interpret 80 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 56 - Archive.tsv...














100% (51358 of 51358) |##################| Elapsed Time: 0:00:08 Time:  0:00:08


---- done processing TAGS - burlesque 56 - Archive.tsv - report: -----
- 66 empty rows encountered.
- Unable to interpret 75 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 7 - Archive.tsv...




100% (117610 of 117610) |################| Elapsed Time: 0:00:13 Time:  0:00:13


---- done processing TAGS - burlesque 7 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 53 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 52 - Archive.tsv...


100% (51130 of 51130) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 52 - Archive.tsv - report: -----
- 54 empty rows encountered.
- Unable to interpret 2 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 45 - Archive.tsv...


100% (50142 of 50142) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 45 - Archive.tsv - report: -----
- 119 empty rows encountered.
- Unable to interpret 11 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 3 - Archive.tsv...










100% (117450 of 117450) |################| Elapsed Time: 0:00:11 Time:  0:00:11


---- done processing TAGS - burlesque 3 - Archive.tsv - report: -----
- 1 empty rows encountered.
- Unable to interpret 7 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 26 - Archive.tsv...




100% (47482 of 47482) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 26 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 5 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 31 - Archive.tsv...


100% (52269 of 52269) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 31 - Archive.tsv - report: -----
- 115 empty rows encountered.
- Unable to interpret 20 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 35 - Archive.tsv...




100% (55924 of 55924) |##################| Elapsed Time: 0:00:06 Time:  0:00:06


---- done processing TAGS - burlesque 35 - Archive.tsv - report: -----
- 92 empty rows encountered.
- Unable to interpret 4 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 22 - Archive.tsv...


100% (51390 of 51390) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 22 - Archive.tsv - report: -----
- 34 empty rows encountered.
- Unable to interpret 1 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 28 - Archive.tsv...


100% (53951 of 53951) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 28 - Archive.tsv - report: -----
- 167 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 11 - Archive.tsv...




100% (117652 of 117652) |################| Elapsed Time: 0:00:14 Time:  0:00:14


---- done processing TAGS - burlesque 11 - Archive.tsv - report: -----
- 1361 empty rows encountered.
- Unable to interpret 32 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 15 - Archive.tsv...


100% (48996 of 48996) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 15 - Archive.tsv - report: -----
- 52 empty rows encountered.
- Unable to interpret 5 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 58 - Archive.tsv...


100% (38235 of 38235) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 58 - Archive.tsv - report: -----
- 63 empty rows encountered.
- Unable to interpret 0 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 9 - Archive.tsv...


100% (117763 of 117763) |################| Elapsed Time: 0:00:10 Time:  0:00:10


---- done processing TAGS - burlesque 9 - Archive.tsv - report: -----
- 0 empty rows encountered.
- Unable to interpret 113 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 38 (?) - Archive.tsv...


100% (42750 of 42750) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 38 (?) - Archive.tsv - report: -----
- 58 empty rows encountered.
- Unable to interpret 1 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 48 - Archive.tsv...


100% (42870 of 42870) |##################| Elapsed Time: 0:00:05 Time:  0:00:05


---- done processing TAGS - burlesque 48 - Archive.tsv - report: -----
- 40 empty rows encountered.
- Unable to interpret 71 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 12 - Archive.tsv...


100% (117636 of 117636) |################| Elapsed Time: 0:00:11 Time:  0:00:11


---- done processing TAGS - burlesque 12 - Archive.tsv - report: -----
- 1281 empty rows encountered.
- Unable to interpret 35 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.


Reading TAGS - burlesque 16 - Archive.tsv...


100% (47044 of 47044) |##################| Elapsed Time: 0:00:04 Time:  0:00:04


---- done processing TAGS - burlesque 16 - Archive.tsv - report: -----
- 14 empty rows encountered.
- Unable to interpret 2 rows.


- 0 tweets saved from Twitter.
- 0 tweets saved from TAGS.




In [None]:
expand_tags_tsv('../../datasets/tags-tsv/male striptease/')
expand_tags_tsv('../../datasets/tags-tsv/boy-lesque/')
expand_tags_tsv('../../datasets/tags-tsv/male burlesque/')
expand_tags_tsv('../../datasets/tags-tsv/boylesque/')
expand_tags_tsv('../../datasets/tags-tsv/burlesk OR burleycue OR burly-q/')