# Twitter Timeline / User Profile Download

Download Twitter timelines and save them to local disk for model training.  
Download the profiles of the model's reference users (e.g. name, description, profile image URL) and save them to a JSON file for quick lookup.

### Imports

In [1]:
import os
#  Step up to the parent directory exactly once per kernel.  Without the
#  guard, re-running this cell would climb one more level on every run and
#  silently change where later cells read and write files.
if not globals().get('_NOTEBOOK_CHDIR_DONE', False):
    os.chdir('..')
    _NOTEBOOK_CHDIR_DONE = True

In [2]:
import twitter
import datetime
import pickle
import itertools
import json

In [3]:
import assembly
from assembly import config as asmbl_config
from assembly import db as asmbl_db
from assembly import models as asmbl_models

### Initializations

In [4]:
#  Bootstrap Assembly: the environment name is set first, then the app is built.
#  NOTE(review): presumably Assembly.init() reads ASSEMBLY_ENV to select the
#  matching configuration — confirm against the assembly documentation.
os.environ.update({'ASSEMBLY_ENV': 'Development'})
app = assembly.Assembly.init(
    __name__,
    {'default': []},
)

In [5]:
#  Twitter API credentials are read from environment variables so that secrets
#  are never stored in (or committed with) the notebook.
#  NOTE(review): the keys that used to be hard-coded in this cell have been
#  exposed in the notebook source and should be revoked and regenerated.
_credential_env_vars = {
    'consumer_key':        'TWITTER_CONSUMER_KEY',
    'consumer_secret':     'TWITTER_CONSUMER_SECRET',
    'access_token_key':    'TWITTER_ACCESS_TOKEN_KEY',
    'access_token_secret': 'TWITTER_ACCESS_TOKEN_SECRET',
}

#  Fail fast, naming every missing variable, rather than failing later with an
#  opaque authentication error from the API client.
_missing_env_vars = [var for var in _credential_env_vars.values() if not os.environ.get(var)]
if _missing_env_vars:
    raise RuntimeError(
        'Missing Twitter credential environment variable(s): ' + ', '.join(_missing_env_vars)
    )

#  Same four keys as before, so downstream cells can keep indexing this dict.
twitter_credentials = {key: os.environ[var] for key, var in _credential_env_vars.items()}

In [6]:
#  API client used by every download cell below.
#  tweet_mode='extended' is kept from the original configuration.
#  sleep_on_rate_limit=True makes the client wait for the rate-limit window to
#  reset instead of raising, so a long bulk download can run to completion
#  without being restarted by hand part-way through the handle list.
twapi = twitter.Api(consumer_key=twitter_credentials['consumer_key'],
                    consumer_secret=twitter_credentials['consumer_secret'],
                    access_token_key=twitter_credentials['access_token_key'],
                    access_token_secret=twitter_credentials['access_token_secret'],
                    tweet_mode='extended',
                    sleep_on_rate_limit=True)

### User Inputs

In [7]:
#  Output locations, built from a project root rather than one developer's
#  home directory.  Set the TWEEMIO_API_ROOT environment variable to point at
#  the checkout explicitly; otherwise the current working directory is used.
#  NOTE(review): assumes the working directory at this point is the
#  tweemio-api checkout root — confirm before running on a new machine.
project_root = os.path.abspath(os.environ.get('TWEEMIO_API_ROOT') or os.getcwd())

#  JSON file that receives the reference users' profile information.
userprofile_save_location = os.path.join(project_root, 'resources', 'model_user_profiles.json')
#  Directory that receives one timeline JSON file per screen name.
timelines_save_location = os.path.join(project_root, 'data', 'timelines')

In [8]:
#  Flatten the per-category 'screen_names' lists from the app config into a
#  single list of handles, keeping category order and the order within each
#  category.  Timelines are later downloaded for every handle in this list.
twitter_handles = [
    handle
    for spec in app.config['SIMILARITY_COMPARISONS'].values()
    for handle in spec['screen_names']
]

In [30]:
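# API rate limits can interrupt a full download before every handle is done.
# To resume a partial run, uncomment the line below and replace 'jlo' with the
# first handle that still needs to be downloaded.
# twitter_handles = twitter_handles[twitter_handles.index('jlo'):]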

### Timeline Download, Save to Disk

In [10]:
def timeline_download(screen_name: str, twapi):
    '''
    Download as much of a user's timeline as the API will return.

    Pages backwards through the timeline with ``GetUserTimeline`` (retweets
    excluded, up to 200 tweets requested per call) until a page contributes
    no tweet that has not already been collected.

    Args:
        screen_name: handle of the account whose timeline is downloaded.
        twapi: object exposing ``GetUserTimeline(screen_name=, include_rts=,
            count=, max_id=)``; an initialized ``twitter.Api`` is expected.

    Returns:
        List of the tweet objects returned by the API, in the order they were
        received, with every tweet id appearing exactly once.
    '''
    print('Retrieving timeline for: {}'.format(screen_name))
    timeline = []
    seen_ids = set()
    max_id = None
    while True:
        print(f'     max_id for iteration: {max_id}')
        tweets = twapi.GetUserTimeline(screen_name=screen_name, include_rts=False, count=200, max_id=max_id)

        # Keep only tweets that have not been collected yet.  This removes
        # page-boundary duplicates and also guarantees termination if the
        # endpoint keeps returning tweets we already have.
        fresh = []
        for tweet in tweets:
            if tweet.id not in seen_ids:
                seen_ids.add(tweet.id)
                fresh.append(tweet)
        if not fresh:
            break
        timeline.extend(fresh)

        # max_id is inclusive on the API side, so ask for tweets strictly
        # older than the oldest one just received; passing the oldest id
        # itself would return that tweet again on the next page.
        max_id = min(tweet.id for tweet in fresh) - 1
    return timeline


In [None]:
#  Download the timeline of every handle into timeline_map
#  (screen name -> list of tweets returned by timeline_download).
#  The map is created only when it does not exist yet: a fresh kernel starts
#  with an empty map instead of failing with NameError, while re-running this
#  cell after an interrupted download keeps the timelines already fetched.
try:
    timeline_map
except NameError:
    timeline_map = {}

for screen_name in twitter_handles:
    print(f'Downloading timeline for screen name: {screen_name}')
    user_timeline = timeline_download(screen_name, twapi)
    timeline_map[screen_name] = user_timeline

In [None]:
#  Serialize each downloaded timeline to JSON, one file per screen name,
#  named <screen_name>-<YYYYMMDD>.json inside timelines_save_location.

#  open() below fails if the target folder is missing, so create it up front.
os.makedirs(timelines_save_location, exist_ok=True)
#  Computed once so every file written by this run carries the same date stamp.
date_stamp = datetime.date.today().strftime('%Y%m%d')

for screen_name, tline in timeline_map.items():
    print(f'writing {screen_name} to file...')
    # NOTE(review): _json is a private attribute of the tweet objects;
    # presumably the raw API payload for each tweet — confirm.
    tline_js = [tli._json for tli in tline]
    timeline_path = os.path.join(timelines_save_location, f'{screen_name}-{date_stamp}.json')
    with open(timeline_path, "w") as outfile:
        json.dump(tline_js, outfile)

### User Profile Download, Persist to JSON

These are the users used to calibrate the model and against whom similarity scores are computed.   
Some of their profile information is saved to a JSON file so that the front end can render it.

In [10]:
#  Fetch each handle's profile and keep only the fields stored per user:
#  screen name, display name, description and HTTPS profile image URL.
user_profile_map = {}
for screen_name in twitter_handles:
    print(f'getting user: {screen_name}')
    profile = twapi.GetUser(screen_name=screen_name)
    user_profile_map[screen_name] = dict(
        screen_name=screen_name,
        name=profile.name,
        description=profile.description,
        profile_img=profile.profile_image_url_https,
    )

getting user: realdonaldtrump
getting user: kellyannepolls
getting user: secpompeo
getting user: mike_pence
getting user: devinnunes
getting user: lindseygrahamsc
getting user: donaldjtrumpjr
getting user: rudygiuliani
getting user: govmikehuckabee
getting user: alandersh
getting user: repmattgaetz
getting user: jim_jordan
getting user: markmeadows
getting user: gopleader
getting user: betsydevosed
getting user: elisestefanik
getting user: seanhannity
getting user: gopchairwoman
getting user: charliekirk11
getting user: gtconway3d
getting user: steveschmidtses
getting user: jwgop
getting user: therickwilson
getting user: nhjennifer
getting user: ronsteslow
getting user: reedgalen
getting user: madrid_mike
getting user: donwinslow
getting user: scaramucci
getting user: justinamash
getting user: kasparov63
getting user: andrewyang
getting user: kamalaharris
getting user: petebuttigieg
getting user: joebiden
getting user: ewarren
getting user: berniesanders
getting user: corybooker
gettin

In [11]:
#  Persist the collected profile map as a single JSON document.
profiles_json = json.dumps(user_profile_map)
with open(userprofile_save_location, 'w') as profile_file:
    profile_file.write(profiles_json)