In [58]:
###############################################################################
# Information
###############################################################################
# Created by Linwood Creekmore 

# https://github.com/linwoodc3


In [59]:
###############################################################################
# Admin work; creating a normalized path to work on any OS for calls to keys or files
###############################################################################
import geohash
import os
path = os.path.normpath(os.path.join(os.path.normpath(os.path.expanduser("~")),"projects","LC3-Creations"))
import sys
sys.path.append(os.path.join(path,"timehash"))
import timehash

In [109]:
import sys
default_stdout = sys.stdout
default_stderr = sys.stderr
reload(sys)
sys.stdout = default_stdout
sys.stderr = default_stderr

In [60]:
#**********************************************************************
# Function to load our .txt files from disc into python json/dicts for analysis
#**********************************************************************

import json
import re

#shameless copy paste from json/decoder.py
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

class ConcatJSONDecoder(json.JSONDecoder):
    def decode(self, s, _w=WHITESPACE.match):
        s_len = len(s)

        objs = []
        end = 0
        while end != s_len:
            obj, end = self.raw_decode(s, idx=_w(s, end).end())
            end = _w(s, end).end()
            objs.append(obj)
        return objs

In [61]:
#**********************************************************************
# Turn Twitter hashtags into a string for NLP or analysis
#**********************************************************************

def hashtag_getter(tweet):
    try:
        # do a test to see if the length of the entities is greater than zero, if not skip
        if len([l['text'] for l in tweet['entities']['hashtags']])>0:
            
            # join all hashtags into a list, split by "," and whitespace
            hashtags = [(", ".join([l['text'] for l in tweet['entities']['hashtags']]))]
        else:
            hashtags = ""
            
    # if we don't have hashtags, this exception prevents an error hangup   
    except:
        pass
    return hashtags

In [None]:
#**********************************************************************
# How to obfuscate API keys; store as json, run, then clear. DON'T add file to GITHUB 

# Everyone always said "hide your keys" but never say how; this is a way
#**********************************************************************
'''
apikeys = {}
apikeys['service']= {"keyname": "key"}

import json

# writing to directory two steps above current; in this case, to the repository's base directory
with open('../../apikeys.txt', 'w+') as outfile:
    json.dump(apikeys, outfile)
outfile.closed

#**********************************************************************
# Have new keys to add? Easy, just follow this process
# To add new keys to your file, 
# just open the .txt in a text editor like notepad, wordpad, sublime, etc
# And the new key manually using this example below
# anything with "new" below would just be added in manually
#**********************************************************************

apikeys = {"newservice": {"newconsumerkey": "newkey", "newconsumersecret": "newkey"},"existingservice":{"consumerkey":"key", "consumersecret":"key}}

'''

In [62]:
#**********************************************************************
# How to load your keys from text file; never add this to git
#**********************************************************************

# I created a nested dictionary with my API keys, then wrote that json to disk.  Now, I load the json and just pass the keys into the application

oauth = json.load(open(os.path.join(path,"apikeys.txt")), cls=ConcatJSONDecoder)

In [None]:
#**********************************************************************
# How to retrieve your key for a specific service
#**********************************************************************

# If you have NOT created the apikeys json file and saved to disc, this will error
oauth[0]['openmapquest']['consumerkey']

In [None]:
#**********************************************************************
# Passing in oauth values; you obviscate by loading your locally stored json
#**********************************************************************

# Import the necessary package to process data in JSON format
try:
    import json
except ImportError:
    import simplejson as json

# Import the necessary methods from "twitter" library
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# Variables that contains the user credentials to access Twitter API 
# If you have not followed the obfuscate process above, this will be empty and error out
ACCESS_TOKEN = oauth[0]['twitter']['accesstoken']
ACCESS_SECRET = oauth[0]['twitter']['accesssecret']
CONSUMER_KEY = oauth[0]['twitter']['consumerkey']
CONSUMER_SECRET = oauth[0]['twitter']['consumersecret']

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)


In [None]:
#**********************************************************************
# Code to pull tweets from twitter stream

# I pulled this code from http://socialmedia-class.org/twittertutorial.html.  
# I used the locations filter, but you can alter to get a sample or pull 
# specific keywords.  Use the link above.  The only line you would alter is:
# iterator = twitter_stream.statuses.filter(
#**********************************************************************


# Initiate the connection to Twitter Streaming API
twitter_stream = TwitterStream(auth=oauth)

# Filter the public data following through Twitter
iterator = twitter_stream.statuses.filter(locations = '-105.024513,39.741353, -105.014846,39.747408 ')

# Print each tweet in the stream to the screen 
# Here we set it to stop after getting 1000 tweets. 
# You don't have to set it to stop, but can continue running 
# the Twitter API to collect data for days or even longer. 

with open('tweetstream.txt', 'w+') as outfile:
    for tweet in iterator:
        
        # Twitter Python Tool wraps the data returned by Twitter 
        # as a TwitterDictResponse object.
        # We convert it back to the JSON format to print/score
        #print json.dumps(tweet)  

        # The command below will do pretty printing for JSON data, try it out
        # print json.dumps(tweet, indent=4)
        json.dump(tweet, outfile)
    outfile.closed  

In [63]:
#**********************************************************************
# After the tweet streaming data collection is complete, this is how to load the file
#**********************************************************************

nflgame = json.load(open(os.path.join(path,"notebooks","twittertest","pats_vs_broncos.txt")), cls=ConcatJSONDecoder)
nflgame2 = json.load(open(os.path.join(path,"notebooks","twittertest","chiefs_vs_pats.txt")), cls=ConcatJSONDecoder)
nflgame3 = json.load(open(os.path.join(path,"notebooks","twittertest","skins_vs_pack.txt")), cls=ConcatJSONDecoder)
nflgame4 = [json.loads(l) for l in json.load(open(os.path.join(path,"notebooks","twittertest","viks_vs_hawks.txt")), cls=ConcatJSONDecoder)]

In [41]:
len(nflgame)

1469

In [57]:
#**********************************************************************
# Code to extract values from json tweet and write to csv
# http://blog.appliedinformaticsinc.com/how-to-parse-and-convert-json-to-csv-using-python/
#**********************************************************************
import csv
row = {"tweetdetails":{}}
polygons = 0
points = 0
game_data = row['tweetdetails']

# create a folder to store data
if not os.path.exists('./output'):
    os.makedirs('./output')

# open a file for writing
playoffs = open('./output/playoffs.csv', 'w+')

# create the csv writer object
csvwriter = csv.writer(playoffs)
count = 0
for l in nflgame:
    try:
        row['tweetdetails']['message_id']= l['id']
        row['tweetdetails']['epochtime']= int(float(l['timestamp_ms'])/float(1000))
        row['tweetdetails']['timehash']= timehash.encode(int(l['timestamp_ms'])/1000)
        row['tweetdetails']['polygon']=[(m[0],m[1]) for m in (l['place']['bounding_box']['coordinates'][0])]
        row['tweetdetails']['screen_name']= l['user']['screen_name']
        row['tweetdetails']['user_id']= l['user']['id_str']
        row['tweetdetails']['tweet']= l['text']
        row['tweetdetails']['hashtags']= hashtag_getter(l)
        polygons += 1
        try:
            if l['geo']['type'] == 'Point':
                row['tweetdetails']['Latitude']=l['geo']['coordinates'][0]
                row['tweetdetails']['Longitude']=l['geo']['coordinates'][1]
                row['tweetdetails']['geohash']=geohash.encode_uint64(l['coordinates']['coordinates'][1],l['coordinates']['coordinates'][0])
                points += 1
                
        except:
            row['tweetdetails']['Latitude']=""
            row['tweetdetails']['Longitude']=""
            row['tweetdetails']['geohash']=""
        
        
        header = row['tweetdetails'].keys()
        
        if count == 0:
            csvwriter.writerow(header)
            count += 1
        csvwriter.writerow(row['tweetdetails'].values())
            
    except:
        pass
        
playoffs.close() 

In [53]:
len(lengths)

1469

In [38]:
row['tweetdetails'].keys()

['user_id',
 'polygon',
 'tweet',
 'hashtags',
 'epochtime',
 'timehash',
 'geohash',
 'Longitude',
 'Latitude',
 'message_id',
 'screen_name']

In [46]:
import csv

game_data = row['tweetdetails']

# create a folder to store data
if not os.path.exists('./output'):
    os.makedirs('./output')

# open a file for writing
playoffs = open('./output/playoffs.csv', 'w+')

# create the csv writer object
csvwriter = csv.writer(playoffs)

count = 0
header = row['tweetdetails'].keys()
csvwriter.writerow(header)
csvwriter.writerow(row['tweetdetails'].values())
playoffs.close()

In [72]:
hashtag_getter(nflgame[4])

[u'Nunn, Transportation, VeteranJob, Job, Jobs, Hiring, CareerArc']

In [None]:
(float(l['timestamp_ms'])/float(1000))

In [64]:
#**********************************************************************
# Testing for common users at games
#**********************************************************************

users3 = []
for l in nflgame4:
    try:
        users3.append((l['user']['id_str'],l['user']['screen_name']))
    except:
        pass
print len(users3)

users1 = [(l['user']['id_str'],l['user']['screen_name']) for l in nflgame]
users2 = [(l['user']['id_str'],l['user']['screen_name']) for l in nflgame2]
users4 = [(l['user']['id_str'],l['user']['screen_name']) for l in nflgame3]

a =set(users1) & set(users2)
b = set(users2) & set(users4)
c = set(users1) & set(users4)
d = set(users3) & set(users4)
e = set(users3) & set(users2)
f = set(users3) & set(users1)


#**********************************************************************
# Super set of profiles who participated in at least two events
#**********************************************************************
((((a.union(b)).union(c)).union(d)).union(e)).union(f)

{(u'107886768', u'barb_hill'),
 (u'1254995785', u'DCBarno'),
 (u'131951051', u'rian5ca'),
 (u'1356310075', u'clozoya13'),
 (u'141302910', u'Kindred_Jobs'),
 (u'15999904', u'WilmingtonWX'),
 (u'1948302668', u'littlehotmess77'),
 (u'1950302455', u'Lorenzo_1599'),
 (u'2587789764', u'WorkWithSHC'),
 (u'2706556174', u'Crp94'),
 (u'27585679', u'gerrypizza'),
 (u'3345561723', u'ACM_Nicky'),
 (u'33978500', u'AaronMatas'),
 (u'356840344', u'PracticeWithUs'),
 (u'365012829', u'PTK473'),
 (u'546225760', u'kmgcareers'),
 (u'61101107', u'CurtNickisch'),
 (u'911570634', u'Jason6440')}

In [None]:
nflgame[18]

In [None]:
#**********************************************************************
# Geo point locations of tweets; store separately
#**********************************************************************

count = 0
for l in nflgame:
    try:
        if l['geo']['type'] == 'Point':
            print l['geo']['coordinates']
            count += 1
            print count
    except:
        pass
print float(count)/float(len(nflgame))

In [None]:
count =0
for l in nflgame[1:100]:
    try:
        print l['geo']
        count += 1
        print count
    except:
        pass
    

In [None]:
googlegeolocator.reverse([nflgame[4]['coordinates']['coordinates'][1],nflgame[4]['coordinates']['coordinates'][0]])

In [None]:
osmgeolocator.reverse((nflgame[111]['coordinates']['coordinates'][1],nflgame[111]['coordinates']['coordinates'][0]))[0]

In [None]:
# Use regex to return only text seperated by , (city, state) or text  by itself; ignores symbols

import re

for l in nflgame[80:115]:
    try:
        print re.search('[ ]?[A-Za-z]+[ ]?([A-Za-z]+)?[ ]?([A-Za-z]+)?(,)?[ ]?([A-Za-z]+)?((\.)[^ ])?([A-Za-z]+)?((\.)[^ ])?',l['user']['location']).group(0).strip()
    except:
        pass

In [155]:
for l in nflgame[18:23]:
    print len(([(m[0],m[1]) for m in (l['place']['bounding_box']['coordinates'][0])]))

In [None]:
import datetime
datetime.datetime.fromtimestamp((int(nflgame[20]['timestamp_ms'])/1000)).strftime('%Y-%m-%d %H:%M:%S')

In [None]:
# Use regex to return only text seperated by , (city, state) or text  by itself; ignores symbols

import re

for l in nflgame[1800:2000]:
    try:
        print l['text']
    except:
        pass

In [26]:
# loop over the original json
row = {"tweetdetails":{}}
hashtags=[]
for h in nflgame[1:10]:
    try:
        # do a test to see if the length of the entities is greater than zero, if not skip
        if len(", ".join([l['text'] for l in h['entities']['hashtags']])) >0:

            # print the hashtags as a string; each hashtag is split by a space
            print [(", ".join([l['text'] for l in h['entities']['hashtags']]))]         
            
    # if we don't have hashtags, this exception prevents an error hangup   
    except:
        pass
hashtags


[u'Nunn, Transportation, VeteranJob, Job, Jobs, Hiring, CareerArc']
[u'mountmorrisonsummit, mountainevans']
[u'broncos']


[]

In [None]:
#**********************************************************************
# Extract home locations of user as string, geocode, cache name and latitude
# and then store latitude and longitude of event for csv writing
#**********************************************************************



def caching(loc):
    test = {"cache_details":{}}
    try:
        from geopy.geocoders import GoogleV3, Bing, GeoNames, Nominatim
        google= GoogleV3(api_key=oauth[0]['google']['serverkey'])
        bing= Bing(api_key=oauth[0]['bing']['key'])
        geonames = GeoNames(username = oauth[0]['geonames']['username'])
        osm = Nominatim()
    except:
        print "You need to install the \'geopy\' module or edit input parameters"
        
    try:
        import re
    except:
        print "You need to install the \'re\' module"
        
    try: 
        import time
    except:
        print "You need to install the \'time\' module"
    
    
    
    locations =[]
    for l in nflgame[18:35]:
        try:
            locations.append(re.search('[ ]?[A-Za-z]+[ ]?([A-Za-z]+)?[ ]?([A-Za-z]+)?(,)?[ ]?([A-Za-z]+)?((\.)[^ ])?([A-Za-z]+)?((\.)[^ ])?',l['user']['location']).group(0).strip())
        except:
            pass
    test['cache_details']['locations']=locations
    for l in locations:
        if l in cachedlocs[0].keys():
            pass
        else:
            try:
                answer = geonames.geocode(l)
                cachedlocs.append({l:{"realname":answer[0],"latitude":answer[1][0],"longitude":answer[1][1]}})
                
                time.sleep(1)
            except: 
                pass
    return cachedlocs

In [102]:
loc = nflgame[1:20]
test = {"cache_details":{}}
try:
    from geopy.geocoders import GoogleV3, Bing, GeoNames, Nominatim
    google= GoogleV3(api_key=oauth[0]['google']['serverkey'])
    bing= Bing(api_key=oauth[0]['bing']['key'])
    geonames = GeoNames(username = oauth[0]['geonames']['username'])
    osm = Nominatim()
except:
    print "You need to install the \'geopy\' module or edit input parameters"

try:
    import re
except:
    print "You need to install the \'re\' module"

try: 
    import time
except:
    print "You need to install the \'time\' module"




for l in loc:
    try:
        
        m = locations.append(re.search('[ ]?[A-Za-z]+[ ]?([A-Za-z]+)?[ ]?([A-Za-z]+)?(,)?[ ]?([A-Za-z]+)?((\.)[^ ])?([A-Za-z]+)?((\.)[^ ])?',l['user']['location']).group(0).strip())
         
        
    except:
        pass
test['cache_details']['locations']=locations
test['cache_details']['cache']={}

        
    

In [79]:
nflgame[18]['user']['location']

u'Riverside, CA'

In [114]:
test['cache_details']['locations'] 

[]

In [83]:
from geopy.geocoders import GoogleV3, Bing, GeoNames,Nominatim

google= GoogleV3(api_key=oauth[0]['google']['serverkey'])
bing= Bing(api_key=oauth[0]['bing']['key'])
geonames = GeoNames(username='linwoodc3')
osm = Nominatim()
#geonames.geocode('kansas city metro')

In [84]:
b = google.geocode('Riverside, CA')

In [85]:
b[0],b[1]

(u'Riverside, CA, USA', (33.9533487, -117.3961564))

In [86]:
name = str(nflgame[18]['user']['location'])

In [95]:
test['cache_details']['cache'][name] = {"Name":b[0],"Point":b[1]}

In [138]:
test['cache_details']['locations']=[]

In [139]:
#**********************************************************************
# Extract home locations of user as string, geocode, cache name and latitude
# and then store latitude and longitude of event for csv writing
#**********************************************************************


for l in nflgame[20:550]:
    try:  
        m = re.search('[ ]?[A-Za-z]+[ ]?([A-Za-z]+)?[ ]?([A-Za-z]+)?(,)?[ ]?([A-Za-z]+)?((\.)[^ ])?([A-Za-z]+)?((\.)[^ ])?',l['user']['location']).group(0).strip().lower()
        print m 
        if m in test['cache_details']['locations']:
            print "It's there!!"
            pass
        else:
            test['cache_details']['locations'].append(m)
    except:
        pass

In [142]:
(test['cache_details']['locations'])

[u'providence, ri',
 u'adelaide, australia',
 u'california',
 u'denver, co',
 u'hq aspen beverly hills',
 u'islamic republic of',
 u'chicago, il',
 u'battle creek, mi',
 u'charlotte, nc',
 u'the internet',
 u'colorado, usa',
 u'houston, tx',
 u'the',
 u'colorado springs, co',
 u'coloroado',
 u'riverside, ca',
 u'mars',
 u'instagram',
 u'canto seven, ca',
 u'lamar, co',
 u'staywithme',
 u'aspen, co',
 u'littleton, co',
 u'denver',
 u'colorado, ny',
 u'colorado, with',
 u'denver,co',
 u'morrison co',
 u'centennial, colorado',
 u'flyin high in the',
 u'denver, colorado',
 u'mile high city',
 u'houstatlantavegas',
 u'bridgetown,',
 u'boulder, co',
 u'northern colorado',
 u'denver co',
 u'colorado',
 u'c',
 u'fort collins, co',
 u'ft',
 u'aurora, co',
 u'frisco, tx',
 u'riverside,ca',
 u'straight outta tha',
 u'queen city of the',
 u'broncoscountry denver,co',
 u'west palm beach, fl',
 u'atlanta, ga',
 u'detroit, denver',
 u'rva',
 u'es denver',
 u'csusb zta',
 u'riverside',
 u'slayin, tx',

In [141]:
len(set(test['cache_details']['locations']))

119

In [110]:
d = [{"first":"linwood","last":"creekmore"}]

bob = ["eit"]

for l in bob:
    if l in d[0].keys():
        print "What"
    else:
        print "No"


In [None]:
caching("Colorado")

In [None]:
cachedlocs

In [None]:
from geopy.geocoders import GoogleV3, Bing, GeoNames,Nominatim
googlegeolocator = GoogleV3()
binggeolocator = Bing(api_key=oauth[0]['bing']['key'])
geonamesgeolocator = GeoNames(username='linwoodc3')
osmgeolocator = Nominatim()
osmgeolocator.geocode('Richmond, Virginia')


cities=[]
for l in viks_vs_hawks[800:820]:
    try:
        cities.append(re.search('^ ?[^ ]?[A-Za-z]+ ?(,)?[ ]?[A-Za-z]+',l['user']['location']).group(0).strip())
    except:
        pass

In [None]:
import sys
sys.path.append('/Users/linwood/projects/LC3-Creations/timehash')

In [None]:
import timehash

print timehash.encode(int(nflgame[400]['timestamp_ms'])/1000), timehash.encode(int(nflgame[401]['timestamp_ms'])/1000) ,timehash.encode(int(nflgame[300]['timestamp_ms'])/1000)

In [None]:
geohash.encode_uint64(l['coordinates']['coordinates'][1],l['coordinates']['coordinates'][0])

In [None]:
import geohash

In [None]:
import geohash
for l in nflgame[200:225]:
    try:
        
        print l['geo']['coordinates']
    except:
        pass

In [None]:
for l in set(skins_vs_pack_locs):
    try:
        print geolocator.geocode(l)
    except:
        pass

In [None]:
from geopy.geocoders import GeoNames
geolocator = GeoNames(username='linwoodc3')

In [None]:
b= geolocator.geocode('Twin Cities')
print latitude
print longitude

In [None]:
b[0]

In [None]:
skins_vs_pack_truelocs = []
for l['user']['location'] in b:
    try:
        if len(geolocator.geocode(l['user']['location'])) > 0:
            skins_vs_pack_truelocs.append(l['user']['location'])
    except:
        pass   

In [None]:
len(skins_vs_pack_truelocs)

In [None]:
for l in b[1:10]:
    print l['user']['location']

In [None]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

23.539129, -120.429975
62.138919, -57.846612
 
map = Basemap(projection='merc', lat_0 = 23, lon_0 = -120,
    resolution = 'h', area_thresh = 0.1,
    llcrnrlon=-120.429975, llcrnrlat=23.539129,
    urcrnrlon=-57.846612, urcrnrlat=62.138919) 
 
map.drawcoastlines()
map.drawcountries()
map.fillcontinents(color = 'coral')
map.drawmapboundary()



plt.show()

In [132]:
loc[0]['user']['location']={"Name":b[0],"Point":b[1]}

In [138]:
loc[1]['user']['location']

In [90]:
sys.path

['',
 '/Users/linwood/anaconda/envs/py27/lib/python27.zip',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/plat-darwin',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/plat-mac',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/plat-mac/lib-scriptpackages',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/lib-tk',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/lib-old',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/lib-dynload',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/site-packages/setuptools-19.2-py2.7.egg',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/site-packages',
 '/Users/linwood/anaconda/envs/py27/lib/python2.7/site-packages/IPython/extensions',
 '/Users/linwood/.ipython',
 '/Users/linwood/projects/LC3-Creations/timehash']