In [1]:
import datetime
import math
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import *
import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import string

In [2]:
tweets = pd.read_csv("./data/tweets_all.csv", encoding="latin1")

In [3]:
min_lat = tweets.LATITUDE.min()
min_long = tweets.LONGITUDE.min()

lat_range = tweets.LATITUDE.max() - tweets.LATITUDE.min()
long_range = tweets.LONGITUDE.max() - tweets.LONGITUDE.min()

lat_bin_size = lat_range/9
long_bin_size= long_range/9

In [4]:
tweets["long_bin"] = tweets.LONGITUDE.apply(lambda long: (min_long + (((long - min_long)//long_bin_size)*long_bin_size)))
tweets["lat_bin"] = tweets.LATITUDE.apply(lambda lat: (min_lat + (((lat - min_lat)//lat_bin_size)*lat_bin_size)))

In [5]:
tweets["long_bin"] = tweets["long_bin"].apply(lambda l: str(l))
tweets["lat_bin"] = tweets["lat_bin"].apply(lambda l: str(l))

In [6]:
tweets["lat_long_bin"] = tweets["lat_bin"] + "," + tweets["long_bin"]

In [7]:
def parse_tweets(tweet):

    tweet = re.sub('@[^\s]+','',tweet).strip()
    tweet = re.sub('#[^\s]+','',tweet).strip()
    tweet = re.sub(r'https?:\/\/.*[\r\n]*','',tweet).strip()
    tweet = " ".join(re.findall("[a-zA-Z]+", tweet))
    
    return tweet

In [8]:
tweets["parsed_tweets"] = tweets.MESSAGETEXT.apply(lambda tweet: parse_tweets(tweet))

In [9]:
# need to get pos tag
class StemmerTokenizer(object):

    def __init__(self):
        self.stemmer = PorterStemmer()
        
    def __call__(self, doc):
        is_noun = lambda pos: pos[:2] == 'NN'
        doc = " ".join(re.findall("[a-zA-Z]+", doc))
        tokenized = nltk.word_tokenize(doc)
        nouns = [word for (word, pos) in nltk.pos_tag(tokenized) if is_noun(pos)] 
        return [self.stemmer.stem(t) for t in word_tokenize(doc)]

In [10]:
tweets.head()

Unnamed: 0,id,Name,X,Y,LONGITUDE,LATITUDE,MESSAGEDATE,USERID,USERSCREENNAME,MESSAGETEXT,LOCATION,HASHTAGS,ISRETWEET,long_bin,lat_bin,lat_long_bin,parsed_tweets
0,550226818624942080,,-0.26215,51.391407,-0.26215,51.391407,31/12/2014 09:47:50,113918054,12Elbestreet,@julieo25 Not yet got tics for Liverpool waiti...,Kingston upon Thames; London,,0,-0.3293425440788269,51.375291188557945,"51.375291188557945,-0.3293425440788269",Not yet got tics for Liverpool waiting till Fr...
1,550226823314178048,,-0.149466,51.49256,-0.149466,51.492561,31/12/2014 09:47:52,465989904,alice_foster_95,So. Many. Accents #alicesadventureswithmegabus,London; England,alicesadventureswithmegabus,0,-0.2335423231124878,51.466681162516274,"51.466681162516274,-0.2335423231124878",So Many Accents
2,550226827944660992,,-0.014891,51.354042,-0.014891,51.354042,31/12/2014 09:47:53,55872342,MissGee_Pee,If it doesn't make you happy by December 31st....,New Addington; London,,0,-0.0419418811798095,51.32959620157877,"51.329596201578774,-0.04194188117980957",If it doesn t make you happy by December st le...
3,550226837537058816,,0.006698,51.53202,0.006698,51.532021,31/12/2014 09:47:55,147266450,vinita_ramtri,@westfieldstrat thanks; and are you open tomor...,Stratford; London,,0,-0.0419418811798095,51.512376149495445,"51.512376149495445,-0.04194188117980957",thanks and are you open tomorrow at all
4,550227214227505152,,0.214417,51.582535,0.214417,51.582535,31/12/2014 09:49:25,518928608,CallumGordon7,@WestHam_Central but he is,Romford; London,,0,0.1496585607528686,51.55807113647461,"51.55807113647461,0.14965856075286865",but he is


In [11]:
idf_by_lat_long_bin = {}

for lat_long_bin in tweets.lat_long_bin.unique():
    clf = TfidfVectorizer(tokenizer=StemmerTokenizer(), min_df=1, max_df=0.7, ngram_range=(1, 1), stop_words=set(stopwords.words('english')))
    try:
        clf.fit(tweets[tweets["lat_long_bin"] == lat_long_bin]["parsed_tweets"])
        word2idf = dict(zip(clf.get_feature_names(), clf.idf_))
        sorted_idf = sorted(word2idf.items(), key=lambda x: x[1], reverse=True)
    except ValueError:
        sorted_idf = []

    idf_by_lat_long_bin[lat_long_bin] = {
        "lat_long_bin": lat_long_bin,
        "sorted_idf": sorted_idf,
        "location": tweets[tweets["lat_long_bin"] == lat_long_bin]["LOCATION"]
    }
    
    print("Completed", lat_long_bin)

print("Finished!")

Completed 51.375291188557945,-0.3293425440788269
Completed 51.466681162516274,-0.2335423231124878
Completed 51.329596201578774,-0.04194188117980957
Completed 51.512376149495445,-0.04194188117980957
Completed 51.55807113647461,0.14965856075286865
Completed 51.649461110432945,-0.3293425440788269
Completed 51.603766123453774,-0.425142765045166
Completed 51.466681162516274,-0.13774210214614868
Completed 51.512376149495445,-0.13774210214614868
Completed 51.603766123453774,0.14965856075286865
Completed 51.329596201578774,-0.13774210214614868
Completed 51.42098617553711,-0.2335423231124878
Completed 51.512376149495445,-0.2335423231124878
Completed 51.42098617553711,0.05385833978652954
Completed 51.42098617553711,-0.3293425440788269
Completed 51.512376149495445,-0.5209429860115051
Completed 51.603766123453774,-0.2335423231124878
Completed 51.42098617553711,-0.5209429860115051
Completed 51.42098617553711,-0.425142765045166
Completed 51.466681162516274,-0.3293425440788269
Completed 51.5123761494

## Extract Spatial Key Terms

In [14]:
tweets = pd.read_csv("./data/tweets_all.csv", encoding="latin1")

In [15]:
tweets["datetime"] = pd.to_datetime(tweets.MESSAGEDATE)

In [16]:
tweets["datetime_to_nearest_hour"] = tweets.datetime.apply(lambda dt: datetime.datetime(dt.year, dt.month, dt.day, dt.hour))

In [17]:
tweets["parsed_tweets"] = tweets.MESSAGETEXT.apply(lambda tweet: parse_tweets(tweet))

In [18]:
idf_by_hourly_interval = {}

for hour_interval in tweets.datetime_to_nearest_hour.unique():
    clf = TfidfVectorizer(tokenizer=StemmerTokenizer(), min_df=5, max_df=0.7, ngram_range=(1, 1), stop_words=set(stopwords.words('english')))
    clf.fit(tweets[tweets["datetime_to_nearest_hour"] == hour_interval]["parsed_tweets"])
    word2idf = dict(zip(clf.get_feature_names(), clf.idf_))
    sorted_idf = sorted(word2idf.items(), key=lambda x: x[1], reverse=True)

    idf_by_hourly_interval[hour_interval] = {
        "hour_interval": hour_interval,
        "sorted_idf": sorted_idf
    }
    
    print("Completed", hour_interval)

print("Finished!")

Completed 2014-12-31T09:00:00.000000000
Completed 2014-12-31T10:00:00.000000000
Completed 2014-12-31T08:00:00.000000000
Completed 2014-12-31T06:00:00.000000000
Completed 2014-12-31T07:00:00.000000000
Completed 2014-12-31T04:00:00.000000000
Completed 2014-12-31T05:00:00.000000000
Completed 2014-12-31T14:00:00.000000000
Completed 2014-12-31T15:00:00.000000000
Completed 2014-12-31T11:00:00.000000000
Completed 2014-12-31T13:00:00.000000000
Completed 2014-12-31T12:00:00.000000000
Completed 2014-12-31T18:00:00.000000000
Completed 2014-12-31T19:00:00.000000000
Completed 2014-12-31T16:00:00.000000000
Completed 2014-12-31T17:00:00.000000000
Completed 2014-12-31T23:00:00.000000000
Completed 2015-01-01T00:00:00.000000000
Completed 2014-12-31T20:00:00.000000000
Completed 2014-12-31T22:00:00.000000000
Completed 2015-01-01T08:00:00.000000000
Completed 2015-01-01T09:00:00.000000000
Completed 2015-01-01T04:00:00.000000000
Completed 2015-01-01T05:00:00.000000000
Completed 2015-01-01T06:00:00.000000000


In [None]:
# TODO
# space
# idf_by_lat_long_bin["51.466681162516274,-0.16721909321271455"]["sorted_idf"][:1000]
# idf_by_lat_long_bin["51.512376149495445,-0.16721909321271455"]["sorted_idf"][:1000]
# idf_by_lat_long_bin["51.466681162516274,-0.12300360661286575"]["sorted_idf"][:1000]
# idf_by_lat_long_bin["51.512376149495445,-0.12300360661286575"]["sorted_idf"][:1000]
# idf_by_lat_long_bin["51.512376149495445,-0.07878812001301694"]["sorted_idf"][:1000]

# time
# idf_by_hourly_interval[np.datetime64('2015-01-01T00:00:00.000000000')]["sorted_idf"][:1000]
# idf_by_hourly_interval[np.datetime64('2015-01-01T18:00:00.000000000')]["sorted_idf"][:1000]
# idf_by_hourly_interval[np.datetime64('2015-01-01T19:00:00.000000000')]["sorted_idf"][:1000]
# idf_by_hourly_interval[np.datetime64('2015-01-01T21:00:00.000000000')]["sorted_idf"][:1000]
# idf_by_hourly_interval[np.datetime64('2015-01-01T16:00:00.000000000')]["sorted_idf"][:1000]

In [None]:
searchfor = ['foot', 'chelsea', "tottenham"]

tweets[tweets["parsed_tweets"].str.contains("|".join(searchfor))].groupby(['datetime_to_nearest_hour'])['id'] \
                             .count() \
                             .reset_index(name='count') \
                             .sort_values(['count'], ascending=False) \
                             .head(5)

In [24]:
key_words = {}

def get_common_idf(location, time, total_count = 0):
    
    count = 0
    for idf_lat_long in idf_by_lat_long_bin[location]["sorted_idf"]:
        for idf_time in idf_by_hourly_interval[np.datetime64(time)]["sorted_idf"]:
            if (idf_lat_long[0] == idf_time[0]) and (idf_lat_long[1] > 9) or (idf_time[1] > 9) :
                count += 1
                print(idf_lat_long, idf_time)
                if idf_lat_long[0] in key_words:
                    key_words[idf_lat_long[0]] +=1
                else:
                    key_words[idf_lat_long[0]] = 1
    total_count += count
#     print(count)
    return total_count

In [25]:
len(list(idf_by_hourly_interval.keys()))

48

In [26]:
len(list(idf_by_lat_long_bin.keys()))

83

In [27]:
times = idf_by_hourly_interval.keys()
# times = ["2015-01-01T00:00:00.000000000", "2015-01-01T18:00:00.000000000", "2015-01-01T19:00:00.000000000", "2015-01-01T21:00:00.000000000", "2015-01-01T16:00:00.000000000"]
locations = idf_by_lat_long_bin.keys()
total_count = 0

for location in locations:
    for time in times:
        prev_count = total_count
        total_count = get_common_idf(location, time, total_count)
        if total_count > prev_count:
            print(location)
            print(time)
            print('--------------')

('freez', 9.188272370947859) ('freez', 6.672750365408472)
('gym', 9.188272370947859) ('gym', 6.385068292956691)
('wcw', 9.188272370947859) ('wcw', 6.518599685581214)
('xxxxx', 9.188272370947859) ('xxxxx', 6.672750365408472)
51.466681162516274,-0.2335423231124878
2014-12-31T09:00:00.000000000
--------------
('bastard', 9.188272370947859) ('bastard', 6.970411137910149)
('biggest', 9.188272370947859) ('biggest', 6.816260458082891)
('courtesi', 9.188272370947859) ('courtesi', 6.970411137910149)
('dj', 9.188272370947859) ('dj', 6.970411137910149)
('hughton', 9.188272370947859) ('hughton', 6.459585514144159)
('inde', 9.188272370947859) ('inde', 6.816260458082891)
('knighthood', 9.188272370947859) ('knighthood', 6.970411137910149)
('load', 9.188272370947859) ('load', 6.816260458082891)
('murder', 9.188272370947859) ('murder', 6.970411137910149)
('rais', 9.188272370947859) ('rais', 6.970411137910149)
('rate', 9.188272370947859) ('rate', 6.816260458082891)
('stock', 9.188272370947859) ('stock',

('women', 9.188272370947859) ('women', 7.2285110035911835)
('youth', 9.188272370947859) ('youth', 7.2285110035911835)
51.466681162516274,-0.2335423231124878
2014-12-31T17:00:00.000000000
--------------
('amen', 9.188272370947859) ('amen', 7.275075845189281)
('amor', 9.188272370947859) ('amor', 7.275075845189281)
('asleep', 9.188272370947859) ('asleep', 7.275075845189281)
('await', 9.188272370947859) ('await', 7.275075845189281)
('biggest', 9.188272370947859) ('biggest', 7.275075845189281)
('bow', 9.188272370947859) ('bow', 7.120925165362023)
('freddi', 9.188272370947859) ('freddi', 6.233621970361121)
('lambeth', 9.188272370947859) ('lambeth', 7.275075845189281)
('load', 9.188272370947859) ('load', 6.668940041618966)
('niall', 9.188272370947859) ('niall', 7.120925165362023)
('river', 9.188272370947859) ('river', 6.987393772737501)
('rubbish', 9.188272370947859) ('rubbish', 7.275075845189281)
('til', 9.188272370947859) ('til', 6.869610737081117)
('trust', 9.188272370947859) ('trust', 7.2

51.466681162516274,-0.2335423231124878
2015-01-01T13:00:00.000000000
--------------
('aux', 9.188272370947859) ('aux', 6.7120812774708964)
('eaten', 9.188272370947859) ('eaten', 6.7120812774708964)
('gym', 9.188272370947859) ('gym', 6.5579305976436375)
('llama', 9.188272370947859) ('llama', 6.7120812774708964)
('opinion', 9.188272370947859) ('opinion', 6.7120812774708964)
51.466681162516274,-0.2335423231124878
2015-01-01T10:00:00.000000000
--------------
('ell', 9.188272370947859) ('ell', 7.030284538957564)
('floripa', 9.188272370947859) ('floripa', 7.030284538957564)
('gym', 9.188272370947859) ('gym', 6.876133859130306)
('load', 9.188272370947859) ('load', 6.742602466505783)
('messi', 9.188272370947859) ('messi', 6.876133859130306)
('oprincesssparkleo', 9.188272370947859) ('oprincesssparkleo', 7.030284538957564)
('podolski', 9.188272370947859) ('podolski', 7.030284538957564)
('product', 9.188272370947859) ('product', 7.030284538957564)
('toast', 9.188272370947859) ('toast', 6.74260246

('midfield', 9.188272370947859) ('midfield', 6.8271340229118564)
('motiv', 9.188272370947859) ('motiv', 7.600323911145338)
('newcastl', 9.188272370947859) ('newcastl', 7.312641838693557)
('product', 9.188272370947859) ('product', 7.600323911145338)
('race', 9.188272370947859) ('race', 7.600323911145338)
('scream', 9.188272370947859) ('scream', 7.600323911145338)
('shock', 9.188272370947859) ('shock', 6.994188107575022)
('soo', 9.188272370947859) ('soo', 7.600323911145338)
('southampton', 9.188272370947859) ('southampton', 5.728521734243746)
('stock', 9.188272370947859) ('stock', 7.600323911145338)
('tactic', 9.188272370947859) ('tactic', 6.8271340229118564)
('ting', 9.188272370947859) ('ting', 7.44617323131808)
('ty', 9.188272370947859) ('ty', 7.312641838693557)
('winner', 9.188272370947859) ('winner', 7.194858803037174)
('wood', 9.188272370947859) ('wood', 7.600323911145338)
('yaya', 9.188272370947859) ('yaya', 6.619494658133612)
51.466681162516274,-0.2335423231124878
2015-01-01T16:00

('terri', 9.188272370947859) ('terri', 6.021085549971084)
('wind', 9.188272370947859) ('wind', 7.725833642209509)
('wit', 9.188272370947859) ('wit', 7.320368534101345)
51.466681162516274,-0.2335423231124878
2015-01-01T19:00:00.000000000
--------------
('biggest', 9.188272370947859) ('biggest', 6.983216338251486)
('chrissi', 9.188272370947859) ('chrissi', 7.137367018078744)
('flash', 9.188272370947859) ('flash', 6.983216338251486)
('gym', 9.188272370947859) ('gym', 7.137367018078744)
('hungri', 9.188272370947859) ('hungri', 7.137367018078744)
('ilysm', 9.188272370947859) ('ilysm', 5.884604049583377)
('inde', 9.188272370947859) ('inde', 7.137367018078744)
('load', 9.188272370947859) ('load', 6.983216338251486)
('mixtap', 9.188272370947859) ('mixtap', 5.318208574662575)
('rate', 9.188272370947859) ('rate', 7.137367018078744)
('shock', 9.188272370947859) ('shock', 7.137367018078744)
('tobi', 9.188272370947859) ('tobi', 7.137367018078744)
('trek', 9.188272370947859) ('trek', 6.2900691576915

('truth', 9.228710798793687) ('truth', 6.870708345972182)
('ukip', 9.228710798793687) ('ukip', 6.177561165412237)
51.466681162516274,-0.13774210214614868
2014-12-31T14:00:00.000000000
--------------
('cita', 9.634175906901852) ('cita', 7.196104626857632)
('richmond', 9.634175906901852) ('richmond', 7.041953947030374)
('whelan', 9.634175906901852) ('whelan', 6.908422554405851)
('aim', 9.228710798793687) ('aim', 7.196104626857632)
('camden', 9.228710798793687) ('camden', 6.908422554405851)
('elect', 9.228710798793687) ('elect', 7.196104626857632)
('fb', 9.228710798793687) ('fb', 7.196104626857632)
('suggest', 9.228710798793687) ('suggest', 7.196104626857632)
('sul', 9.228710798793687) ('sul', 7.196104626857632)
51.466681162516274,-0.13774210214614868
2014-12-31T15:00:00.000000000
--------------
('afrobeat', 9.634175906901852) ('afrobeat', 6.773386270601908)
('ars', 9.634175906901852) ('ars', 6.773386270601908)
('kept', 9.634175906901852) ('kept', 7.061068343053689)
('suppos', 9.228710798

('download', 9.634175906901852) ('download', 7.158742965580796)
('dubai', 9.634175906901852) ('dubai', 6.647917341814805)
('rm', 9.634175906901852) ('rm', 6.753277857472632)
('allah', 9.228710798793687) ('allah', 7.158742965580796)
('blame', 9.228710798793687) ('blame', 7.004592285753538)
('buckingham', 9.228710798793687) ('buckingham', 7.158742965580796)
('indian', 9.228710798793687) ('indian', 7.158742965580796)
('sha', 9.228710798793687) ('sha', 7.158742965580796)
('uncl', 9.228710798793687) ('uncl', 7.004592285753538)
51.466681162516274,-0.13774210214614868
2014-12-31T20:00:00.000000000
--------------
('ace', 9.634175906901852) ('ace', 6.935649768001586)
('download', 9.634175906901852) ('download', 6.935649768001586)
('reunit', 9.634175906901852) ('reunit', 7.089800447828844)
('rm', 9.634175906901852) ('rm', 7.089800447828844)
('allah', 9.228710798793687) ('allah', 6.935649768001586)
('camden', 9.228710798793687) ('camden', 6.802118375377063)
('hammersmith', 9.228710798793687) ('ha

('shaw', 9.634175906901852) ('shaw', 7.225877690680423)
('airport', 9.228710798793687) ('airport', 7.225877690680423)
('chase', 9.228710798793687) ('chase', 5.798761335040277)
('crouch', 9.228710798793687) ('crouch', 7.071727010853165)
('donat', 9.228710798793687) ('donat', 5.583649955423331)
('dri', 9.228710798793687) ('dri', 7.225877690680423)
('perhap', 9.228710798793687) ('perhap', 5.551901257108751)
('shawcross', 9.228710798793687) ('shawcross', 6.619741887110107)
('utd', 9.228710798793687) ('utd', 7.071727010853165)
('xxxxx', 9.228710798793687) ('xxxxx', 6.715052066914432)
51.466681162516274,-0.13774210214614868
2015-01-01T12:00:00.000000000
--------------
('ac', 9.634175906901852) ('ac', 7.324358962381311)
('ako', 9.634175906901852) ('ako', 7.324358962381311)
('bird', 9.634175906901852) ('bird', 7.170208282554053)
('carbella', 9.634175906901852) ('carbella', 7.324358962381311)
('chel', 9.634175906901852) ('chel', 6.7182231588109955)
('diego', 9.634175906901852) ('diego', 5.82028

('shite', 9.634175906901852) ('shite', 7.089498287379348)
('slag', 9.634175906901852) ('slag', 7.194858803037174)
('spain', 9.634175906901852) ('spain', 7.089498287379348)
('striker', 9.634175906901852) ('striker', 7.089498287379348)
('sunderland', 9.634175906901852) ('sunderland', 5.520882369465502)
('theo', 9.634175906901852) ('theo', 7.194858803037174)
('townsend', 9.634175906901852) ('townsend', 7.600323911145338)
('transfer', 9.634175906901852) ('transfer', 7.312641838693557)
('unfollow', 9.634175906901852) ('unfollow', 7.600323911145338)
('aim', 9.228710798793687) ('aim', 7.600323911145338)
('airport', 9.228710798793687) ('airport', 7.44617323131808)
('amen', 9.228710798793687) ('amen', 7.600323911145338)
('blame', 9.228710798793687) ('blame', 6.256589164444243)
('buckingham', 9.228710798793687) ('buckingham', 7.194858803037174)
('camden', 9.228710798793687) ('camden', 7.44617323131808)
('defenc', 9.228710798793687) ('defenc', 6.214029550025447)
('defens', 9.228710798793687) ('de

('buzz', 9.634175906901852) ('buzz', 6.878535781822306)
('chadli', 9.634175906901852) ('chadli', 5.779923493154196)
('chel', 9.634175906901852) ('chel', 6.9526437539760275)
('comeback', 9.634175906901852) ('comeback', 7.571682962382251)
('dart', 9.634175906901852) ('dart', 7.571682962382251)
('dowd', 9.634175906901852) ('dowd', 7.4381515697577285)
('fake', 9.634175906901852) ('fake', 7.725833642209509)
('fantasi', 9.634175906901852) ('fantasi', 7.320368534101345)
('imo', 9.634175906901852) ('imo', 7.725833642209509)
('init', 9.634175906901852) ('init', 7.725833642209509)
('injuri', 9.634175906901852) ('injuri', 7.571682962382251)
('jt', 9.634175906901852) ('jt', 7.725833642209509)
('lack', 9.634175906901852) ('lack', 7.725833642209509)
('llori', 9.634175906901852) ('llori', 6.9526437539760275)
('moan', 9.634175906901852) ('moan', 7.725833642209509)
('mvg', 9.634175906901852) ('mvg', 7.320368534101345)
('pipe', 9.634175906901852) ('pipe', 7.725833642209509)
('predict', 9.634175906901852

('bashment', 9.228710798793687) ('bashment', 5.080358320247361)
('blame', 9.228710798793687) ('blame', 7.159799861927197)
('bmt', 9.228710798793687) ('bmt', 7.054439346269371)
('buff', 9.228710798793687) ('buff', 7.159799861927197)
('cast', 9.228710798793687) ('cast', 7.565264970035361)
('frank', 9.228710798793687) ('frank', 7.411114290208102)
('henderson', 9.228710798793687) ('henderson', 7.565264970035361)
('joker', 9.228710798793687) ('joker', 7.054439346269371)
('jovet', 9.228710798793687) ('jovet', 7.159799861927197)
('lfc', 9.228710798793687) ('lfc', 7.411114290208102)
('lip', 9.228710798793687) ('lip', 6.872117789475416)
('loan', 9.228710798793687) ('loan', 7.411114290208102)
('ly', 9.228710798793687) ('ly', 7.159799861927197)
('midfield', 9.228710798793687) ('midfield', 7.565264970035361)
('neither', 9.228710798793687) ('neither', 7.565264970035361)
('ngl', 9.228710798793687) ('ngl', 7.565264970035361)
('premier', 9.228710798793687) ('premier', 7.565264970035361)
('robbi', 9.22

('heathrow', 9.475328987317539) ('heathrow', 6.936216072877503)
('hogmanay', 9.475328987317539) ('hogmanay', 6.936216072877503)
('iran', 9.475328987317539) ('iran', 7.223898145329285)
('kingdom', 9.475328987317539) ('kingdom', 6.936216072877503)
('hounslow', 9.069863879209374) ('hounslow', 7.069747465502026)
('thoma', 9.069863879209374) ('thoma', 6.530750964769339)
51.512376149495445,-0.13774210214614868
2014-12-31T18:00:00.000000000
--------------
('kingdom', 9.475328987317539) ('kingdom', 6.020064384385107)
('nak', 9.475328987317539) ('nak', 7.172743894323493)
('statu', 9.475328987317539) ('statu', 7.018593214496234)
('borough', 9.069863879209374) ('borough', 7.172743894323493)
('brixton', 9.069863879209374) ('brixton', 6.885061821871712)
('chap', 9.069863879209374) ('chap', 7.172743894323493)
51.512376149495445,-0.13774210214614868
2014-12-31T19:00:00.000000000
--------------
('airport', 9.475328987317539) ('airport', 7.087748278830837)
('britain', 9.475328987317539) ('britain', 7.0

('poppin', 9.475328987317539) ('poppin', 6.341771188886872)
('qpr', 9.475328987317539) ('qpr', 7.383225063715033)
('rooney', 9.475328987317539) ('rooney', 7.229074383887775)
('shaw', 9.475328987317539) ('shaw', 6.977759955606868)
('sub', 9.475328987317539) ('sub', 7.383225063715033)
('villa', 9.475328987317539) ('villa', 7.095542991263252)
('ward', 9.475328987317539) ('ward', 6.977759955606868)
('aswel', 9.069863879209374) ('aswel', 7.095542991263252)
('bench', 9.069863879209374) ('bench', 7.383225063715033)
('buff', 9.069863879209374) ('buff', 7.229074383887775)
('calendar', 9.069863879209374) ('calendar', 7.095542991263252)
('defo', 9.069863879209374) ('defo', 7.383225063715033)
('donat', 9.069863879209374) ('donat', 6.7770892601447175)
('draw', 9.069863879209374) ('draw', 6.083942079584772)
('freez', 9.069863879209374) ('freez', 7.229074383887775)
('geniu', 9.069863879209374) ('geniu', 7.383225063715033)
('jone', 9.069863879209374) ('jone', 7.095542991263252)
('kmt', 9.0698638792093

('slice', 9.475328987317539) ('slice', 7.728827955256198)
('townsend', 9.475328987317539) ('townsend', 6.955638067022716)
('wanker', 9.475328987317539) ('wanker', 7.728827955256198)
('yess', 9.475328987317539) ('yess', 7.728827955256198)
('assist', 9.069863879209374) ('assist', 7.441145882804417)
('chadli', 9.069863879209374) ('chadli', 6.747998702244471)
('draw', 9.069863879209374) ('draw', 6.747998702244471)
('dvd', 9.069863879209374) ('dvd', 7.5746772754289395)
('esio', 9.069863879209374) ('esio', 7.323362847148034)
('hoe', 9.069863879209374) ('hoe', 7.728827955256198)
('holi', 9.069863879209374) ('holi', 7.441145882804417)
('loan', 9.069863879209374) ('loan', 7.5746772754289395)
('madrid', 9.069863879209374) ('madrid', 7.5746772754289395)
('onto', 9.069863879209374) ('onto', 7.218002331490207)
('plenti', 9.069863879209374) ('plenti', 7.728827955256198)
('potenti', 9.069863879209374) ('potenti', 7.728827955256198)
('pressur', 9.069863879209374) ('pressur', 7.728827955256198)
('scum'

('impress', 9.069863879209374) ('impress', 7.533581175176913)
('jealou', 9.069863879209374) ('jealou', 7.533581175176913)
('jone', 9.069863879209374) ('jone', 7.533581175176913)
('kmt', 9.069863879209374) ('kmt', 7.40004978255239)
('knight', 9.069863879209374) ('knight', 6.183654458227897)
('lauren', 9.069863879209374) ('lauren', 7.533581175176913)
('ledger', 9.069863879209374) ('ledger', 7.282266746896006)
('ngl', 9.069863879209374) ('ngl', 7.687731855004171)
('none', 9.069863879209374) ('none', 7.687731855004171)
('reveal', 9.069863879209374) ('reveal', 7.282266746896006)
('sam', 9.069863879209374) ('sam', 7.687731855004171)
('shape', 9.069863879209374) ('shape', 7.533581175176913)
('trot', 9.069863879209374) ('trot', 7.533581175176913)
51.512376149495445,-0.13774210214614868
2015-01-01T21:00:00.000000000
--------------
('coach', 9.475328987317539) ('coach', 7.366224958593099)
('dart', 9.475328987317539) ('dart', 6.673077778033154)
('discuss', 9.475328987317539) ('discuss', 7.5203756

('refere', 9.069863879209374) ('refere', 6.604751094050022)
('robbi', 9.069863879209374) ('robbi', 6.941223330671235)
('sack', 9.069863879209374) ('sack', 7.452048954437226)
('woo', 9.069863879209374) ('woo', 7.452048954437226)
51.512376149495445,-0.13774210214614868
2015-01-01T23:00:00.000000000
--------------
('batman', 9.475328987317539) ('batman', 7.159799861927197)
('bmt', 9.475328987317539) ('bmt', 7.054439346269371)
('bridget', 9.475328987317539) ('bridget', 7.565264970035361)
('clip', 9.475328987317539) ('clip', 7.054439346269371)
('clue', 9.475328987317539) ('clue', 7.159799861927197)
('contract', 9.475328987317539) ('contract', 7.565264970035361)
('hoje', 9.475328987317539) ('hoje', 7.565264970035361)
('huntsman', 9.475328987317539) ('huntsman', 7.565264970035361)
('huth', 9.475328987317539) ('huth', 7.159799861927197)
('iphon', 9.475328987317539) ('iphon', 7.411114290208102)
('juli', 9.475328987317539) ('juli', 7.159799861927197)
('ly', 9.475328987317539) ('ly', 7.1597998619

('gmt', 9.120142581909091) ('gmt', 7.032057944073236)
('harrod', 9.120142581909091) ('harrod', 7.186208623900494)
('lhr', 9.120142581909091) ('lhr', 6.675383000134503)
('lil', 9.120142581909091) ('lil', 7.186208623900494)
('repeat', 9.120142581909091) ('repeat', 7.186208623900494)
('tattoo', 9.120142581909091) ('tattoo', 7.186208623900494)
('twice', 9.120142581909091) ('twice', 7.032057944073236)
('ugli', 9.120142581909091) ('ugli', 7.032057944073236)
51.512376149495445,-0.2335423231124878
2014-12-31T13:00:00.000000000
--------------
('certain', 9.120142581909091) ('certain', 7.20993050887108)
('cheeki', 9.120142581909091) ('cheeki', 6.804465400762917)
('church', 9.120142581909091) ('church', 6.804465400762917)
('cray', 9.120142581909091) ('cray', 7.20993050887108)
('decent', 9.120142581909091) ('decent', 7.20993050887108)
('deliveri', 9.120142581909091) ('deliveri', 6.9222484364192995)
('gmt', 9.120142581909091) ('gmt', 7.055779829043822)
('harrod', 9.120142581909091) ('harrod', 7.209

('gmt', 9.120142581909091) ('gmt', 6.935649768001586)
('gorgeou', 9.120142581909091) ('gorgeou', 7.089800447828844)
('harrod', 9.120142581909091) ('harrod', 6.802118375377063)
('healthi', 9.120142581909091) ('healthi', 6.578974824062853)
('hungri', 9.120142581909091) ('hungri', 7.089800447828844)
('legend', 9.120142581909091) ('legend', 7.089800447828844)
('reunit', 9.120142581909091) ('reunit', 7.089800447828844)
('review', 9.120142581909091) ('review', 7.089800447828844)
('thame', 9.120142581909091) ('thame', 6.483664644258528)
('trafalgar', 9.120142581909091) ('trafalgar', 6.578974824062853)
('yea', 9.120142581909091) ('yea', 7.089800447828844)
('yeni', 9.120142581909091) ('yeni', 7.089800447828844)
51.512376149495445,-0.2335423231124878
2014-12-31T22:00:00.000000000
--------------
('gmt', 9.120142581909091) ('gmt', 5.366278277705742)
51.512376149495445,-0.2335423231124878
2015-01-01T08:00:00.000000000
--------------
('gmt', 9.120142581909091) ('gmt', 6.111987788356544)
('latest', 9

('ell', 9.120142581909091) ('ell', 7.030284538957564)
('gmt', 9.120142581909091) ('gmt', 6.876133859130306)
('hat', 9.120142581909091) ('hat', 6.876133859130306)
('healthi', 9.120142581909091) ('healthi', 6.742602466505783)
('lool', 9.120142581909091) ('lool', 7.030284538957564)
('messi', 9.120142581909091) ('messi', 6.876133859130306)
('piccadilli', 9.120142581909091) ('piccadilli', 6.876133859130306)
('thame', 9.120142581909091) ('thame', 6.624819430849399)
('toast', 9.120142581909091) ('toast', 6.742602466505783)
51.512376149495445,-0.2335423231124878
2015-01-01T11:00:00.000000000
--------------
('airport', 9.120142581909091) ('airport', 7.225877690680423)
('album', 9.120142581909091) ('album', 7.071727010853165)
('chicken', 9.120142581909091) ('chicken', 7.225877690680423)
('crouch', 9.120142581909091) ('crouch', 7.071727010853165)
('donat', 9.120142581909091) ('donat', 5.583649955423331)
('fella', 9.120142581909091) ('fella', 6.938195618228642)
('gmt', 9.120142581909091) ('gmt', 7

('conced', 9.120142581909091) ('conced', 7.44617323131808)
('cost', 9.120142581909091) ('cost', 7.600323911145338)
('crap', 9.120142581909091) ('crap', 7.44617323131808)
('creat', 9.120142581909091) ('creat', 7.312641838693557)
('decent', 9.120142581909091) ('decent', 7.089498287379348)
('defeat', 9.120142581909091) ('defeat', 7.194858803037174)
('equalis', 9.120142581909091) ('equalis', 6.994188107575022)
('everton', 9.120142581909091) ('everton', 6.8271340229118564)
('excus', 9.120142581909091) ('excus', 7.600323911145338)
('explain', 9.120142581909091) ('explain', 7.600323911145338)
('frank', 9.120142581909091) ('frank', 6.133986842351911)
('gmt', 9.120142581909091) ('gmt', 7.44617323131808)
('gooner', 9.120142581909091) ('gooner', 7.600323911145338)
('hall', 9.120142581909091) ('hall', 7.600323911145338)
('heathrow', 9.120142581909091) ('heathrow', 7.600323911145338)
('henri', 9.120142581909091) ('henri', 7.600323911145338)
('hungri', 9.120142581909091) ('hungri', 7.600323911145338

('comeback', 9.120142581909091) ('comeback', 7.571682962382251)
('conced', 9.120142581909091) ('conced', 6.878535781822306)
('consid', 9.120142581909091) ('consid', 7.119697838639194)
('costa', 9.120142581909091) ('costa', 7.4381515697577285)
('dart', 9.120142581909091) ('dart', 7.571682962382251)
('decent', 9.120142581909091) ('decent', 7.725833642209509)
('defeat', 9.120142581909091) ('defeat', 7.119697838639194)
('dem', 9.120142581909091) ('dem', 7.4381515697577285)
('esio', 9.120142581909091) ('esio', 7.4381515697577285)
('excus', 9.120142581909091) ('excus', 7.4381515697577285)
('fabrega', 9.120142581909091) ('fabrega', 6.6272213535414)
('fantasi', 9.120142581909091) ('fantasi', 7.320368534101345)
('fella', 9.120142581909091) ('fella', 7.571682962382251)
('frank', 9.120142581909091) ('frank', 7.571682962382251)
('gea', 9.120142581909091) ('gea', 7.725833642209509)
('gmt', 9.120142581909091) ('gmt', 7.725833642209509)
('gorgeou', 9.120142581909091) ('gorgeou', 7.725833642209509)
('

('twice', 9.120142581909091) ('twice', 7.565264970035361)
('vol', 9.120142581909091) ('vol', 7.565264970035361)
('yaya', 9.120142581909091) ('yaya', 6.584435717023635)
51.512376149495445,-0.2335423231124878
2015-01-01T22:00:00.000000000
--------------
('gmt', 9.120142581909091) ('gmt', 5.74493212836325)
51.512376149495445,-0.2335423231124878
2015-02-01T03:00:00.000000000
--------------
('gmt', 9.120142581909091) ('gmt', 6.159875980425959)
('godfath', 9.120142581909091) ('godfath', 6.314026660253217)
('hungri', 9.120142581909091) ('hungri', 6.314026660253217)
('lool', 9.120142581909091) ('lool', 6.026344587801436)
51.512376149495445,-0.2335423231124878
2015-02-01T02:00:00.000000000
--------------


In [None]:
51.466681162516274,-0.2335423231124878
2014-12-31T09:00:00.000000000
('freez', 9.188272370947859) ('freez', 6.672750365408472)
('gym', 9.188272370947859) ('gym', 6.385068292956691)
('wcw', 9.188272370947859) ('wcw', 6.518599685581214)
('xxxxx', 9.188272370947859) ('xxxxx', 6.672750365408472)

51.466681162516274,-0.2335423231124878
2014-12-31T10:00:00.000000000
('bastard', 9.188272370947859) ('bastard', 6.970411137910149)
('biggest', 9.188272370947859) ('biggest', 6.816260458082891)
('courtesi', 9.188272370947859) ('courtesi', 6.970411137910149)
('dj', 9.188272370947859) ('dj', 6.970411137910149)
('hughton', 9.188272370947859) ('hughton', 6.459585514144159)
('inde', 9.188272370947859) ('inde', 6.816260458082891)
('knighthood', 9.188272370947859) ('knighthood', 6.970411137910149)
('load', 9.188272370947859) ('load', 6.816260458082891)
('murder', 9.188272370947859) ('murder', 6.970411137910149)
('rais', 9.188272370947859) ('rais', 6.970411137910149)
('rate', 9.188272370947859) ('rate', 6.816260458082891)
('stock', 9.188272370947859) ('stock', 6.970411137910149)
('trust', 9.188272370947859) ('trust', 6.682729065458369)
('tune', 9.188272370947859) ('tune', 6.970411137910149)
('women', 9.188272370947859) ('women', 6.054120406035994)

51.466681162516274,-0.2335423231124878
2014-12-31T07:00:00.000000000
('snow', 9.188272370947859) ('snow', 5.608497976080766)

51.466681162516274,-0.2335423231124878
2014-12-31T04:00:00.000000000
('denni', 9.188272370947859) ('denni', 4.998200701669198)

51.466681162516274,-0.2335423231124878
2014-12-31T05:00:00.000000000
('lewisham', 9.188272370947859) ('lewisham', 5.1404857182199555)

51.466681162516274,-0.2335423231124878
2014-12-31T14:00:00.000000000
('condemn', 9.188272370947859) ('condemn', 6.752925310315798)
('democraci', 9.188272370947859) ('democraci', 6.647564794657972)
('disabl', 9.188272370947859) ('disabl', 7.004239738596705)
('express', 9.188272370947859) ('express', 7.004239738596705)
('gym', 9.188272370947859) ('gym', 6.870708345972182)
('heel', 9.188272370947859) ('heel', 7.158390418423963)
('hopkin', 9.188272370947859) ('hopkin', 7.004239738596705)
('howev', 9.188272370947859) ('howev', 7.158390418423963)
('inde', 9.188272370947859) ('inde', 7.158390418423963)
('kati', 9.188272370947859) ('kati', 7.158390418423963)
('motiv', 9.188272370947859) ('motiv', 7.158390418423963)
('nigger', 9.188272370947859) ('nigger', 7.158390418423963)
('panto', 9.188272370947859) ('panto', 7.158390418423963)
('river', 9.188272370947859) ('river', 7.158390418423963)
('shoot', 9.188272370947859) ('shoot', 7.004239738596705)
('smith', 9.188272370947859) ('smith', 7.158390418423963)
('technic', 9.188272370947859) ('technic', 7.158390418423963)
('tribun', 9.188272370947859) ('tribun', 7.158390418423963)
('trust', 9.188272370947859) ('trust', 6.647564794657972)
('truth', 9.188272370947859) ('truth', 6.870708345972182)
('unearth', 9.188272370947859) ('unearth', 7.158390418423963)
('women', 9.188272370947859) ('women', 6.4652432378640174)
('wood', 9.188272370947859) ('wood', 7.158390418423963)

51.466681162516274,-0.2335423231124878
2014-12-31T15:00:00.000000000
('dj', 9.188272370947859) ('dj', 7.196104626857632)
('elect', 9.188272370947859) ('elect', 7.196104626857632)
('flipagram', 9.188272370947859) ('flipagram', 7.196104626857632)
('honest', 9.188272370947859) ('honest', 7.196104626857632)
('inde', 9.188272370947859) ('inde', 7.041953947030374)
('kati', 9.188272370947859) ('kati', 7.196104626857632)
('key', 9.188272370947859) ('key', 6.790639518749468)
('paddington', 9.188272370947859) ('paddington', 6.908422554405851)
('product', 9.188272370947859) ('product', 7.196104626857632)
('quo', 9.188272370947859) ('quo', 7.196104626857632)
('trust', 9.188272370947859) ('trust', 7.196104626857632)

In [150]:
total_count

13905

In [95]:
sorted(key_words.items(), key=lambda x: x[1], reverse=True)

[('abl', 60),
 ('account', 50),
 ('ah', 42),
 ('accept', 39),
 ('age', 28),
 ('acc', 24),
 ('ago', 20),
 ('agre', 20),
 ('adam', 20),
 ('af', 18),
 ('alcohol', 14),
 ('afternoon', 12),
 ('abus', 12),
 ('admit', 12),
 ('abi', 11),
 ('ahh', 9),
 ('abbey', 8),
 ('absolut', 8),
 ('ad', 8),
 ('alright', 8),
 ('around', 8),
 ('alway', 8),
 ('airport', 6),
 ('ac', 5),
 ('across', 5),
 ('achiev', 5),
 ('add', 5),
 ('along', 5),
 ('anyway', 5),
 ('anyth', 5),
 ('allow', 5),
 ('addict', 4),
 ('ador', 4),
 ('anoth', 4),
 ('album', 4),
 ('al', 4),
 ('american', 4),
 ('advic', 3),
 ('anyon', 3),
 ('address', 3),
 ('air', 3),
 ('amen', 3),
 ('alreadi', 3),
 ('aim', 2),
 ('agent', 2),
 ('away', 2),
 ('beauti', 2),
 ('ask', 2),
 ('bed', 2),
 ('allah', 2),
 ('ahead', 2),
 ('amount', 2),
 ('argument', 2),
 ('also', 2),
 ('care', 2),
 ('america', 1),
 ('ahhh', 1),
 ('aint', 1),
 ('andi', 1),
 ('arsen', 1),
 ('awak', 1),
 ('argu', 1),
 ('awkward', 1),
 ('best', 1),
 ('cold', 1),
 ('came', 1)]

In [34]:
# 51.512376149495445,0.24545878171920776
# 2014-12-31T08:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 6.255757752129241)

# 51.512376149495445,0.24545878171920776
# 2015-01-01T14:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 7.383225063715033)

# 51.512376149495445,0.24545878171920776
# 2015-01-01T13:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 7.348847573630395)

# 51.512376149495445,0.24545878171920776
# 2015-01-01T10:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 6.7120812774708964)

# 51.512376149495445,0.24545878171920776
# 2015-01-01T12:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 7.225877690680423)

# 51.512376149495445,0.24545878171920776
# 2015-01-01T17:00:00.000000000
# ('airport', 5.61512051684126) ('airport', 7.324358962381311)

In [55]:
tweets_at_coords = tweets[tweets["lat_long_bin"] == "51.512376149495445,0.24545878171920776"]

tweets_at_coords[tweets_at_coords.parsed_tweets.str.contains("airport")]

Unnamed: 0,id,Name,X,Y,LONGITUDE,LATITUDE,MESSAGEDATE,USERID,USERSCREENNAME,MESSAGETEXT,LOCATION,HASHTAGS,ISRETWEET,long_bin,lat_bin,lat_long_bin,parsed_tweets
16543,550358426539528194,,0.254575,51.55541,0.254575,51.555408,31/12/2014 18:30:48,1563558049,upminster13,What's a dart players favorite airport Heathrow,Romford; London,,0,0.2454587817192077,51.512376149495445,"51.512376149495445,0.24545878171920776",What s a dart players favorite airport Heathrow


In [57]:
tweets[tweets.parsed_tweets.str.contains("airport")]

Unnamed: 0,id,Name,X,Y,LONGITUDE,LATITUDE,MESSAGEDATE,USERID,USERSCREENNAME,MESSAGETEXT,LOCATION,HASHTAGS,ISRETWEET,long_bin,lat_bin,lat_long_bin,parsed_tweets
139,550215342677843968,,-0.487881,51.47168,-0.487881,51.47168,31/12/2014 09:02:14,137175146,obaa_boni,Omg Omg Omg there's this fine as man in the ai...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",Omg Omg Omg there s this fine as man in the ai...
839,550224754838634496,,-0.489448,51.472065,-0.489448,51.472065,31/12/2014 09:39:38,19987346,EmmaKParky,In the airport waiting for my flight and there...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",In the airport waiting for my flight and there...
2895,550230068807892992,,-0.468273,51.47151,-0.468273,51.471512,31/12/2014 10:00:45,137175146,obaa_boni,I can't even buy food. Everything in the Lond...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",I can t even buy food Everything in the London...
3549,550179981163507715,,-0.102889,51.544327,-0.102889,51.544327,31/12/2014 06:41:44,343197788,kickalert,@julesjoseph Fairport Convention at @unionchap...,Islington; London,,0,-0.1377421021461486,51.512376149495445,"51.512376149495445,-0.13774210214614868",Fairport Convention at Mar London UK
3870,550184482389905408,,-0.448606,51.469936,-0.448606,51.469936,31/12/2014 06:59:37,1238600882,hollyms6,@HeathrowAirport thanks for the best airport s...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",thanks for the best airport security experienc...
4471,550198835096862720,,-0.325371,51.516876,-0.325371,51.516876,31/12/2014 07:56:39,351652356,FergusRose,Dreadful drop off experience at Luton airport ...,Ealing; London,,0,-0.3293425440788269,51.512376149495445,"51.512376149495445,-0.3293425440788269",Dreadful drop off experience at Luton airport ...
5203,550186765940969473,,-0.449158,51.471706,-0.449158,51.471706,31/12/2014 07:08:41,103015611,PRCAIngham,There's something rather unsettling about sitt...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",There s something rather unsettling about sitt...
5543,550206924768436225,,-0.457358,51.470493,-0.457358,51.470493,31/12/2014 08:28:47,463712041,BasiC_Reedy,At le airport waiting for the goat @BasiC_Rate...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",At le airport waiting for the goat can t wait ...
5993,550296322532995072,,-0.110786,51.46256,-0.110786,51.462559,31/12/2014 14:24:01,938939070,HomeAffairsPA,Woman; 26; from Haringey; London; arrested at ...,Lambeth; London,Syria,0,-0.1377421021461486,51.42098617553711,"51.42098617553711,-0.13774210214614868",Woman from Haringey London arrested at Luton a...
6773,550295283821993985,,-0.437932,51.479786,-0.437932,51.479786,31/12/2014 14:19:54,72398127,JAZZYCAB,@Rayuny congratulations ! Spain . We in London...,Hillingdon; London,,0,-0.5209429860115051,51.466681162516274,"51.466681162516274,-0.5209429860115051",congratulations Spain We in London need to fig...


In [124]:
for hourly_interval in idf_by_hourly_interval.keys():
    idf = idf_by_hourly_interval[hourly_interval]["sorted_idf"]
    if len([i for i in idf if ((i[1]>7.5))]) > 0:
        print(hourly_interval)
        print(len([i for i in idf if ((i[1]>7.5))])) 
        print("-------------------")

2015-01-01T00:00:00.000000000
262
-------------------
2015-01-01T18:00:00.000000000
245
-------------------
2015-01-01T16:00:00.000000000
175
-------------------
2015-01-01T21:00:00.000000000
243
-------------------
2015-01-01T20:00:00.000000000
149
-------------------
2015-01-01T19:00:00.000000000
279
-------------------
2015-01-01T22:00:00.000000000
158
-------------------


In [69]:
idf_by_hourly_interval[np.datetime64('2015-01-01T00:00:00.000000000')]["sorted_idf"][:1000]

[('ador', 7.809774870621934),
 ('alcohol', 7.809774870621934),
 ('apart', 7.809774870621934),
 ('appreci', 7.809774870621934),
 ('auguri', 7.809774870621934),
 ('awkward', 7.809774870621934),
 ('bath', 7.809774870621934),
 ('begin', 7.809774870621934),
 ('bell', 7.809774870621934),
 ('bon', 7.809774870621934),
 ('book', 7.809774870621934),
 ('bought', 7.809774870621934),
 ('british', 7.809774870621934),
 ('catch', 7.809774870621934),
 ('chanc', 7.809774870621934),
 ('class', 7.809774870621934),
 ('clear', 7.809774870621934),
 ('cross', 7.809774870621934),
 ('cup', 7.809774870621934),
 ('current', 7.809774870621934),
 ('david', 7.809774870621934),
 ('delight', 7.809774870621934),
 ('deserv', 7.809774870621934),
 ('di', 7.809774870621934),
 ('dinner', 7.809774870621934),
 ('ed', 7.809774870621934),
 ('either', 7.809774870621934),
 ('emot', 7.809774870621934),
 ('er', 7.809774870621934),
 ('es', 7.809774870621934),
 ('especi', 7.809774870621934),
 ('eu', 7.809774870621934),
 ('everywher',

In [113]:
for lat_long_bin in idf_by_lat_long_bin.keys():
    print(lat_long_bin)
    idf = idf_by_lat_long_bin[lat_long_bin]["sorted_idf"]
#     print([i for i in idf if ((i[1]>8) and (i[1]<9))])
    print(len([i for i in idf if ((i[1]>9))]))
    print("-------------------")

51.375291188557945,-0.3293425440788269
0
-------------------
51.466681162516274,-0.2335423231124878
4965
-------------------
51.329596201578774,-0.04194188117980957
0
-------------------
51.512376149495445,-0.04194188117980957
0
-------------------
51.55807113647461,0.14965856075286865
0
-------------------
51.649461110432945,-0.3293425440788269
0
-------------------
51.603766123453774,-0.425142765045166
0
-------------------
51.466681162516274,-0.13774210214614868
7544
-------------------
51.512376149495445,-0.13774210214614868
7342
-------------------
51.603766123453774,0.14965856075286865
0
-------------------
51.329596201578774,-0.13774210214614868
0
-------------------
51.42098617553711,-0.2335423231124878
0
-------------------
51.512376149495445,-0.2335423231124878
4747
-------------------
51.42098617553711,0.05385833978652954
0
-------------------
51.42098617553711,-0.3293425440788269
0
-------------------
51.512376149495445,-0.5209429860115051
0
-------------------
51.603766123

In [116]:
# Top temporal key words by IDF (where IDF > 9)
# 51.466681162516274,-0.2335423231124878 - Roehampton
# 51.466681162516274,-0.13774210214614868 - Clapham
# 51.512376149495445,-0.13774210214614868 - Soho
# 51.512376149495445,-0.2335423231124878 - White City

In [110]:
tweets[tweets["parsed_tweets"].str.contains("brunch")]

Unnamed: 0,id,Name,X,Y,LONGITUDE,LATITUDE,MESSAGEDATE,USERID,USERSCREENNAME,MESSAGETEXT,LOCATION,HASHTAGS,ISRETWEET,datetime,datetime_to_nearest_hour,parsed_tweets
4657,550203315926368256,,-0.207275,51.477604,-0.207275,51.477604,31/12/2014 08:14:27,46715362,ChelseaBoy007,Feeling the cold? Why not enjoy brunch from 10...,Hammersmith; London,parsonsgreen,0,2014-12-31 08:14:27,2014-12-31 08:00:00,Feeling the cold Why not enjoy brunch from tod...
6146,550293222296403968,,-0.128225,51.51687,-0.128225,51.516869,31/12/2014 14:11:42,559336525,AndreeaGB_,@b1305catalina brunch time #shoppingtime @ Joe...,Camden Town; London,shoppingtime,0,2014-12-31 14:11:42,2014-12-31 14:00:00,brunch time Joe the Juice New Oxford Street
6656,550302233418817538,,-0.123593,51.51277,-0.123593,51.512772,31/12/2014 14:47:31,397526492,samrogers30,@KineticEcstasy having a well needed drink aft...,City of London; London,,0,2014-12-31 14:47:31,2014-12-31 14:00:00,having a well needed drink after that brunch L...
7089,550304971309469697,,-0.081878,51.527348,-0.081878,51.527348,31/12/2014 14:58:23,324909172,sarry_baby,NYE brunch with my ride or die @hannahapilates...,Hackney; London,,0,2014-12-31 14:58:23,2014-12-31 14:00:00,NYE brunch with my ride or die Bill s Restaura...
7990,550294264887128065,,-0.080705,51.4197,-0.080705,51.419701,31/12/2014 14:15:51,1354302756,JoannasRest,@SimplyJenkins No brunch I'm afraid. We open t...,Croydon; London,,0,2014-12-31 14:15:51,2014-12-31 14:00:00,No brunch I m afraid We open tomorrow at midda...
10614,550247383922380801,,-0.14125,51.49244,-0.14125,51.492439,31/12/2014 11:09:34,141422165,mac_kevin84,lapaq kronik !! jom brunch before explore city...,London; England,,0,2014-12-31 11:09:34,2014-12-31 11:00:00,lapaq kronik jom brunch before explore city let s
11362,550255914318848002,,-0.24093,51.346172,-0.24093,51.346172,31/12/2014 11:43:27,32602294,SassyMusacchio_,New Years Eve brunch,Epsom; England,,0,2014-12-31 11:43:27,2014-12-31 11:00:00,New Years Eve brunch
11993,550239410156216320,,-0.06523,51.498035,-0.06523,51.498035,31/12/2014 10:37:52,2932095629,see_us_hear_us,Time for a quick DiscoPump @TheGymGroup then b...,Camberwell; London,,0,2014-12-31 10:37:52,2014-12-31 10:00:00,Time for a quick DiscoPump then brunch then re...
18690,550319522000166912,,-0.085692,51.55176,-0.085692,51.551762,31/12/2014 15:56:13,19401472,YepStephenBrown,Boozy brunch and then a few in the pub. My res...,Islington; London,,0,2014-12-31 15:56:13,2014-12-31 15:00:00,Boozy brunch and then a few in the pub My reso...
20379,550319122417197056,,-0.207234,51.477562,-0.207234,51.477562,31/12/2014 15:54:37,46715362,ChelseaBoy007,Why not begin the year 2015 in style British b...,Hammersmith; London,parsonsgreen fulham,0,2014-12-31 15:54:37,2014-12-31 15:00:00,Why not begin the year in style British brunch...
