# Final Year Project: 

## Analysing drug use in the UK through Twitter

In [None]:
from pymongo import MongoClient
from collections import Counter

import gmaps.datasets
import pprint

# Open a client with the driver's default settings (a MongoDB on this machine)
# and pick out the database and collection that hold the stored tweets.
client = MongoClient()
db = client.twitter_db
collection = db["twitter_collection"]


# Lists shared by the cells below: loaded tweets, keyword matches, and a
# scratch list used while re-filtering.
tweets, filteredTweets, newleyFilteredTweets = [], [], []

#### A) This block keeps only tweets that have both coordinate data and place data (approx. 1 million tweets). Run A OR B, not both.

In [None]:
# Load every stored tweet that has text, coordinates and a place into `tweets`.
# The list is rebuilt from scratch so that re-running this cell (or running it
# after block B) does not append the same tweets a second time.
tweets = []
for obj in collection.find():
    # .get() treats a missing key like a null value, so documents that lack
    # one of these fields are skipped instead of raising KeyError.
    text = obj.get('text')
    geo = obj.get('geo')
    place = obj.get('place')
    if text is None or geo is None or place is None:
        continue
    # Keep only the three values the later cells read.
    tweets.append({
        'text': text,
        'geo': geo['coordinates'],
        'place': place['name'],
    })

#### B) This block ignores tweets without place data and does not use coordinate data at all (approx. 12 million tweets)

In [None]:
# Load every stored tweet that has text and a place into `tweets`; coordinates
# are not read at all. The list is rebuilt from scratch so that re-running this
# cell (or running it after block A) does not append the same tweets again.
tweets = []
for obj in collection.find():
    # .get() treats a missing key like a null value, so documents that lack
    # one of these fields are skipped instead of raising KeyError.
    text = obj.get('text')
    place = obj.get('place')
    if text is None or place is None:
        continue
    # Keep only the two values the later cells read.
    tweets.append({
        'text': text,
        'place': place['name'],
    })

#### A) Only run this block for visualisation (not required for the application to work)

In [None]:
# Pull the stored coordinates out of every loaded tweet (needs the 'geo' entry
# that only loader A stores).
coord = [tweet['geo'] for tweet in tweets]

# NOTE(review): this Google Maps API key is committed in plain text; consider
# revoking it and loading a replacement from outside the notebook.
gmaps.configure(api_key="AIzaSyADv13vyns8lTpdjwoxMwYL3Q0k2Eqoyno")
locations = coord
fig = gmaps.figure()
heatmapLayer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmapLayer)
# Leaving the figure as the last expression makes the notebook display it.
fig

### This block filters for drug related tweets

In [None]:
print('Amount of tweets before filtering: ' + str(len(tweets)))

# The result list is rebuilt from scratch so that re-running this cell (for
# example after switching `keywords`) never stacks new matches on old ones.
filteredTweets = []
# Scratch list, reset alongside the results.
newleyFilteredTweets = []
# collection of keywords split by drug.
keywordWeed = ['weed', 'marijuana', 'stoned', 'blazed', '420', 'mary jane','devil\'s lettuce', 'devils lettuce', 'dank',
               'doobie', 'kush']
keywordCocaine = ['cocaine','charlie', 'blow', '8 ball', 'nose candy', 'white powder', 'snow white', 'yayo']
keywordEcstasy = ['ecstasy','molly','mandy',' rolling','mdma', 'XTC','xtc','e pill','rave']
keywordAmphetamines = ['whizz', 'sulph', 'paste' , 'billy' , 'base', 'uppers']
keywordHallucinogens = ['lsd', 'LSD',' shrooms', 'magic mushrooms', 'liberty cap', 'mushies', 'acid', 'blotter']

keywordAll = keywordWeed + keywordCocaine + keywordEcstasy + keywordAmphetamines + keywordHallucinogens

# To change the filter used, assign keyword to a different list above. 
keywords = keywordWeed

# Matching is a case-insensitive substring test: both the keywords and the
# tweet text are lower-cased, so "Weed" and "WEED" match the keyword 'weed'.
# The set also collapses pairs such as 'XTC'/'xtc' into a single entry.
loweredKeywords = {word.lower() for word in keywords}
for tweet in tweets:
    loweredText = tweet['text'].lower()
    if any(word in loweredText for word in loweredKeywords):
        filteredTweets.append(tweet)
print('Amount of tweets matching keywords: ' + str(len(filteredTweets)))

### Second round of filtering to remove false positives

In [None]:
# Words whose presence marks a tweet as a false positive (gardening, weather,
# travel, ...).
keywordsR = ['garden', 'Garden', 'boot', 'weeds', 'model', 'education', 'weather', 'Weather', 'tweed', 'Tweed', 'weeding',
             'seaweed', 'Seaweed', 'Stigmabase', 'travel', 'Travel', 'nutrition', 'based', 'gravel']
# Compared case-insensitively, so one spelling per word is enough.
loweredKeywordsR = {word.lower() for word in keywordsR}


def _hasFalsePositiveMarker(text):
    """Return True if `text` contains any word from `keywordsR`, ignoring case."""
    loweredText = text.lower()
    return any(word in loweredText for word in loweredKeywordsR)


# The survivors are built as a new list in one pass, so this cell does not rely
# on a scratch list prepared elsewhere and can be re-run without a NameError;
# filtering an already filtered list simply yields the same list again.
filteredTweets = [tweet for tweet in filteredTweets
                  if not _hasFalsePositiveMarker(tweet['text'])]

print('Amount of tweets after additional filtering: ' + str(len(filteredTweets)))


### B) Count all places and list them

In [None]:
# Collect the place name of every filtered tweet and print how often each one
# occurs; the output is only used to decide which region a place belongs to.
places = [tweet['place'] for tweet in filteredTweets]
# Debug aid, per tweet: print(tweet['text'] + " ( " + tweet['place'] + ' )')
placeCounts = Counter(places)
pprint.pprint(placeCounts)

### A) HeatMap block

In [None]:
# Pull the stored coordinates out of every filtered tweet (needs the 'geo'
# entry that only loader A stores).
coord = [tweet['geo'] for tweet in filteredTweets]

# NOTE(review): this Google Maps API key is committed in plain text; consider
# revoking it and loading a replacement from outside the notebook.
gmaps.configure(api_key="AIzaSyADv13vyns8lTpdjwoxMwYL3Q0k2Eqoyno")
locations = coord
fig = gmaps.figure()
heatmapLayer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmapLayer)
# Leaving the figure as the last expression makes the notebook display it.
fig

### B) Place counter

In [None]:
# Keyword lists, one per region. A tweet's place name is assigned to a region
# when it contains one of that region's keywords (see the counting loop below).
northEast = ['County Durham', 'Darlington', 'Gateshead', 'Hartlepool', 'Middlesbrough', 'Newcastle', 'North Tyneside', 
             'Northumberland', 'Redcar and Cleveland', 'South Tyneside', 'Stockton-on-Tees', 'Sunderland', 'North East']

northWest = ['Blackburn with Darwen', 'Blackpool', 'Bolton' ,'Bury', 'Cheshire East', 'Cheshire West', 'Chester', 'Cumbria',
             'Halton', 'Knowsley', 'Lancashire', 'Liverpool', 'Manchester', 'Oldham', 'Rochdale', 'Salford', 'Sefton', 
             'St. Helens', 'Stockport', 'Tameside', 'Trafford', 'Warrington', 'Wigan', 'Wirral', 'North West',
             'Ashton-under-Lyne']

# NOTE(review): 'North East' below can never decide a match because the
# northEast list contains the same keyword and is tested first; presumably
# 'North East Lincolnshire' / 'North Lincolnshire' were intended - confirm.
yorkshireAndTheHumber = ['Barnsley', 'Bradford' , 'Calderdale', 'Doncaster', 'East Riding of Yorkshire', 'Kingston upon Hull',
                         'Kirklees', 'Leeds', 'North East', 'Lincolnshire', 'Lincolnshire North', 'Yorkshire', 
                         'Rotherham', 'Sheffield', 'Wakefield', 'York']

eastMidlands = ['Derby', 'Derbyshire', 'Leicester', 'Leicestershire', 'Lincoln', 'Northampton', 'Nottingham', 
                'Nottinghamshire', 'Rutland', 'East Midlands', 'Loughborough']

westMidlands = ['Birmingham', 'Coventry', 'Dudley', 'Herefordshire', 'Sandwell', 'Shropshire', 'Solihull', 'Staffordshire', 
                'Stoke-on-Trent', 'Telford', 'Wrekin', 'Walsall', 'Warwickshire', 'Wolverhampton', 'Worcestershire', 'Worcester',
                'West Midlands', 'Stoke']

# 'Suffolk' and 'Thurrock' are separate keywords; written as the single string
# 'Suffolk Thurrock' neither place could ever match. The repeated 'Bedford'
# entry has been dropped.
eastOfEngland = ['Bedford', 'East', 'Cambridgeshire', 'Bedfordshire', 'Essex', 'Norwich', 'Cambridge',
                 'Hertfordshire', 'Luton', 'Norfolk', 'Peterborough', 'Hertford',
                 'Southend-on-Sea', 'Suffolk', 'Thurrock']

London = ['Gravesend', 'Grays', 'Barking', 'Dagenham', 'Barnet', 'Bexley', 'Brent', 'Bromley', 'Camden Town', 'London',
          'Croydon', 'Ealing', 'Enfield', 'Greenwich', 'Hackney', 'Hammersmith', 'Fulham', 'Haringey', 'Harrow', 'Havering', 
          'Hillingdon', 'Hounslow', 'Islington', 'Kensington', 'Chelsea', 'Kingston upon Thames', 'Walthamstow',
          'Lambeth', 'Lewisham', 'Merton', 'Newham', 'Poplar', 'Paddington', 'Camberwell', 'Tottenham',
          'Redbridge', 'Richmond', 'Southwark', 'Sutton', 'Tower Hamlets', 'Waltham Forest', 'Wandsworth', 
          'Westminster']

# The repeated 'Sussex' entry has been dropped.
southEast = ['South East', 'Bracknell Forest', 'Brighton', 'Hove', 'Buckingham', 'Sussex', 'Hampshire', 
             'Isle of Wight', 'Kent', 'Worthing',
             'Medway', 'Milton Keynes', 'Oxford', 'Portsmouth', 'Reading', 'Slough', 'Southampton', 'Surrey', 
             'Berkshire', 'Windsor', 'Maidenhead', 'Wokingham']

southWest = ['South West', 'Bath', 'Somerset', 'Bournemouth', 'Bristol', 'Cornwall', 'Devon', 'Dorset', 
             'Gloucestershire', 'Isles of Scilly', 'Exeter',
             'Plymouth', 'Poole', 'Swindon', 'Torbay', 'Wiltshire']

Ireland = ['Dublin', 'Ireland', 'Belfast', 'Fingal','Northern Ireland', 'South Dublin','Dun Laoghaire-Rathdown']
Scotland = ['Glasgow','Scotland']
Wales = ['Wales', 'Cardiff', 'Swansea']
           
# Region label (exactly as printed below) paired with its keyword list. The
# order matters: a tweet is counted for the first region whose keywords match
# its place name, and for no region at all if none match.
regionKeywords = [
    ('North East', northEast),
    ('North West', northWest),
    ('Yorkshire and the Humber', yorkshireAndTheHumber),
    ('East Midlands', eastMidlands),
    ('West Midlands', westMidlands),
    ('East Of England', eastOfEngland),
    ('London', London),
    ('South East', southEast),
    ('South West', southWest),
    ('Ireland', Ireland),
    ('Scotland', Scotland),
    ('Wales', Wales),
]

# Keywords that must equal the whole place name. As a substring, 'East' also
# sits inside 'South East', 'Eastbourne', 'East Kilbride', ..., which sent
# those places to East of England before the South East list was ever tried.
exactOnlyKeywords = {'East'}


def _matchesRegion(place, words):
    """Return True if `place` matches at least one keyword in `words`.

    A keyword matches when it occurs anywhere inside `place`, except for the
    keywords in `exactOnlyKeywords`, which must equal `place` exactly.
    """
    for word in words:
        if word in exactOnlyKeywords:
            if place == word:
                return True
        elif word in place:
            return True
    return False


regionCounts = {label: 0 for label, _ in regionKeywords}
for tweet in filteredTweets:
    for label, words in regionKeywords:
        if _matchesRegion(tweet['place'], words):
            regionCounts[label] += 1
            break  # first matching region wins

# Per-region totals under their individual names.
northEastCount = regionCounts['North East']
northWestCount = regionCounts['North West']
yorkshireAndTheHumberCount = regionCounts['Yorkshire and the Humber']
eastMidlandsCount = regionCounts['East Midlands']
westMidlandsCount = regionCounts['West Midlands']
eastOfEnglandCount = regionCounts['East Of England']
LondonCount = regionCounts['London']
southEastCount = regionCounts['South East']
southWestCount = regionCounts['South West']
irelandCount = regionCounts['Ireland']
scotlandCount = regionCounts['Scotland']
walesCount = regionCounts['Wales']

# Tweets whose place matched no region are not part of this total.
allCount = sum(regionCounts.values())

for label, _ in regionKeywords:
    print(label + ':', regionCounts[label])
print('All:', allCount)