In [58]:
# -*- coding: utf-8 -*-
import urllib
import urllib.request as request
import re
import html
import pickle
import sys, os
from collections import deque

# The site-packages path must be registered before tweepy can be imported.
sys.path.append("C:\\Program Files\\Anaconda3\\envs\\tensorflow\\lib\\site-packages")
import tweepy

# Page fetched by scrape() for the latest readings.
urlstr = "https://uk-air.defra.gov.uk/latest/currentlevels?view=site#L"
# Short link appended to the tweets built by compose() and composeAboveTweet().
shorturlstr = "https://goo.gl/ZpELjS"

# Document quoted in the exceedance tweets built by composeAboveTweet().
urlWHO = "http://apps.who.int/iris/bitstream/10665/69477/1/WHO_SDE_PHE_OEH_06.02_eng.pdf"

# Bytes interpolated into the regex scrape() uses to find the site's table row.
sitename = b'Liverpool'

# Unit label shown in tweets.
mgm3 = '\u03BCgm\u207B\u00B3'

# Pollutant display labels; they double as the keys of every dict below and
# of the `reading` dicts produced by scrape().
O3 = "O\u2083"
NO2 = "NO\u2082"
SO2 = "SO\u2082"
PM25 = "PM\u2082\u2085"
# NOTE(review): this label renders with a subscript "100"; the 24-hour guide
# value of 50 below looks like the WHO PM10 figure -- confirm the intended
# label before changing it, since it is used as a key in stored readings.
PM100 = "PM\u2081\u2080\u2080"

# Guide value per pollutant, compared against readings in compareWHO().
# source: http://apps.who.int/iris/bitstream/10665/69477/1/WHO_SDE_PHE_OEH_06.02_eng.pdf
guides = {
    O3: 100,
    NO2: 200,
    SO2: 20,
    PM25: 25,
    PM100: 50,
}

# Averaging period quoted for the WHO guide value of each pollutant.
meansWHO = {
    O3: '8-hour',
    NO2: '1-hour',
    SO2: '10-minute',
    PM25: '24-hour',
    PM100: '24-hour',
}

# Averaging period quoted for the DEFRA data of each pollutant; where it
# differs from meansWHO, composeAboveTweet() adds a note to the tweet.
meansDEFRA = {
    O3: '8-hour',
    NO2: '1-hour',
    SO2: 'max 15-min',
    PM25: '24-hour',
    PM100: '24-hour',
}


def tweet(status, replyto=None):
    """Post ``status`` through tweepy and return the created status object.

    Credentials are read from ``apikeys.bin`` in the current working
    directory: a pickle holding the 4-tuple ``(consumer_key,
    consumer_secret, access_token, access_token_secret)``.

    Args:
        status: Text to post. When falsy (empty string, ``None``) nothing is
            posted and no credentials are read.
        replyto: Optional object with an ``id`` attribute (for example the
            value returned by an earlier call); when truthy the new status
            is posted as a reply to that id.

    Returns:
        The value returned by ``api.update_status``, or ``None`` when
        ``status`` is falsy.
    """
    if not status:
        return None

    # NOTE(security): pickle.load can execute arbitrary code; apikeys.bin
    # must only ever be a file this script's owner created.
    with open("apikeys.bin", "rb") as keyfile:
        consumer_key, consumer_secret, access_token, access_token_secret = pickle.load(keyfile)

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Look the account up once instead of once per print; the diagnostic
    # output (name, status, length, name) is unchanged.
    account_name = api.me().name
    print(account_name)
    print(status)
    print(len(status))
    print(account_name)

    if replyto:
        stat = api.update_status(status=status, in_reply_to_status_id=replyto.id)
    else:
        stat = api.update_status(status=status)
    return stat
    
def compose(day, clock, reading):
    """Build the text of the regular readings tweet.

    Args:
        day: Date text shown in the header line.
        clock: Time text shown in the header line.
        reading: Mapping of pollutant label to a ``(value, index_text)``
            pair; the value is formatted with no decimals.

    Returns:
        A newline-joined string: a header line with day, clock and the unit
        label, one line per pollutant in sorted label order, and finally the
        short URL.
    """
    header = "%s, %s (%s)" % (day, clock, mgm3)
    pollutant_lines = [
        "%s: %.0f %s" % (label, reading[label][0], reading[label][1])
        for label in sorted(reading)
    ]
    footer = "%s" % shorturlstr
    return '\n'.join([header] + pollutant_lines + [footer])

def _clock_hour(clock):
    """Return the hour of day (0-23) taken from the text before the first ':'."""
    return int(clock[:clock.index(':')]) % 24


def composeAboveTweet(day, clock, above):
    """Build one exceedance tweet per pollutant that is above its guide now.

    Args:
        day: Date text of the current reading.
        clock: Time text of the current reading; the text before the first
            ':' is parsed as the hour.
        above: Mapping of pollutant label to a list of ``(day, clock,
            value)`` tuples, newest first (as produced by ``compareWHO`` on
            the newest-first readings history).

    Returns:
        A list of tweet texts, one for each pollutant whose newest
        exceedance is the current reading. Empty when there is none.

    The number of hours is the length of the run of exceedances whose hours
    step back one at a time from the current hour. Hours are compared
    modulo 24 so that a run crossing midnight (e.g. 23:00 -> 00:00) keeps
    counting instead of stopping at the day boundary.

    NOTE(review): only the hour is compared for earlier entries, not the
    day, so an entry from the matching hour of a different day would also
    be counted -- confirm whether the day text can be parsed to tighten this.
    """
    status = []
    for k in above:
        print("In composeAboveTweet", k, above[k])
        lday, lclock, lvalue = above[k][0]
        if lday != day or lclock != clock:
            # The newest exceedance is not the current reading: nothing to say.
            continue

        # Count consecutive hourly exceedances ending at the current reading.
        nhours = 1
        for _pday, pclock, _pvalue in above[k][1:]:
            if (_clock_hour(pclock) + nhours) % 24 != _clock_hour(clock):
                break
            nhours += 1

        stat = [
            "@lpoolcouncil @DefraUKAir: %s above @WHO guide of %.0f%s (%s-mean) (%s) for %d hours (%s)" %
            (k, guides[k], mgm3, meansWHO[k], urlWHO, nhours, shorturlstr),
            "#AirPollution #Liverpool",
        ]
        if meansWHO[k] != meansDEFRA[k]:
            # The two sources average over different periods; say so.
            stat.append("(Note #DEFRA data is %s-mean)" % meansDEFRA[k])
        status.append('\n'.join(stat))
    return status
        


def scrape():
    """Fetch the page at ``urlstr`` and extract the latest reading for the site.

    The table row containing ``sitename`` is located with a regex, the date
    cell is split at its ``<br>`` into day and clock, and each ``<span>`` in
    the row is parsed as one pollutant cell.

    Returns:
        A ``(day, clock, reading)`` tuple. ``day`` and ``clock`` are the two
        decoded parts of the date cell. ``reading`` maps the pollutant labels
        ``O3, NO2, SO2, PM25, PM100`` (in span order) to ``(value, index)``
        where ``value`` is the leading number as a float and ``index`` is the
        first parenthesised text of the cell.

    Raises:
        ValueError: if the site row, the date cell or a cell's parenthesised
            text cannot be found, if the row does not contain exactly one
            span per pollutant, or if a cell does not start with a number.
    """
    # Use the response as a context manager so the connection is closed.
    with request.urlopen(urlstr) as response:
        page = response.read()

    site_match = re.search(rb".*<tr>.*(%s.*?)</tr>" % sitename, page, re.DOTALL)
    if site_match is None:
        raise ValueError("no table row found for site %r" % sitename)
    row = site_match.group(1)

    # date and time
    date_match = re.search(rb".*<td>(.*?)<br.*?>(.*?)</td>", row, re.DOTALL)
    if date_match is None:
        raise ValueError("no date/time cell found in row for site %r" % sitename)
    day = date_match.group(1).decode("utf-8")
    clock = date_match.group(2).decode("utf-8")

    # data
    units = [O3, NO2, SO2, PM25, PM100]
    cols = re.findall(rb"<span.*?>(.*?)</span>", row, re.DOTALL)
    if len(cols) != len(units):
        raise ValueError("expected %d readings, found %d" % (len(units), len(cols)))

    datanums = []
    for cell in cols:
        # The number ends at the first space, or at the first entity ('&')
        # when the cell contains no plain space.
        separator = b' ' if b' ' in cell else b'&'
        value = float(cell[:cell.index(separator)])
        text = cell.replace(b'&nbsp;', b' ')
        index_match = re.match(rb".*?(\(.*?\))", text)
        if index_match is None:
            raise ValueError("no parenthesised index found in cell %r" % cell)
        datanums.append((value, index_match.group(1).decode("utf-8")))

    reading = dict(zip(units, datanums))
    return day, clock, reading

def loadReadings():
    """Load the stored readings history from ``allreadings.bin``.

    Returns:
        The object unpickled from ``allreadings.bin`` in the current working
        directory, or a new empty ``deque`` when that file does not exist.
    """
    fall = "allreadings.bin"
    if not os.path.isfile(fall):
        return deque()
    # NOTE(security): pickle.load can execute arbitrary code; only load a
    # history file written by this script.
    with open(fall, "rb") as history_file:
        return pickle.load(history_file)

def pickleReadings(allreading):
    """Write the readings history to ``allreadings.bin``.

    Args:
        allreading: The object to store (the script passes its deque of
            ``(day, clock, reading)`` tuples). It is pickled to
            ``allreadings.bin`` in the current working directory, replacing
            any existing file.
    """
    fall = "allreadings.bin"
    # Dump the argument itself rather than relying on a module-level name,
    # and close the file so the data is flushed to disk.
    with open(fall, "wb") as history_file:
        pickle.dump(allreading, history_file)
    
def compareWHO(allreadings):
    """Collect, per pollutant, the readings that exceed its guide value.

    Args:
        allreadings: Iterable of ``(day, clock, reading)`` tuples, where
            ``reading`` maps every pollutant label in ``guides`` to a pair
            whose first item is the measured value.

    Returns:
        A dict mapping pollutant label to a list of ``(day, clock, value)``
        tuples, in the order the readings were iterated. Pollutants that
        never exceed their guide value are absent.
    """
    exceedances = {}
    for day, clock, reading in allreadings:
        for pollutant, limit in guides.items():
            level = reading[pollutant][0]
            if level > limit:
                exceedances.setdefault(pollutant, []).append((day, clock, level))
    return exceedances


# Script driver. With debug set, only a test tweet and a reply to it are
# posted; otherwise the latest reading is scraped, tweeted, stored, and
# checked against the guide values.
debug = False

if debug:
    # Post a test status, show its id, then post a reply to it.
    stat = tweet("TESTTEST")
    print(stat.id)
    tweet("In reply to: TEST3", stat)

else:
    # Fetch the latest reading and tweet it; `stat` is kept so that any
    # exceedance tweets below can be posted as replies to it.
    day, clock, reading = scrape()
    status = compose(day, clock, reading)
    stat = tweet(status)

    # Prepend the new reading so the stored history is newest-first, then
    # write the history back to disk.
    allreadings = loadReadings()
    allreadings.appendleft((day, clock, reading))
    pickleReadings(allreadings)

    # compare with WHO recommendations
    r = compareWHO(allreadings)
    if r:
        # One reply per pollutant whose newest exceedance is this reading.
        stats = composeAboveTweet(day, clock, r)
        for s in stats:
            tweet(s, replyto=stat)




Liverpool Speke Air
TESTTEST
8
Liverpool Speke Air
824277255526486016
Liverpool Speke Air
In reply to: TEST3
18
Liverpool Speke Air
