# When is good reader

In [92]:
import pandas as pd
import datetime
import os
import io

In [94]:
class Reader:
    """Line-oriented cursor over a text (HTML) file.

    The whole file is held in memory as a list of rows. The reader keeps a
    cursor made of a row index (``row``) and a character offset inside that
    row (``point``).

    Searches always start at the *beginning* of the current row (``point`` is
    not used as a search start), so after a hit the caller has to advance
    with :meth:`skiprow`, otherwise the same hit is found again.
    """

    def __init__(self, filename):
        """Read *filename* (UTF-8) and place the cursor at row 0, offset 0."""
        self.filename = filename
        # The context manager closes the file even if reading fails.
        with open(filename, "r", encoding="utf-8") as f:
            # text is a list of strings, each element - a row of the original html.
            self.text = f.readlines()
        self.row = 0
        self.point = 0

    def _find_snip(self, snip, after=0):
        """Find the first occurrence of *snip* at or after row *after*.

        Returns ``(row, offset)`` of the hit, or ``(None, None)`` when the
        fragment does not occur (including when *after* is past the last row).
        """
        # range() is empty when after >= len(self.text), so no extra guard needed.
        for i in range(after, len(self.text)):
            offset = self.text[i].find(snip)
            if offset != -1:
                return (i, offset)
        return (None, None)

    def goto(self, row, point=0):
        """Move the cursor to the given row and offset (not range-checked)."""
        self.row = row
        self.point = point

    def reset(self):
        """Move the cursor back to the start of the text."""
        self.goto(0, 0)

    def seek(self, snip, move=True):
        """Search for *snip* from the start of the current row onwards.

        Returns ``(row, offset)`` of the first hit, or ``None`` if there is
        none. The cursor is moved onto the hit only when *move* is true; on a
        miss the cursor is never moved.
        """
        row, point = self._find_snip(snip, after=self.row)
        if row is None:
            return None
        if move:
            self.row = row
            self.point = point
        return (row, point)

    def say(self, shift, length):
        """Return up to *length* characters of the current row.

        Reading starts *shift* characters after the cursor and is clipped at
        the end of the row. Raises ``IndexError`` if the cursor row is outside
        the text.
        """
        line = self.text[self.row]
        start = self.point + shift
        stop = min(start + length, len(line))
        return line[start:stop]

    def skiprow(self):
        """Move the cursor to the start of the next row."""
        self.row += 1
        self.point = 0

    def sift(self, snip, shift, length, maxsteps=None):
        """Look for a snip, read fixed length at a fixed distance.

        Starting from the current row, collects ``say(shift, length)`` for the
        first occurrence of *snip* in each row that contains it, and returns
        the collected strings. At most *maxsteps* hits are collected when it
        is given. Only one hit per row is reported, because the cursor skips
        to the next row after each hit.
        """
        count = 0
        out = []
        while True:
            count += 1
            if self.seek(snip) is None:
                break
            # Checked after seek(): the cursor already sits on the hit that
            # was not collected when the limit is reached.
            if maxsteps is not None and count > maxsteps:
                break
            out.append(self.say(shift, length))
            self.skiprow()
        return out

    def double_sift(self, snip1, snip2, pad1=0, pad2=0):
        """Use two snips as brackets, and pad with pads.

        For every row (from the current one on) that contains *snip1*, returns
        the text from *pad1* characters after the start of *snip1* up to
        *pad2* characters before the start of *snip2* in the same row. Rows
        where *snip2* only shows up on a later row are reported on stdout and
        skipped. Stops when either snip can no longer be found.

        NOTE(review): *snip2* is searched from the start of the row, so an
        occurrence located before *snip1* yields an empty string.
        """
        out = []
        while True:
            if self.seek(snip1, move=True) is None:
                break
            res = self.seek(snip2, move=False)
            if res is None:
                break
            if res[0] != self.row:
                print('No snip2 within current row')
                # Advance past this row; without this the next seek(snip1)
                # finds the very same row again and the loop never ends.
                self.skiprow()
                continue
            out.append(self.say(pad1, res[1] - self.point - pad2 - pad1))
            self.skiprow()
        return out
                
    
# Test: load the saved page and, for each row containing 'ResultsPopup?',
# read up to 13 characters starting 53 characters after the start of that marker.
reader = Reader('../../source.html')
reader.reset()
slot_ids = reader.sift('ResultsPopup?', 53, 13)
# Last expression of the cell: displays how many fragments were found.
len(slot_ids)

156

In [95]:
# Slot ids: for each row containing 'ResultsPopup?', read up to 13 characters
# starting 53 characters after the start of that marker.
reader.reset()
slots = reader.sift('ResultsPopup?', 53, 13)
# Report the list this cell actually builds (previously showed len(slot_ids),
# a different variable left over from the test cell).
len(slots)

156

In [97]:
# Names: the text bracketed by '.name =' and the ';' that ends the row,
# starting 9 characters after the start of the opening marker and stopping
# 1 character before the closing one, with surrounding whitespace removed.
reader.reset()
names = [raw.strip() for raw in reader.double_sift('.name =', ';\n', 9, 1)]
len(names)

27

In [98]:
# Availability, encoded: each match between 'myCanDos ' and 'split' is a
# comma-separated string; break every one of them into a list of items.
reader.reset()
temp = reader.double_sift('myCanDos ', 'split', 12, 2)
cando = [encoded.split(',') for encoded in temp]

In [None]:
# Put it all in a long-form pandas dataframe: one row per (person, slot).
TOTAL_DAYS_ASKED = 12  # How many days were there in the table (must be >= 1)
START_TIME = 8  # Value of the 'Time' column for the first TOTAL_DAYS_ASKED slots

# Collect plain records and build the frame once. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and it copied the whole frame
# on every call, which made this loop quadratic.
records = []
for iperson, name in enumerate(names):
    print(name)
    # Set of the slot ids listed for this person: constant-time lookups below.
    available = set(cando[iperson])
    for islot, slot in enumerate(slots):
        # Consecutive slots walk through the days; after TOTAL_DAYS_ASKED of
        # them the day wraps back to 0 and the time goes up by one.
        time_step, day = divmod(islot, TOTAL_DAYS_ASKED)
        time = START_TIME + time_step
        records.append({'Name': name, 'Day': day, 'Time': time,
                        'Cando': slot in available})

# Passing columns= keeps the column order (and the columns themselves when
# there are no records at all).
df = pd.DataFrame(records, columns=['Name', 'Day', 'Time', 'Cando'])

In [100]:
# Save the long-form table to 'availability.csv' in the working directory,
# leaving out the dataframe index column.
df.to_csv('availability.csv', index=False)