## Alberta Education Learning Outcomes: [learnalberta.ca](http://www.learnalberta.ca/ProgramsOfStudy.aspx)

**Disclaimer**: this notebook is under development, so its inner workings may not make sense yet.

_Assembled By Eric Easthope_

$\unicode{xA9}$ _MIT License_

---

In [1]:
# URL of the Programs of Study page; requested below and again, with query
# parameters, by soupForField
url = 'http://www.learnalberta.ca/ProgramsOfStudy.aspx'

from urllib.parse import parse_qs, urlparse

from bs4 import BeautifulSoup as bs
from bs4.dammit import EncodingDetector
from requests import get, utils

# request the page, then display whether the response reports success
r = get(url)
r.ok

True

In [2]:
# sort out which encoding to use:
# take the HTTP-level encoding only when the content-type header names a charset
if 'charset' in r.headers.get('content-type', '').lower():
    httpEncoding = r.encoding
else:
    httpEncoding = None

# an encoding declared inside the HTML itself takes precedence over the header
htmlEncoding = EncodingDetector.find_declared_encoding(r.content, is_html=True)
encoding = htmlEncoding or httpEncoding
encoding

'utf-8'

In [3]:
# begin parsing HTML
soup = bs(r.content, "lxml", from_encoding=encoding)

# For every <strong> tag: the element right after it is kept as the field, and
# the element two steps further on supplies the <option> entries (its first
# <option> is dropped).
searchFields = []
searchOptions = []
for heading in soup.find_all('strong'):
    fieldTag = heading.find_next()
    searchFields.append(fieldTag)
    searchOptions.append(fieldTag.find_next().find_next().find_all("option")[1:])

fieldsOptionsReference = dict(zip(searchFields, searchOptions))

# print program fields, each followed by its options indented with a tab
for fieldTag, optionTags in fieldsOptionsReference.items():
    print('\n'.join([fieldTag.text] + ['\t ' + tag.text for tag in optionTags]))

Core Programs
	 English Language Arts
	 Fine Arts
	 Health / Career & Life Management
	 Mathematics
	 Physical Education
	 Science
	 Social Studies
Complementary Programs
	 Aboriginal Studies
	 CTF
	 CTS: Apprenticeship
	 CTS: Business, Administration, Finance & Information Technology (BIT)
	 CTS: Career Transitions (CTR)
	 CTS: Health, Recreation & Human Services (HRH)
	 CTS: Media, Design & Communication Arts (MDC)
	 CTS: Natural Resources (NAT)
	 CTS: Trades, Manufacturing & Transportation (TMT)
	 English as a Second Language
	 Fine Arts
	 First Nations, Métis and Inuit (FNMI) Languages
	 French as a Second Language (FSL)
	 International Languages
	 Knowledge and Employability Occupational Courses


---

In [4]:
# program field to look up: sent as the "Core" query parameter by soupForField,
# and its first two letters (upper-cased) prefix every outcome identifier
topic = 'Mathematics'

In [5]:
# shared helper: request a page and parse it using the encoding it declares
def _fetchSoup(pageUrl, params, parser, timeout=30):
    """Request ``pageUrl`` with ``params`` and return the body as BeautifulSoup.

    The encoding declared inside the HTML is preferred; the HTTP-level
    encoding is the fallback, and only when the content-type header names a
    charset explicitly.

    Args:
        pageUrl: address to request.
        params: dict of query-string parameters.
        parser: name of the BeautifulSoup parser to use (e.g. "lxml").
        timeout: seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.HTTPError: the server answered with 4xx/5xx.
        requests.exceptions.Timeout: no answer within ``timeout`` seconds.
    """
    response = get(pageUrl, params=params, timeout=timeout)
    # fail loudly rather than parse an error page as if it were real content
    response.raise_for_status()

    contentType = response.headers.get('content-type', '').lower()
    httpEncoding = response.encoding if 'charset' in contentType else None
    htmlEncoding = EncodingDetector.find_declared_encoding(response.content, is_html=True)
    return bs(response.content, parser, from_encoding=htmlEncoding or httpEncoding)

# parse corresponding page HTML for a field
def soupForField(field):
    """Request the module-level ``url`` with ``field`` as the "Core" query
    parameter and return the page parsed with lxml."""
    params = {
        "lang": "en",
        "posLang": "en",
        "Core": field
    }
    return _fetchSoup(url, params, "lxml")

# parse corresponding page HTML for a program
def soupForProgram(programId):
    """Request ProgramOfStudy.aspx for ``programId`` and return the page
    parsed with html5lib."""
    # distinct local name so the module-level `url` is not shadowed
    programUrl = 'http://www.learnalberta.ca/ProgramOfStudy.aspx'
    params = {
        "lang": "en",
        "ProgramId": programId
    }
    return _fetchSoup(programUrl, params, "html5lib")

In [6]:
# find programs: map each ProgramId found in the field's page to the link text
programs = soupForField(topic).find_all('a', href=lambda href: href and "ProgramOfStudy" in href)
programReference = {}
for p in programs:
    # parse_qs copes with empty queries, '=' inside values and percent-encoding,
    # all of which break a manual split on '&' and '='
    programIds = parse_qs(urlparse(p['href']).query).get('ProgramId')
    # a link can match the href filter without carrying a ProgramId; skip it
    if programIds:
        # if the parameter is repeated, the last occurrence wins
        programReference[programIds[-1]] = p.text.strip()
programReference

{'174398': 'Mathematics (K & E) 10-4, 20-4',
 '26061': 'Mathematics Kindergarten to Grade 9 (2007, Updated 2016)',
 '348234': 'Mathematics Grade 10 - 12',
 '432948': 'Mathematics 31',
 '447048': 'Mathematics (K & E) Grade 8 - 9'}

In [7]:
# specify ID as above: one of the keys of programReference
# ("348234" is listed there as 'Mathematics Grade 10 - 12')
programId = "348234"

# specify course code; sections whose title text contains it are selected
# below, and it also becomes part of every outcome identifier
course = '20-1'

In [8]:
# find headers for course code
programData = soupForProgram(programId)
rationaleSections = programData.find_all('div', class_='title')

# The loop variable is deliberately not named `r`: that name holds the HTTP
# response from the first cell, which the encoding and parsing cells read
# again whenever they are re-run.
for i, section in enumerate(rationaleSections):
    if course in section.text:
        print(i, ':', section.text)

29 : 20-1 
30 : Algebra & Number (20-1)
37 : Trigonometry (20-1)
41 : Relations & Functions (20-1)


---

In [9]:
def removeBrackets(string):
    """Return ``string`` without its first parenthesised part, whitespace-stripped.

    Only the first '(' and the first ')' that follows it are removed, together
    with everything between them. When the string holds no such pair it is
    returned stripped but otherwise unchanged, so callers always receive a
    str (never None).
    """
    start = string.find('(')
    # search for the closing bracket only after the opening one, so a stray
    # ')' earlier in the text cannot produce an overlapping slice
    end = string.find(')', start) if start != -1 else -1
    if start == -1 or end == -1:
        return string.strip()
    return (string[:start] + string[end + 1:]).strip()
    
def getSpecificOutcomes(generalOutcome):
    """Collect the specific outcomes found under a general-outcome element.

    Every ``<div class="formalp">`` inside ``generalOutcome`` yields one dict:

    * ``'name'``: ``'Specific Outcome N'``, N being the digits of the
      outcome's first nested ``<div>``.
    * ``'outcomes'``: list of description strings. With a ``<ul>`` present
      there is one entry per ``<li>`` (lead-in + bullet + trailing text, full
      stops removed); otherwise a single entry of lead-in + trailing text.
    * ``'processes'``: text of the ``<a>`` links in the next sibling element,
      or in the sibling after that when the first holds none.

    Missing trailing text and missing siblings are treated as empty rather
    than raising.
    """
    specificOutcomeData = []
    for outcome in generalOutcome.find_all('div', class_='formalp'):
        # get enumeration for specific outcome
        outcomeNumber = ''.join(c for c in outcome.find('div').text if c.isdigit())

        # get specific outcome description (lead-in paragraph, punctuation removed)
        description = (outcome.find('p').text.strip()
                       .replace('\r', ' ')
                       .replace(':', '')
                       .replace('.', '')
                       .replace('\n', ' '))

        # text sitting directly inside the outcome element; absent -> empty,
        # so both the bulleted and the plain path below can use it safely
        descriptionEnding = outcome.find(text=True, recursive=False)
        ending = descriptionEnding if descriptionEnding is not None else ''

        # if they exist, distinguish bullet points
        bullets = []
        bulletList = outcome.find('ul')
        if bulletList is not None:
            bullets = [b.text.replace('.', '') for b in bulletList.find_all('li')]

        # learning processes: "critical aspects of learning, doing and understanding"
        # look in the next sibling, then in the one after it if that had no links
        processes = []
        sibling = outcome.find_next_sibling()
        for _ in range(2):
            if sibling is None:
                break
            processes = [a.text for a in sibling.find_all('a')]
            if processes:
                break
            sibling = sibling.find_next_sibling()

        if bullets:
            cleanEnding = ending.strip().replace('.', '')
            outcomeList = [(description + ' ' + b + ' ' + cleanEnding).strip() for b in bullets]
        else:
            outcomeList = [description + ending]

        specificOutcomeData.append({
            'name': 'Specific Outcome'+' '+outcomeNumber,
            'outcomes': outcomeList,
            'processes': processes
        })
    return specificOutcomeData

def getOutcomesByIndex(index):
    """Build the outcome rows for the section at ``rationaleSections[index]``.

    Reads the module-level ``rationaleSections``, ``topic`` and ``course``.

    Returns a list of ``[identifier, description, processes]`` lists. The
    first row is the general outcome (identifier ``<TO><course>.<CODE>``,
    empty process list); each following row is a specific outcome whose
    identifier appends its 1-based position and, when the outcome has
    several bullet variants, a letter ``A``, ``B``, ...
    """
    generalOutcome = rationaleSections[index].find_next_sibling().parent
    outcomeTitleText = generalOutcome.find('div', class_='title').text # first title instance
    outcomeTitle = removeBrackets(outcomeTitleText)
    # guard against removeBrackets handing back None (title without a bracket
    # pair), which would otherwise crash the uppercase scan and join below
    if outcomeTitle is None:
        outcomeTitle = outcomeTitleText.strip()

    outcomeDescriptionText = generalOutcome.find('div', class_='subtitle').text # first subtitle instance
    # keep what follows the first colon; with no colon find() gives -1 and the
    # slice starts at 0, i.e. the whole subtitle is kept
    colonIndex = outcomeDescriptionText.find(':')
    outcomeDescription = outcomeDescriptionText[colonIndex+1:].replace('.', '').strip()

    # the capital letters of the title form the group code
    outcomeCode = ''.join(c for c in outcomeTitle if c.isupper())
    identifier = topic[:2].upper()+course+'.'+outcomeCode

    # general outcome first, then one row per specific-outcome description
    outcomes = [[identifier, ' - '.join([outcomeTitle, outcomeDescription]), []]]
    for number, outcome in enumerate(getSpecificOutcomes(generalOutcome), start=1):
        variants = outcome['outcomes']
        for j, text in enumerate(variants):
            # letter suffix only when there are several variants to tell apart
            suffix = chr(ord('A') + j) if len(variants) > 1 else ''
            outcomes.append([identifier + str(number) + suffix, text, outcome['processes']])
    return outcomes

In [10]:
# (section index, capital letters of the bracket-free title) for each section
# whose title contains both an opening bracket and the course code
outcomeGroups = [
    (index, ''.join(filter(str.isupper, removeBrackets(section.text))))
    for index, section in enumerate(rationaleSections)
    if '(' in section.text and course in section.text
]

def groupByOutcomeGroup(outcomeGroups):
    """Turn ``(sectionIndex, groupCode)`` pairs into graph nodes and edges.

    Reads the module-level ``topic`` and ``course`` and calls
    ``getOutcomesByIndex`` for every pair.

    Returns ``(nodes, edges)``:

    * one node dict per outcome (``id``, ``description``, ``group``,
      ``processes``, ``r``), where ``r`` is 16 for the outcome whose code
      equals the group code and 4 for the others;
    * an edge of value 1 from the group's identifier to each of its other
      outcomes;
    * an edge of value 1/50 for every ordered pair of outcome ids that were
      recorded under the same process.
    """
    from itertools import permutations

    nodes = []
    edges = []
    outcomeProcesses = {}  # process label -> ids of the outcomes listing it

    for groupIndex, (sectionIndex, group) in enumerate(outcomeGroups):
        for identifier, description, processes in getOutcomesByIndex(sectionIndex):
            for process in processes:
                outcomeProcesses.setdefault(process, []).append(identifier)

            # the part after the '.' matches the group code only for the hub node
            isHub = identifier.split('.')[1] == group
            nodes.append({
                'id': identifier,
                'description': description,
                'group': groupIndex,
                'processes': processes,
                'r': 16 if isHub else 4
            })
            # cluster nodes by group
            if not isHub:
                edges.append({
                    'source': topic[:2].upper()+course+'.'+group,
                    'target': identifier,
                    'value': 1
                })

    # match by processes
    for identifiers in outcomeProcesses.values():
        edges.extend(
            {'source': first, 'target': second, 'value': 1/50}
            for first, second in permutations(identifiers, 2)
        )
    return nodes, edges

In [11]:
# generate node and edge data for graph: two lists of dicts built by
# groupByOutcomeGroup from the sections selected in outcomeGroups
nodes, edges = groupByOutcomeGroup(outcomeGroups)

---

In [12]:
#requires pip install packyou
from packyou.github.callysto.nbplus import d3graph
from d3graph import D3, SVG, Graph

In [13]:
# Force settings for the graph built by groupByOutcomeGroup. Every value is a
# JavaScript expression kept as a string.
#   forceX / forceY: nodes with r > 12 are targeted at a point on a circle of
#                    radius height/3, at an angle set by their group; all
#                    other nodes are targeted at 0
#   forceRadial:     named 'r', radius height/3
#   forceLink:       links are matched to nodes by id and given strength 0
#   forceCollide:    collision radius equals the node's r
forces = dict(
    forceX=dict(
        x='d => d.r > 12 ? (height/3)*Math.cos(d.group * 2*Math.PI/d3.max(nodes, d => d.group+1)) : 0',
        strength='1/5',
    ),
    forceY=dict(
        y='d => d.r > 12 ? (height/3)*Math.sin(d.group * 2*Math.PI/d3.max(nodes, d => d.group+1)) : 0',
        strength='1/5',
    ),
    forceRadial=dict(
        name='r',
        radius='d => height/3',
        strength='1/5',
    ),
    forceLink=dict(
        id='d => d.id',
        strength='d => 0',
    ),
    forceCollide=dict(
        radius='d => d.r',
    ),
)

In [14]:
# Node styling; every value is a JavaScript expression kept as a string.
# The strings are single-quoted in Python, so the JavaScript double quotes
# need no escaping; a stray backslash would be an invalid Python escape and
# would end up inside the JavaScript source.
nodeView = {
    'r': 'd => d.r',
    # nodes with r < 12 use the odd palette entry of their group's colour
    # pair, the remaining nodes use the even entry
    'fill':  ('d => d.r < 12 ?'
              ' d3.schemeCategory20[1+d.group*2] :'
              ' d3.schemeCategory20[d.group*2]'),
    'stroke': '"#fff"',
    # title text: "<id> : <description>"
    'title': 'd => d.id + " : " + d.description'
}

In [15]:
# Edge styling; every value is a JavaScript expression kept as a string.
# No backslashes are needed inside the single-quoted Python strings: "\#" is
# an invalid Python escape that would put a literal backslash into the
# JavaScript source.
edgeView = {
    'stroke-width': 'd => d.value',
    # edges with value below 1 are drawn in #111, the others in #e9e9e9
    'stroke': 'd => d.value < 1 ? "#111" : "#e9e9e9"',
}

In [16]:
# drawing surface for the graph, 500 high
# NOTE(review): centerOrigin presumably places (0, 0) at the centre of the SVG,
# which the forceX/forceY targets above appear to rely on; confirm in d3graph
svg = SVG(height=500, centerOrigin=True)

# assemble the graph from the force settings, then the node and edge data with
# their view styles
G = Graph()
G.addForce(**forces)
G.addNode(nodes, style=nodeView)
G.addEdge(edges, style=edgeView)
G.addTime()  # NOTE(review): effect not visible from this notebook; confirm in d3graph

# combine surface and graph and build the output (displayed as a Javascript object)
(svg+G).make()

<IPython.core.display.Javascript object>

---