In [2]:
import pathlib
import os
import problem_bank_scripts as pbs
import altair as alt
import pandas as pd
import yaml
import urllib.parse
from collections import Counter



In [3]:
source_root = '../content/public/'

## Analytics


def topic_labels(question_path, root):
    """Derive the topic labels for a question file from its top-level folder.

    The first path component of ``question_path`` relative to ``root`` is
    treated as the topic folder. The second dot-separated field of that
    folder name is the raw topic, and the folder name with every '.'
    replaced by '-' is the display label.

    Args:
        question_path: Path to a question file located under ``root``.
        root: Directory the question tree was walked from.

    Returns:
        A two-item list ``[topic, nice_topic]``.

    Raises:
        ValueError: If ``question_path`` is not located under ``root``.
        IndexError: If the topic folder name contains no '.'.
    """
    # Path-aware split: works for any root and any platform separator,
    # instead of splitting on a hard-coded copy of the root string and '/'.
    topic_dir = pathlib.Path(question_path).relative_to(root).parts[0]
    return [topic_dir.split('.')[1], topic_dir.replace('.', '-')]


# Collect every Markdown file below source_root, at any depth.
questions = [
    os.path.join(root, file)
    for root, _dirs, files in os.walk(source_root)
    for file in files
    if file.endswith(".md")
]

# One [topic, nice_topic] pair per question file.
o_topics = [topic_labels(q, source_root) for q in questions]

# Unique topic -> display-label lookup table, ordered by display label.
df_nice = (
    pd.DataFrame(o_topics, columns=['Topic', 'Nice Topics'])
    .drop_duplicates()
    .reset_index(drop=True)
    .sort_values(by='Nice Topics', axis=0)
)
# Per-question metadata keyed by "Q<index>", where the index is the
# question's position in `questions`.
question_dict = {}

# Topic declared in each question's header, one entry per question.
topics = []

for i, q in enumerate(questions):

    try:
        mdtext = pathlib.Path(q).read_text(encoding='utf8')

        # Deal with YAML header: take the text that follows the first
        # '---' delimiter line and runs up to the next one (if any).
        # Only the first two delimiters matter, hence maxsplit=2.
        header_text = mdtext.split('---\n', 2)[1]
        header = yaml.safe_load('---\n' + header_text)

        # Look up the required fields inside the guarded region so that a
        # missing key or an empty header is reported with the file path too.
        entry = {
            'title': header['title'],
            'topic': header['topic'],
            'outcomes': header['outcomes'],
        }
    except Exception:
        # Name the offending file, then let the original error propagate.
        print(f'Problem in question: {q}')

        raise

    question_dict[f"Q{i}"] = entry
    topics.append(entry['topic'])

# Tally how many questions declare each topic in their header.
topic_counts = Counter(topics)
df = pd.DataFrame({
    'Topic': list(topic_counts.keys()),
    'Count': list(topic_counts.values()),
})
# Attach the display labels. merge() defaults to an inner join on the shared
# 'Topic' column, so topics with no row in df_nice do not appear in the result.
df = df.merge(df_nice)

## Create plot of questions by topic
chart_title = f'Questions by Topic (N={len(questions)})'
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.Y('Nice Topics:O', title=''),
        alt.X('Count'),
    )
    .properties(title=chart_title)
)
chart.save('../images/topics.png', webdriver='firefox', scale_factor=2)

In [4]:
# Bare last expression: the notebook shows the chart as this cell's output.
chart