![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

# Notebook Readability Checker

In [1]:
import os
import json
import pandas as pd
#!pip install --user textstat # https://github.com/shivam5992/textstat
import textstat

In [3]:
# https://stackoverflow.com/questions/761824/python-how-to-convert-markdown-formatted-text-to-text

#!pip install --user markdown
from markdown import Markdown
from io import StringIO

def unmark_element(element, stream=None):
    if stream is None:
        stream = StringIO()
    if element.text:
        stream.write(element.text)
    for sub in element:
        unmark_element(sub, stream)
    if element.tail:
        stream.write(element.tail)
    return stream.getvalue()


# patching Markdown
Markdown.output_formats["plain"] = unmark_element
__md = Markdown(output_format="plain")
__md.stripTopLevelTags = False


def unmark(text):
    return __md.convert(text)

In [4]:
df = pd.DataFrame(columns=['Notebook', 'Cell Number', 'Readability', 'Text'])

for root, dirs, files in os.walk("."):
    for filename in files:
        if filename.endswith('.ipynb'):
            notebook_name = filename[:-6]
            file = os.path.join(root, filename)
            notebook = json.load(open(file))
            cell_number = 0
            for cell in notebook['cells']:
                cell_number += 1
                cell_type = cell['cell_type']
                if cell_type == 'markdown':
                    #text = cell['source'][0].replace('[', '').replace(']', '').replace('#', '')
                    text = unmark(cell['source'][0])
                    readability = textstat.text_standard(text, float_output=True) # .flesch_kincaid_grade(text)
                    df = df.append({
                        'Notebook':notebook_name,
                        'Cell Number':cell_number,
                        'Readability':readability,
                        'Text':text},
                        ignore_index=True)
df

Unnamed: 0,Notebook,Cell Number,Readability,Text
0,Outreach-Education-Council-2019,1,18.0,Curricular Outcomes Using Computational Thinki...
1,Outreach-Education-Council-2019,2,22.0,The ability to process information in an analy...
2,Outreach-Education-Council-2019,3,1.0,Who is here?
3,Outreach-Education-Council-2019,4,12.0,How many secondary teachers?
4,Outreach-Education-Council-2019,5,12.0,How many elementary teachers?
...,...,...,...,...
253,Outreach-Education-Council-2019-checkpoint,39,1.0,Call to Action
254,Outreach-Education-Council-2019-checkpoint,40,1.0,Get in Touch
255,notebooks-parse-test-checkpoint,1,1.0,Notebook Parse Test
256,notebooks-parse-test-checkpoint,3,1.0,First and Last Cells


In [5]:
print(df[df['Notebook']=='Outreach-Education-Council-2019']['Readability'].median())
df[df['Notebook']=='Outreach-Education-Council-2019']

3.0


Unnamed: 0,Notebook,Cell Number,Readability,Text
0,Outreach-Education-Council-2019,1,18.0,Curricular Outcomes Using Computational Thinki...
1,Outreach-Education-Council-2019,2,22.0,The ability to process information in an analy...
2,Outreach-Education-Council-2019,3,1.0,Who is here?
3,Outreach-Education-Council-2019,4,12.0,How many secondary teachers?
4,Outreach-Education-Council-2019,5,12.0,How many elementary teachers?
5,Outreach-Education-Council-2019,6,15.0,How many administrators?
6,Outreach-Education-Council-2019,7,0.0,Who else?
7,Outreach-Education-Council-2019,8,3.0,What brought you here?
8,Outreach-Education-Council-2019,9,0.0,Outline
9,Outreach-Education-Council-2019,10,0.0,


In [None]:
for root, dirs, files in os.walk("."):
    for filename in files:
        if filename.endswith('.ipynb'):
            file = os.path.join(root, filename)
            notebook = json.load(open(file))
            print(file)
            #print(len(notebook['cells']))
            #print(notebook['cells'][0]['source'])
            print(notebook['cells'][-1]['source'])
            print('')

[![Callysto.ca License](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-bottom.jpg?raw=true)](https://github.com/callysto/curriculum-notebooks/blob/master/LICENSE.md)