In [1]:
import json
import os

from bs4 import BeautifulSoup
from bs4.element import NavigableString

from libedx import parse_xml_file
from libedx import parse_xml_file_refusive
from libedx import print_course
from libedx import extract_course_tree


containerdir = 'chefdata/Sample2'

course_list = json.load(open(os.path.join(containerdir,'course_list.json')))
course_list


{'title': 'HP LIFE Sample Courses',
 'kind': 'HP LIFE couses listing',
 'courses': [{'name': 'Success Mindset (English)',
   'path': 'Success_Mindset_English',
   'lang': 'en'},
  {'name': '3D Printing (हिन्दी)', 'path': '3D_printing_Hindi', 'lang': 'hi'},
  {'name': 'Cash flow (中文)', 'path': 'Cash_flow_Chinese', 'lang': 'zh'},
  {'name': '3D Printing (العربية)',
   'path': 'Design_thinking_Arabic',
   'lang': 'ar'}]}

In [2]:
course = course_list['courses'][0]
print('info from course_list json=', course)
basedir = os.path.join(containerdir, course['path'])
coursedir = os.path.join(basedir, 'course')
data = extract_course_tree(coursedir)
print_course(data)

info from course_list json= {'name': 'Success Mindset (English)', 'path': 'Success_Mindset_English', 'lang': 'en'}
 - Success mindset kind=course 	  url_name=course attrs={'kind': 'course', 'display_name': 'Success mindset', 'enrollment_start': '"2018-10-11T00:00:00+00:00"', 'graceperiod': '', 'language': 'en', 'minimum_grade_credit': '0.8', 'start': '"2018-10-11T00:00:00+00:00"', 'url_name': 'course', 'org': 'hp-life-e-learning', 'course': '6638hpl-en30'}
    - Start Course kind=chapter 	 
       - Success mindset kind=sequential 	 
          - Unit kind=vertical 	 
             -  kind=html 	  url_name=5e7e7ad5290f46cbbba0642d913f13c9
    - Story kind=chapter 	 
       - Story Slides kind=sequential 	 
          - Unit kind=vertical 	 
             -  kind=problem 	 activity_ref=Success Mindset - Story
    - Business Concept kind=chapter 	 
       - Activity kind=sequential 	 
          - Unit kind=vertical 	 
             -  kind=problem 	 activity_ref=Business Concept
    - Technol

In [3]:
def flatten_subtree(chapter):
    """
    Returns a flat list of the content nodes
    """
    content_items = []
    for sequential in chapter['children']:
        for vertical in sequential['children']:
            for content_item in vertical['children']:
                content_item['title'] = sequential['display_name']
                content_items.append(content_item)
    return content_items


In [8]:
from le_utils.constants import content_kinds, file_types, licenses
from ricecooker.classes.licenses import get_license

from sushichef import transform_resource_folder
from sushichef import transform_html
from sushichef import transform_articulate_storyline_folder

HPLIFE_LICENSE = get_license(licenses.CC_BY, copyright_holder='HP LIFE').as_dict()






def build_subtree_from_course(course):
    print('Building a tree from course', course)
    course_dict = dict(
        title=course['name'],
        language=course['lang'],
        children = [],
    )
    basedir = os.path.join(containerdir, course['path'])
    contentdir = os.path.join(basedir, 'content')
    coursedir = os.path.join(basedir, 'course')
    data = extract_course_tree(coursedir)

    # TODO: title = data['display_name'] + (first_native_name)

    course_dict['source_id'] = data['course']


    for i, chapter in enumerate(data['children']):

        if 'display_name' not in chapter:
            print('skipping title-less wiki')
            continue
        if i == 4:
            print('skipping course feedback', chapter['display_name'])
            continue

        chapter_dict = dict(
            title=chapter['display_name'],
            source_id=chapter['display_name'],
            children = [],
        )
        course_dict['children'].append(chapter_dict)

        content_items = flatten_subtree(chapter)
        for item in content_items:
            html5_dict = dict(
                kind=content_kinds.HTML5,
                title=chapter_dict['title'],
                source_id=chapter_dict['title'],
                license=HPLIFE_LICENSE,
                language=course['lang'],
                files=[],
            )
            chapter_dict['children'].append(html5_dict)


            kind = item['kind']

            # Resouce folder
            if kind == 'html' and 'activity' in item:
                activity_ref = item['activity']['activity_ref']
                zip_info = transform_resource_folder(contentdir, activity_ref, item['content'])
                if zip_info:
                    zippath = zip_info['zippath']
                else:
                    continue

            # Generic HTML
            elif kind == 'html':
                zip_info = transform_html(item['content'])
                zippath = zip_info['zippath']

            # Articulate Storyline
            elif kind == 'problem' and 'activity' in item:
                activity_ref = item['activity']['activity_ref']
                zip_info = transform_articulate_storyline_folder(contentdir, activity_ref)
                if zip_info:
                    html5_dict['thumbnail'] = zip_info['thumbnail']
                    zippath = zip_info['zippath']
                else:
                    print('transform_articulate_storyline_folder returned None')
                    continue

            file_dict = dict(
                file_type=file_types.HTML5,
                path=zippath,
                language=course['lang'],
            )
            html5_dict['files'].append(file_dict)
        
    return course_dict



channel_dict = dict(    
    children = []
)

for course in course_list['courses']:
    course_dict = build_subtree_from_course(course)
    channel_dict['children'].append(course_dict)
    


Building a tree from course {'name': 'Success Mindset (English)', 'path': 'Success_Mindset_English', 'lang': 'en'}
skipping course feedback Course Feedback
skipping title-less wiki
Building a tree from course {'name': '3D Printing (हिन्दी)', 'path': '3D_printing_Hindi', 'lang': 'hi'}
Found resources_folder {'kind': 'resources_folder', 'bucket_url': 'https://s3.amazonaws.com/hp-life-content', 'bucket_path': 'Antonio+TechClass+Academy/3D+Printing+(Hindi)', 'activity_ref': 'Downloadable Resources', 'entrypoint': None}
https://s3.amazonaws.com/hp-life-content/Antonio+TechClass+Academy/3D+Printing+(Hindi)/Downloadable+Resources/3D+printing+application+areas+HI.pdf
https://s3.amazonaws.com/hp-life-content/Antonio+TechClass+Academy/3D+Printing+(Hindi)/Downloadable+Resources/3D+printing++marketplaces+-+services+-+resources+HI.pdf
https://s3.amazonaws.com/hp-life-content/Antonio+TechClass+Academy/3D+Printing+(Hindi)/Downloadable+Resources/3D+printing+processes+and+technologies+HI.pdf
https://s3

In [9]:
channel_dict

{'children': [{'title': 'Success Mindset (English)',
   'language': 'en',
   'children': [{'title': 'Start Course',
     'source_id': 'Start Course',
     'children': [{'kind': 'html5',
       'title': 'Start Course',
       'source_id': 'Start Course',
       'license': {'license_id': 'CC BY',
        'copyright_holder': 'HP LIFE',
        'description': None},
       'language': 'en',
       'files': [{'file_type': 'html5',
         'path': '/var/folders/wc/2r44j8gs4gn56t1xtw5f6wlm0000gn/T/tmpm3q3vteu.zip',
         'language': 'en'}]}]},
    {'title': 'Story',
     'source_id': 'Story',
     'children': [{'kind': 'html5',
       'title': 'Story',
       'source_id': 'Story',
       'license': {'license_id': 'CC BY',
        'copyright_holder': 'HP LIFE',
        'description': None},
       'language': 'en',
       'files': [{'file_type': 'html5',
         'path': '/var/folders/wc/2r44j8gs4gn56t1xtw5f6wlm0000gn/T/tmpf5qjocmr.zip',
         'language': 'en'}],
       'thumbnail': 'ch

In [11]:
doc = BeautifulSoup('<div><p>hello</p></div>', 'html5lib')

In [17]:
from bs4 import Tag
meta = Tag(name='meta', attrs={'charset':'utf-8'})
doc.head.append(meta)

In [18]:
doc

<html><head><meta charset="utf-8"></meta></head><body><div><p>hello</p></div></body></html>