In [1]:
# Path of the Databricks archive (.dbc) that the following cells unpack.
fileLocation = "large-language-models.dbc"

In [2]:
# Start from a clean extraction directory so that files left over from a
# prior run cannot get mixed into this conversion.
import errno
import os
import shutil
import zipfile

try:
    shutil.rmtree('tmp_dbc')
except OSError as exc:
    # A missing directory simply means there is nothing to clean up. Any other
    # failure (e.g. a permission error) would leave stale files in place, so
    # it is re-raised instead of being silently ignored.
    if exc.errno != errno.ENOENT:
        raise

if not os.path.isdir('tmp_dbc'):
    os.mkdir('tmp_dbc')

# The archive is opened as an ordinary zip file and unpacked in full.
with zipfile.ZipFile(fileLocation, 'r') as z:
    z.extractall('tmp_dbc')

print ('*** Contents from the .dbc file (usually one file or a directory) ***\n')
print (os.listdir('tmp_dbc'))

*** Contents from the .dbc file (usually one file or a directory) ***

['manifest.mf', 'Large-Language-Models-v2.0.0']


In [4]:
import fnmatch

# Collect (directory, file name) pairs for every '*.python' file found
# anywhere below the extraction directory.
filesToParse = [
    (dirPath, sourceName)
    for dirPath, _subDirs, entries in os.walk('tmp_dbc')
    for sourceName in fnmatch.filter(entries, '*.python')
]

def getIpynbName(path, fileName):
    """Build the output notebook path for one extracted source file.

    The first two components of the normalized ``path`` are dropped and the
    remainder is used as the output directory ('.' when nothing remains).
    The extension of ``fileName`` is replaced by the suffix '_export.ipynb'.

    Args:
        path: Directory in which the source file was found.
        fileName: Name of the source file inside ``path``.

    Returns:
        The output directory joined with the new notebook file name.
    """
    components = os.path.normpath(path).split(os.sep)
    relativeParts = components[2:]
    if relativeParts:
        targetDir = os.path.join(*relativeParts)
    else:
        targetDir = '.'
    stem, _extension = os.path.splitext(fileName)
    return os.path.join(targetDir, stem + '_export.ipynb')

# Preview every output path before any notebook is written.
print ("*** Files to be created (relative to your current working directory) ***")
print ("(Warning: files will be overwritten!)\n")
for sourceEntry in filesToParse:
    print (getIpynbName(*sourceEntry))

*** Files to be created (relative to your current working directory) ***

./Version Info_export.ipynb
LLM 05 - Society and LLMs/LLM 05L - LLMs and Society Lab_export.ipynb
LLM 05 - Society and LLMs/LLM 05 - LLMs and Society_export.ipynb
LLM 03 - Multi-stage Reasoning/LLM 03 - Building LLM Chains_export.ipynb
LLM 03 - Multi-stage Reasoning/LLM 03L - Building LLM Chains Lab_export.ipynb
Includes/Reset_export.ipynb
Includes/Classroom-Setup_export.ipynb
Includes/Test-Framework_export.ipynb
Includes/Workspace-Setup_export.ipynb
Includes/_common_export.ipynb
Includes/Print-Dataset-Copyrights_export.ipynb
LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04b - Evaluating LLMs_export.ipynb
LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04L - Fine-tuning LLMs Lab_export.ipynb
LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04a - Fine-tuning LLMs_export.ipynb
LLM 01 - Applications with LLMs/LLM 01 - LLMs with Hugging Face_export.ipynb
LLM 01 - Applications with LLMs/LLM 01L - LLMs with Hugging Face Lab_export.ipynb

In [7]:
import codecs
import nbformat
from nbformat.v3.nbpy import PyReader
import json
import re

# Preamble of the intermediate nbformat-v3 ".py" text: an encoding line
# followed by the format-version marker.
_header = u'# -*- coding: utf-8 -*-\n' + u'# <nbformat>3.0</nbformat>\n'

# The two cell separators differ only in their tag name, so both are derived
# from a single template.
_cellMarker = u'\n\n# <{0}>\n\n'
_markdownCell = _cellMarker.format(u'markdowncell')
_codeCell = _cellMarker.format(u'codecell')

# Source of the code cell written first into every converted notebook: it
# defines displayHTML on top of IPython and replaces display() with a no-op.
_firstCell = u'\n'.join([
    u'# Increase compatibility with Databricks',
    u'from IPython.display import display as idisplay, HTML',
    u'displayHTML = lambda x: idisplay(HTML(x))',
    u'def display(*args, **kargs): pass',
])

def _renderCommand(command):
    """Render one Databricks command as nbformat-v3 ``.py`` cell text.

    Commands that begin with the ``%md`` magic become markdown cells: the
    magic is removed, ``%(``/``)%`` and ``%[``/``]%`` are rewritten to ``$``
    and ``$$``, and every line is prefixed with ``# ``. All other commands
    become code cells, with any ``baseDir = ...`` assignment line rewritten
    to ``baseDir = 'data'``.

    Args:
        command: Text of a single command from the exported source.

    Returns:
        The cell marker followed by the cell body.
    """
    if re.match(r'\s*%md', command):
        # Also consume the "-sandbox" suffix of the %md-sandbox variant;
        # stripping only "%md" would leave a stray "-sandbox" at the start of
        # the rendered markdown.
        text = re.sub(r'^\s*%md(?:-sandbox)?', '', command, flags=re.MULTILINE)
        text = re.sub(r'(%\(|\)%)', '$', text)
        text = re.sub(r'(%\[|\]%)', '$$', text)
        return _markdownCell + '# ' + '\n# '.join(text.split('\n'))

    text = re.sub(r'^\s*baseDir\s*=.*$', 'baseDir = \'data\'',
                  command, flags=re.MULTILINE)
    return _codeCell + text


def convertToIpynb(fileToParse):
    """Convert one extracted Databricks source file into a v4 ``.ipynb``.

    The source file is JSON with a ``commands`` list; each entry supplies a
    ``position`` and a ``command``. Commands are emitted in sorted
    ``(position, command)`` order after a fixed compatibility cell. The
    notebook is assembled as nbformat-v3 ``.py`` text in memory, parsed with
    ``PyReader``, upgraded to nbformat 4 and written to the path returned by
    ``getIpynbName``. An existing file at that path is overwritten.

    Args:
        fileToParse: ``(directory, fileName)`` pair locating the source file.

    Returns:
        None. Prints ``Created: <path>`` once the notebook has been written.
    """
    with codecs.open(os.path.join(*fileToParse), encoding="utf-8") as fp:
        commands = json.load(fp)['commands']
    orderedCommands = sorted((x['position'], x['command']) for x in commands)

    # Build the intermediate text in memory rather than in a fixed-name
    # scratch file in the working directory: nothing can be clobbered and
    # nothing is left behind if parsing raises.
    pieces = [_header, _codeCell, _firstCell]
    pieces.extend(_renderCommand(command) for _position, command in orderedCommands)
    nb = PyReader().reads(u''.join(pieces))

    outputName = getIpynbName(*fileToParse)
    baseDirectory = os.path.split(outputName)[0]

    # An empty directory part means "current directory"; os.makedirs('')
    # would raise, so only create directories that are actually named.
    if baseDirectory and not os.path.isdir(baseDirectory):
        os.makedirs(baseDirectory)

    with codecs.open(outputName, 'w', encoding="utf-8") as output:
        nbformat.write(nbformat.convert(nb, 4), output)
        print ('Created: {0}'.format(outputName))

# Convert every discovered source file into its own notebook.
for parseTarget in filesToParse:
    convertToIpynb(parseTarget)

Created: ./Version Info_export.ipynb
Created: LLM 05 - Society and LLMs/LLM 05L - LLMs and Society Lab_export.ipynb
Created: LLM 05 - Society and LLMs/LLM 05 - LLMs and Society_export.ipynb
Created: LLM 03 - Multi-stage Reasoning/LLM 03 - Building LLM Chains_export.ipynb
Created: LLM 03 - Multi-stage Reasoning/LLM 03L - Building LLM Chains Lab_export.ipynb
Created: Includes/Reset_export.ipynb
Created: Includes/Classroom-Setup_export.ipynb
Created: Includes/Test-Framework_export.ipynb
Created: Includes/Workspace-Setup_export.ipynb
Created: Includes/_common_export.ipynb
Created: Includes/Print-Dataset-Copyrights_export.ipynb
Created: LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04b - Evaluating LLMs_export.ipynb
Created: LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04L - Fine-tuning LLMs Lab_export.ipynb
Created: LLM 04 - Fine-tuning and Evaluating LLMs/LLM 04a - Fine-tuning LLMs_export.ipynb
Created: LLM 01 - Applications with LLMs/LLM 01 - LLMs with Hugging Face_export.ipynb
Created: L