In [102]:
import json
import os
import re
from datetime import date
from bs4 import BeautifulSoup
from datetime import datetime
from pathlib import Path

In [103]:
def create_beamer_content(main_title, slides):
    """Wrap already rendered slide markup in a complete Beamer document.

    Args:
        main_title: text substituted into the ``\\title{...}`` command.
        slides: LaTeX source of the sections/frames; inserted between the
            ``\\AtBeginSection`` hook and ``\\end{document}``.

    Returns:
        The full ``.tex`` source as one string.  ``\\date{...}`` is filled
        with ``date.today()``.
    """
    document_template = '''
\\documentclass{beamer}
\\usepackage[T1]{fontenc}
\\usepackage[polish]{babel}
\\usepackage[utf8]{inputenc}
\\usepackage{array}
\\usepackage{url}
\\usetheme{Boadilla}
\\setbeamertemplate{frametitle continuation}{}
\\titlegraphic{\\includegraphics[width=4cm]{../logo_kul.jpeg}}        
\\title{%s}
\\author{KPI KUL}
\\institute{\\url{ai.kul.pl}}
\\date{%s}

\\begin{document}
\\frame{\\titlepage}

\\AtBeginSection[]
{
    \\begin{frame}[allowframebreaks]
        \\frametitle{Spis treści}
        \\tableofcontents[currentsection]
    \\end{frame}
}    

%s

\\end{document}
    '''

    # Order must follow the three %s placeholders: title, date, slides.
    substitutions = (main_title, date.today(), slides)
    return document_template % substitutions


def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span deleted.

    The match is non-greedy and ``.`` does not cross line breaks, so only
    tags that open and close on the same line are removed.
    """
    return re.sub(r'<.*?>', '', text)


def remove_empty_slides(text):
    """Delete frames that consist of a title and nothing else.

    A frame is considered empty when ``\\begin{frame}`` is followed directly
    by a single-line ``\\frametitle{...}`` and then only whitespace before
    ``\\end{frame}``.

    Args:
        text: LaTeX source containing zero or more frames.

    Returns:
        ``text`` with every empty frame removed; everything else untouched.
    """
    # Raw string: the previous non-raw literal relied on "\s" being passed
    # through unchanged, which is an invalid string escape (SyntaxWarning on
    # Python 3.12+).  The compiled regex is the same.
    empty_slide = re.compile(r'\\begin{frame}\n\\frametitle{.*}\n\s*\\end{frame}')
    return empty_slide.sub('', text)


def clean_content(content):
    """Strip markdown markers and HTML tags from a text fragment.

    Every ``#`` and ``*`` character is removed, HTML tags are deleted via
    ``remove_html_tags`` and surrounding whitespace is trimmed.  Underscores
    are left as they are (no LaTeX escaping is applied).
    """
    without_markers = content.replace('#', '').replace('*', '')
    return remove_html_tags(without_markers).strip()


def convert_md_table_to_latex(table):
    """Render a pipe-delimited markdown table as a LaTeX ``tabular`` block.

    The number of columns is taken from the first row; the text before the
    first ``|`` and after the last ``|`` of each row is discarded.  Every
    column is a fixed-width paragraph column and the columns share 9 cm
    equally (whole centimetres, never less than 1 cm).

    Args:
        table: markdown table source; rows are separated by newlines and
            each row is expected to start and end with ``|``.

    Returns:
        LaTeX source (``center`` / ``table`` / ``tabular`` wrapper) with one
        ``\\hline``-prefixed line per input row.  The header separator row
        (cells such as ``---``, ``:---`` or ``:-:``) contributes only its
        rule, so the header ends up followed by two rules.
    """
    # A separator cell must match as a whole.  Searching for "---" anywhere
    # in a cell would also hit ordinary data such as "x --- y" and silently
    # drop that row.
    separator_cell = re.compile(r'\s*(?::-+:|:?-{3,}:?)\s*')

    rows = table.split('\n')
    columns_number = len(rows[0].split('|')) - 2
    # Guard against a first row without columns (division by zero) and
    # against more than nine columns (which would yield a 0 cm column).
    cell_width = max(1, 9 // max(columns_number, 1))

    fixed_columns_length = ' p{%dcm} |' % cell_width
    begin = (
        '\n'
        '    \\begin{center}\n'
        '    \\begin{table}\n'
        '    \\small\n'
        '    \\renewcommand{\\arraystretch}{0.9}\n'
        '    \\begin{tabular}{ |%s }\n'
        '\n'
        '    '
    ) % (columns_number * fixed_columns_length)

    latex_rows = ''
    for row in rows:
        cells = row.split('|')[1:-1]
        latex_row = '\\hline\n'
        is_separator = bool(cells) and all(
            separator_cell.fullmatch(cell) for cell in cells
        )
        if cells and not is_separator:
            # Joining guarantees the row terminator is present even when a
            # row has fewer cells than the header.
            latex_row += '&'.join(cells) + '\\\\'
        latex_rows += latex_row + '\n'

    end = (
        '\n'
        '    \\hline\n'
        '    \\end{tabular}\n'
        '    \\end{table}\n'
        '    \\renewcommand{\\arraystretch}{1}\n'
        '    \\end{center}\n'
        '    '
    )

    return begin + latex_rows + end


def find_level(cell):
    """Classify a markdown cell by the first heading-like line it contains.

    Each line is stripped and the ``#`` characters inside its first five
    characters are counted: one means ``main_title``, two means
    ``section_title``.  Otherwise three ``#`` inside the first seven
    characters means ``slide_title``.  The first line that qualifies decides
    the result.

    NOTE(review): the count covers every ``#`` in that prefix, not only a
    leading run, so a line like ``# C# intro`` is classified as a section.

    Args:
        cell: iterable of the cell's source lines.

    Returns:
        ``(kind, cleaned_text)`` for the first heading found, ``('c', '')``
        when the cell has lines but no heading, or an empty tuple when the
        cell has no lines at all.
    """
    outcome = tuple()
    for line in cell:
        stripped = line.strip()
        hashes_in_prefix = stripped[:5].count('#')
        if hashes_in_prefix == 1:
            return ('main_title', clean_content(line))
        if hashes_in_prefix == 2:
            return ('section_title', clean_content(line))
        if stripped[:7].count('#') == 3:
            return ('slide_title', clean_content(line))
        # No heading on this line; remember that the cell had content.
        outcome = ('c', '')
    return outcome
    
    
def classify_content(content):
    """Extract tagged fragments and images from a cell's HTML/markdown text.

    Looks at every ``<span>``, ``<div>`` and ``<img>`` element in document
    order:

    * ``span`` / ``div`` elements carrying a ``t`` attribute yield
      ``(t_value, cleaned_text)``;
    * ``img`` elements with a non-empty ``src`` yield ``('img', src)``.

    Anything else is ignored.

    Args:
        content: the cell source as a single string.

    Returns:
        List of ``(tag, text)`` tuples; empty when nothing matches.
    """
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed (and warns), so results could differ
    # between machines.  "html.parser" ships with Python.
    soup = BeautifulSoup(content, 'html.parser')

    classified = []
    for tag in soup.find_all(['span', 'div', 'img']):
        if tag.name == 'img':
            # An <img> without src has nothing to include; skip it instead
            # of failing with KeyError.
            src = tag.get('src')
            if src:
                classified.append(('img', src))
        elif tag.has_attr('t'):
            classified.append((tag.attrs['t'], clean_content(tag.get_text())))
    return classified


def get_slides_from_markdown(notebook_content):
    """Turn the markdown cells of a notebook into Beamer section/frame markup.

    For every cell whose ``cell_type`` is ``"markdown"`` the header tuple from
    ``find_level`` is put in front of the ``(tag, text)`` pairs returned by
    ``classify_content``.  Each pair then drives a small state machine that
    appends LaTeX to the result and tracks which environments are open.

    Tags handled:
        main_title     stored in the module-level global ``main_title``
        section_title  closes open lists/frame, emits a section command
        slide_title    closes open lists/frame, opens a frame with that title
        c              cell without heading: closes open lists/frame and
                       opens a frame that reuses the previous slide title
        l1, l2         first- and second-level itemize entries
        q              quote environment
        v              verbatim environment (triple backticks removed)
        img            figure, only when the source does not contain
                       ``https://``; the path is prefixed with ``../``
        t              table, rendered by ``convert_md_table_to_latex``

    After the last cell any open environments are closed, frames without
    content are removed, the frame titled "Bibliografia" gets
    ``allowframebreaks``, every other frame gets ``fragile`` and runs of
    blank lines are collapsed.

    Args:
        notebook_content: parsed notebook JSON; must provide
            ``notebook_content['cells']`` whose items have ``cell_type`` and
            ``source`` (``source`` is iterated and joined, so it is expected
            to be a sequence of strings).

    Returns:
        The LaTeX source of all sections and frames as one string.

    Side effects:
        Assigns the global ``main_title`` when a main title is encountered.
    """
    # Flags describing which LaTeX environments are currently open.
    inside_slide = False  # True while a frame is treated as open
    level1 = False  # True while a first-level itemize is open
    level2=False  # True while a nested (second-level) itemize is open
    
    beamer_slides = ''
    # Title of the most recent slide; reused for continuation ('c') frames.
    prev_slide_title =''
    for cell in [k['source'] for k in notebook_content['cells'] if k['cell_type']=="markdown"]:
        # The header tuple comes first so that a frame/section is handled
        # before the cell's tagged fragments are appended.
        header = find_level(cell)
        cell_content_classified = classify_content(" ".join(cell))
        
        classified_content = [header] + cell_content_classified

        for content_part in classified_content:
            # find_level returns an empty tuple for a cell without lines.
            if content_part:
                tag = content_part[0]
                text = content_part[1]
                # --- document title -------------------------------------
                if tag == 'main_title':
                    global main_title
                    main_title = text
                # --- new section: close whatever is open, then emit it ---
                elif tag == 'section_title' and inside_slide == False:
                    beamer_slides += '\n\\section{%s}\n' % text
                    inside_slide = False
                elif tag == 'section_title' and inside_slide == True and level1 == False and level2 == False:
                    beamer_slides += '\n\\end{frame}\n'
                    beamer_slides += '\n\\section{%s}\n' % text
                    inside_slide = False
                elif tag == 'section_title' and inside_slide == True and level1 == True and level2 == False:
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\n\\end{frame}\n'
                    beamer_slides += '\n\\section{%s}\n' % text
                    inside_slide = False
                    level1 = False
                elif tag == 'section_title' and inside_slide == True and level1 == True and level2 == True:
                    beamer_slides += '\t\\end{itemize}\n'
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\n\\end{frame}\n'
                    beamer_slides += '\n\\section{%s}\n' % text
                    inside_slide = False
                    level1 = False
                    level2 = False
                # --- new slide title: close open lists/frame, open frame ---
                elif tag == 'slide_title' and inside_slide == False:
                    beamer_slides += '\n\\begin{frame}\n\\frametitle{%s}\n ' % text
                    prev_slide_title = text
                    inside_slide = True
                elif tag == 'slide_title' and inside_slide == True and level1 == False:
                    beamer_slides += '\n\\end{frame}\n'
                    beamer_slides += '\n\\begin{frame}\n\\frametitle{%s}\n' % text
                    inside_slide = True
                    prev_slide_title = text
                elif tag == 'slide_title' and inside_slide == True and level1 == True and level2 == False:
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\\end{frame}\n\n'
                    beamer_slides += '\\begin{frame}\n\\frametitle{%s}\n' % text
                    level1 = False
                    prev_slide_title = text
                    inside_slide = True
                elif tag == 'slide_title' and inside_slide == True and level1 == True and level2 == True:
                    beamer_slides += '\t\\end{itemize}\n'
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\\end{frame}\n\n'
                    beamer_slides += '\\begin{frame}\n\\frametitle{%s}\n' % text
                    level1 = False
                    level2 = False
                    prev_slide_title = text
                    inside_slide = True
                # --- cell without heading: new frame, previous title ------
                elif tag == 'c' and inside_slide == False:
                    beamer_slides += '\n\\begin{frame}\n\\frametitle{%s}\n' % prev_slide_title
                    inside_slide = True
                elif tag == 'c' and inside_slide == True and level1 == False:
                    beamer_slides += '\n\\end{frame}\n'
                    beamer_slides += '\n\\begin{frame}\n\\frametitle{%s}\n' % prev_slide_title
                    inside_slide = True
                elif tag == 'c' and inside_slide == True and level1 == True and level2 == False:
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\\end{frame}\n\n'
                    beamer_slides += '\\begin{frame}\n\\frametitle{%s}\n' % prev_slide_title
                    level1 = False
                    inside_slide = True
                elif tag == 'c' and inside_slide == True and level1 == True and level2 == True:
                    beamer_slides += '\t\\end{itemize}\n'
                    beamer_slides += '\\end{itemize}\n'
                    beamer_slides += '\\end{frame}\n\n'
                    beamer_slides += '\\begin{frame}\n\\frametitle{%s}\n' % prev_slide_title
                    level1 = False
                    level2 = False
                    inside_slide = True
                # --- first-level list items -------------------------------
                elif tag == 'l1' and level1 == False:
                    beamer_slides += '\\begin{itemize}\n\\item %s\n' % text
                    inside_slide = True
                    level1 = True
                elif tag == 'l1' and level1 == True and level2 == False:
                    beamer_slides += '\\item %s \n' % text
                elif tag == 'l1' and level1 == True and level2 == True:
                    # Back from the nested list to the outer one.
                    beamer_slides += '\t\\end{itemize}\n'
                    beamer_slides += '\\item %s \n' % text
                    level2 = False
                # --- second-level list items ------------------------------
                elif tag == 'l2' and level2 == False:
                    beamer_slides += '\t\\begin{itemize}\n\t\\item %s\n' % text
                    level2 = True
                elif tag == 'l2' and level2 == True:
                    beamer_slides += '\t\\item %s\n' % text
                # --- quote / verbatim / image / table ---------------------
                elif tag == 'q':
                    beamer_slides += '\t\\begin{quote}\n %s \n\t\\end{quote}\n\n' % text
                elif tag == 'v':
                    beamer_slides += '\\begin{verbatim}\n %s \n\\end{verbatim}\n\n' % text.replace("```","")
                # Images whose source contains 'https://' match no branch and
                # are therefore left out of the slides.
                elif tag == 'img' and 'https://' not in text:
                    text = '../'+text
                    beamer_slides += '''
                    \\begin{figure}[h]
                        \\centering
                        \\includegraphics[width=0.5\\textwidth]{%s}
                    \\end{figure}                    
                    ''' % text
                elif tag == 't':
                    beamer_slides += convert_md_table_to_latex(text)
    # Close whatever is still open after the last cell.
    if level1 == True and level2 == False:
        beamer_slides += '\\end{itemize}\n\\end{frame}\n'
    elif level1 == True and level2 == True:
        beamer_slides += '\t\\end{itemize}\n\\end{itemize}\n\\end{frame}\n'
    elif inside_slide == True:
        beamer_slides += '\\end{frame}\n'
    
    # Drop frames that ended up with a title only.
    beamer_slides = remove_empty_slides(beamer_slides)
    # Let the bibliography frame break across several slides.
    beamer_slides = beamer_slides.replace('\\begin{frame}\n\\frametitle{Bibliografia}', '\\begin{frame}[allowframebreaks]\n\\frametitle{Bibliografia}')
    # Mark every remaining frame as fragile; the bibliography frame no longer
    # matches because it already carries an option.
    beamer_slides = beamer_slides.replace('\\begin{frame}\n\\frametitle', '\\begin{frame}[fragile]\n\\frametitle')
    # Collapse runs of blank / whitespace-only lines into a single blank line.
    beamer_slides = re.sub(r'(\n\s*)+\n+', '\n\n', beamer_slides)
    
    return beamer_slides

In [104]:
# Path of a single notebook (an alternative is left commented out).
# NOTE(review): the conversion loop further down uses ``notebook_path`` as its
# loop variable, so the value assigned here is overwritten there.
#notebook_path = 'python_programowanie/python_podstawy.ipynb'
notebook_path = 'wprowadzenie_do_ai/02A_wstep_do_uczenia_maszynowego.ipynb'

In [105]:
# Folders that are searched recursively for notebooks to convert.  The names
# are relative paths, so they are resolved against the working directory.
folders = ["programowanie_w_jezyku_python_1", 
           "programowanie_w_jezyku_python_2", 
           "teoretyczne_podstawy_reprezentacji_wiedzy", 
           "ontologia_w_praktyce", 
           "r_programowanie", 
           "wprowadzenie_do_ai",
           "podstawy_uczenia_maszynowego",
           "glebokie_uczenie_maszynowe"
          ]

def get_notebooks_to_beamerization(list_with_folder_names):
    """Collect the paths of all notebooks found under the given folders.

    Each folder is walked recursively.  A file is included when its name
    ends with ``.ipynb`` and its path does not contain
    ``.ipynb_checkpoints`` (Jupyter's autosave copies).

    Args:
        list_with_folder_names: iterable of folder paths to search.  Folders
            that do not exist contribute nothing.

    Returns:
        List of notebook paths (folder path joined with the relative file
        location), in the order ``os.walk`` reports them.
    """
    notebook_paths = []
    # Iterate over the argument; the previous version ignored it and always
    # read the module-level ``folders`` list.
    for folder_name in list_with_folder_names:
        for root, _dirs, files in os.walk(folder_name):
            for file_name in files:
                if not file_name.endswith(".ipynb"):
                    continue
                notebook_path = os.path.join(root, file_name)
                if ".ipynb_checkpoints" not in notebook_path:
                    notebook_paths.append(notebook_path)
    return notebook_paths

# Every notebook found under the configured folders.
notebook_paths = get_notebooks_to_beamerization(folders)

In [106]:
# Convert every collected notebook into a Beamer .tex file under
# beamer/<top-level folder>/<presentation title>.tex
for notebook_path in notebook_paths:
    # Fallback title.  get_slides_from_markdown overwrites this module-level
    # name when the notebook contains a main title, so it has to be reset
    # before each notebook is processed.
    main_title = 'moja prezentacja'

    with open(notebook_path, "r", encoding="utf8") as notebook_file:
        notebook_content = json.load(notebook_file)

    slides_from_markdown = get_slides_from_markdown(notebook_content)
    beamer_content = create_beamer_content(main_title, slides_from_markdown)

    # NOTE(review): ``now`` and ``dt_string`` are not used anywhere in the
    # visible code; kept in case another cell reads them.
    now = datetime.now()
    dt_string = now.strftime("%d-%m-%Y")

    # First path component = the folder the notebook was found under.
    # Path.parts splits on the platform's separator; splitting on '/' by hand
    # returned the whole path where os.path.join produces backslashes.
    folder = Path(notebook_path).parts[0]

    # NOTE(review): notebooks that share a title within one folder write to
    # the same file, so the later one replaces the earlier one.
    output_file = Path(f'beamer/{folder}/{main_title}.tex')
    output_file.parent.mkdir(exist_ok=True, parents=True)

    with open(output_file, mode='w', encoding="utf8") as tex_file:
        tex_file.write(beamer_content)