In [68]:
import nbformat
from pptx import Presentation
from pptx.util import Inches

In [1]:
# Read the Notebook
def read_ipynb_file(file_path):
    """Load the notebook stored at ``file_path``.

    The file is opened as UTF-8 text and handed to ``nbformat.read`` with
    ``as_version=4``; whatever that call returns is returned unchanged.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return nbformat.read(handle, as_version=4)

# Load the source notebook that the slides are generated from.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that file exists — consider a path relative to the project.
notebook_path = "/Users/nilsjennissen/PycharmProjects/presentations/notebooks/template.ipynb"
notebook = read_ipynb_file(notebook_path)

In [70]:
notebook

{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': '# Classification Challenge\n'},
  {'cell_type': 'markdown',
   'source': '## Task',
   'metadata': {'collapsed': False}},
  {'cell_type': 'markdown',
   'source': 'Your company, DS Pros, would like to win a contract with a big city council as it would give us great PR. To do so you think it would be a great idea to proactively browse in the open data sets of this city (the one you choose, total freedom here) identify a situation that could be solved or improved using classification algorithms and present it to the technical office of that city council.\n\nYou need to prepare the following:\n\n- A presentation describing the solution you try to solve, how classification will solve it and a summary of the solution proposed\n- A well documented and visually appealing notebook where you try different models, explain the steps followed and chose one particular algorithm and hyperparameters (explaining why)\n- You should a

In [71]:
def extract_text_fields(notebook):
    """Sort every markdown line of ``notebook`` into four lists by heading level.

    Only cells whose ``cell_type`` is ``'markdown'`` are read. Each cell's
    ``source`` is split on newlines and every line is classified by its
    leading ``#`` characters:

    * ``###`` (or more) -> ``subheaders``
    * ``##``            -> ``headers``
    * ``#``             -> ``doc_name``
    * anything else     -> ``texts`` (including empty lines)

    Lines are stored unmodified, i.e. the markdown ``#`` prefix is kept.

    Args:
        notebook: Object exposing ``cells``, each with ``cell_type`` and
            ``source`` attributes.

    Returns:
        tuple: ``(doc_name, headers, subheaders, texts)``, four lists of str
        in document order.
    """
    doc_name = []
    headers = []
    subheaders = []
    texts = []

    for cell in notebook.cells:
        if cell.cell_type != 'markdown':
            continue
        for line in cell.source.split('\n'):
            # The longest prefix must be tested first: every '##'/'###' line
            # also starts with '#', so testing '#' first would swallow all
            # headings into doc_name and leave the other lists empty.
            if line.startswith('###'):
                subheaders.append(line)
            elif line.startswith('##'):
                headers.append(line)
            elif line.startswith('#'):
                doc_name.append(line)
            else:
                texts.append(line)

    return doc_name, headers, subheaders, texts

# Split the loaded notebook's markdown into title lines, headers, subheaders and body lines.
doc_name, headers, subheaders, texts = extract_text_fields(notebook)

In [72]:
headers

['## Task',
 '## 1. Setup and tool import',
 '## 2. The Data',
 '## 3. Data Preprocessing',
 '## 4. Models used',
 '## 5. Visualizing the results',
 '## 6. Saving the best model']

In [73]:
texts

['# Classification Challenge',
 '',
 'Your company, DS Pros, would like to win a contract with a big city council as it would give us great PR. To do so you think it would be a great idea to proactively browse in the open data sets of this city (the one you choose, total freedom here) identify a situation that could be solved or improved using classification algorithms and present it to the technical office of that city council.',
 '',
 'You need to prepare the following:',
 '',
 '- A presentation describing the solution you try to solve, how classification will solve it and a summary of the solution proposed',
 '- A well documented and visually appealing notebook where you try different models, explain the steps followed and chose one particular algorithm and hyperparameters (explaining why)',
 '- You should also export that model, once trained, using pickle or similar so it can be reused.',
 '- You should implement a .py script that loads the exported model, accepts a file with sampl

In [74]:
#
def create_ppt_from_template(template_path, headers, texts):
    """Build a presentation with one slide per header, filled with that header's text.

    For every entry of ``headers`` a slide is added from layout index 4 of the
    template, the header becomes the slide title, and a new text box receives
    the lines of ``texts`` that belong to that header.

    ``texts`` is consumed front to back. If it contains the header lines
    themselves (as section separators), each slide gets the lines between its
    own header line and the next header line. If it contains no header lines,
    the first slide receives all of ``texts``.

    Args:
        template_path: Path of the .pptx template passed to ``Presentation``.
        headers: List of header strings, one slide each.
        texts: List of body strings in document order.

    Returns:
        The ``Presentation`` object with the slides added; nothing is saved.
    """
    prs = Presentation(template_path)
    header_lines = set(headers)
    text_index = 0

    for header in headers:
        slide = prs.slides.add_slide(prs.slide_layouts[4])
        title = slide.shapes.title
        title.text = header

        text_box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(5))
        text_frame = text_box.text_frame

        # Move just past this header's own line when texts contains it.
        # Without this step the cursor stays parked on the first header line
        # it meets and every following slide ends up empty.
        try:
            text_index = texts.index(header, text_index) + 1
        except ValueError:
            # Header lines are not interleaved in texts; keep reading from
            # the current position.
            pass

        # Collect body lines up to (not including) the next header line.
        body_lines = []
        while text_index < len(texts) and texts[text_index] not in header_lines:
            body_lines.append(texts[text_index])
            text_index += 1

        if body_lines:
            # One assignment instead of repeated `text += ...` round-trips.
            text_frame.text = ''.join(line + '\n' for line in body_lines)

    return prs

# Build the slide deck from the template using the extracted headers and texts.
# NOTE(review): relative path — presumably resolved against the kernel's working
# directory; the third attempt further down uses "../templates/template.pptx",
# so confirm which location is intended.
template_path = "./templates/template.pptx"
presentation = create_ppt_from_template(template_path, headers, texts)



In [75]:
# Save the generated presentation to disk.
# NOTE(review): assumes a pres/ directory already exists relative to the
# working directory — TODO confirm.
presentation.save('pres/presentation2.pptx')

## Second try


In [125]:
def read_ipynb_file(file_path):
    """Read a notebook file and return the parsed notebook.

    Opens ``file_path`` for reading as UTF-8 and parses it via
    ``nbformat.read`` with ``as_version=4``.
    """
    with open(file_path, 'r', encoding='utf-8') as nb_stream:
        parsed = nbformat.read(nb_stream, as_version=4)
        return parsed

# Load the notebook to convert for this second attempt.
# NOTE(review): absolute, machine-specific path — the cell only runs where
# this file exists; consider making it configurable or project-relative.
notebook_path = "/Users/nilsjennissen/Documents/02_PRIVAT/01_STUDIUM/01_BTS/09_Classical_Data_Analysis/FinalProject/classification_challenge.ipynb"
notebook = read_ipynb_file(notebook_path)

def extract_text_fields(notebook):
    """Turn each markdown cell of ``notebook`` into a ``{'header', 'text'}`` dict.

    One dict is produced per markdown cell, in cell order; other cell types
    are ignored. Within a cell, a line starting with ``##`` (which also
    matches ``###`` and deeper) is stored as ``'header'`` — if several lines
    match, the last one wins. Every other line is appended to ``'text'``
    followed by a newline. A cell without a matching line keeps an empty
    ``'header'``.

    Returns:
        list[dict]: the per-cell section dictionaries.
    """
    def _to_section(source):
        # Split one cell's source into its (last) header line and body text.
        heading = ''
        body_parts = []
        for row in source.split('\n'):
            if row[:2] == '##':
                heading = row
            else:
                body_parts.append(row + '\n')
        return {'header': heading, 'text': ''.join(body_parts)}

    return [
        _to_section(cell.source)
        for cell in notebook.cells
        if cell.cell_type == 'markdown'
    ]

# One {'header', 'text'} dict per markdown cell of the loaded notebook.
sections = extract_text_fields(notebook)

def create_ppt_from_template(template_path, sections):
    """Create one slide per section from the template at ``template_path``.

    Each slide is added from layout index 20 of the template. The section's
    ``'header'`` is written to the slide title and its ``'text'`` to the text
    frame of the slide's third shape (index 2).

    Returns:
        The ``Presentation`` object with the slides added; nothing is saved.
    """
    deck = Presentation(template_path)

    for entry in sections:
        new_slide = deck.slides.add_slide(deck.slide_layouts[20])

        heading_shape = new_slide.shapes.title
        heading_shape.text = entry['header']

        # The body goes into the shape at index 2, which the original author
        # notes is the text box already present in this slide layout.
        body_frame = new_slide.shapes[2].text_frame
        body_frame.text = entry['text']

    return deck

# Build the deck from the per-cell sections and write it to disk.
# NOTE(review): both paths are relative — presumably resolved against the
# kernel's working directory, and pres/ is assumed to exist; TODO confirm.
template_path = "./templates/template.pptx"
presentation = create_ppt_from_template(template_path, sections)

presentation.save('pres/Classification_Challenge.pptx')

## Third try

In [11]:
import nbformat
from pptx import Presentation
from pptx.util import Inches

def read_ipynb_file(file_path):
    """Parse the notebook at ``file_path`` and return it.

    The file is read as UTF-8 text and parsed by ``nbformat.read`` using
    ``as_version=4``.
    """
    source_file = open(file_path, 'r', encoding='utf-8')
    with source_file:
        return nbformat.read(source_file, as_version=4)

def create_presentation(notebook, template_path):
    """Build a presentation from the markdown cells of ``notebook``.

    * The first markdown cell whose source starts with ``"# "`` produces a
      title slide (template layout 0) titled with that cell's text.
    * Every markdown cell whose source starts with ``"## "`` produces a
      content slide (template layout 4). Its first line is the slide title.
      The body is the rest of that cell plus all following markdown cells up
      to, but not including, the next ``"## "`` cell. Cells starting with
      ``"### "`` are added without that prefix and followed by a blank line.
      Non-markdown cells are ignored.

    Args:
        notebook: Notebook object exposing ``cells`` (a list), each with
            ``cell_type`` and ``source``.
        template_path: Path of the .pptx template passed to ``Presentation``.

    Returns:
        The ``Presentation`` object with the slides added; nothing is saved.
    """
    prs = Presentation(template_path)
    title_slide_layout = prs.slide_layouts[0]
    content_slide_layout = prs.slide_layouts[4]
    cells = notebook.cells

    # Set the presentation title from the first top-level heading cell.
    for cell in cells:
        if cell.cell_type == "markdown" and cell.source.startswith("# "):
            title_slide = prs.slides.add_slide(title_slide_layout)
            title = title_slide.shapes.title
            title.text = cell.source[2:].strip()
            break

    # Create one slide per second-level header cell.
    # enumerate() gives the cell's real position; looking it up again with
    # list.index() compares cells by value and returns the wrong position
    # when two cells are equal.
    for position, cell in enumerate(cells):
        if not (cell.cell_type == "markdown" and cell.source.startswith("## ")):
            continue

        slide = prs.slides.add_slide(content_slide_layout)
        title = slide.shapes.title
        header_lines = cell.source.split('\n')
        title.text = header_lines[0][3:].strip()

        # Body: remainder of the header cell, then the following cells.
        parts = [line.strip() + "\n" for line in header_lines[1:]]
        for subcell in cells[position + 1:]:
            if subcell.cell_type != "markdown":
                continue
            if subcell.source.startswith("### "):
                parts.append(subcell.source[4:].strip() + "\n\n")
            elif subcell.source.startswith("## "):
                # Next section starts here.
                break
            else:
                parts.extend(
                    line.strip() + "\n"
                    for line in subcell.source.split('\n')
                    if not line.startswith("### ")
                )
        content = "".join(parts)

        # Put the body into the first text-capable shape that is not the
        # title. The title must be skipped explicitly: it has a text frame
        # and holds the header just assigned, so it would otherwise be the
        # first match and the body would overwrite the slide title.
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            if shape.shape_id == title.shape_id:
                continue
            if shape.text.startswith("Click to edit"):
                continue
            shape.text_frame.text = content
            break

    return prs

# Input notebook, slide template and output file for this third attempt.
# NOTE(review): notebook_path is absolute and machine-specific; the other two
# are relative ("../"), presumably to the kernel's working directory, and the
# earlier attempts above use "./templates/..." — confirm the intended layout.
notebook_path = "/Users/nilsjennissen/PycharmProjects/presentations/notebooks/template.ipynb"
template_path = "../templates/template.pptx"
output_path = "../pres/output.pptx"

# Read the notebook, build the deck, and write it to output_path.
notebook = read_ipynb_file(notebook_path)
presentation = create_presentation(notebook, template_path)
presentation.save(output_path)