In [63]:
import json
from pathlib import Path

# Notebook files are JSON stored as UTF-8, so decode explicitly instead of
# relying on the platform's default text encoding.
data = Path('./5_Logical_Indexing.ipynb').read_text(encoding='utf-8')
notebook = json.loads(data)
# Display the top-level keys of the parsed notebook document.
notebook.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [97]:
# Inspect the first cell directly from the parsed notebook. The bare name
# `cells` is only assigned further down, so using it here fails on a fresh kernel.
notebook['cells'][0]

{'cell_type': 'code',
 'execution_count': None,
 'metadata': {'cell_id': '00000-d8c9f90f-d649-47b2-a772-a6639c594321',
  'deepnote_cell_type': 'code',
  'id': 'metric-level'},
 'outputs': [],
 'source': ['import numpy as np']}

In [93]:
from dataclasses import dataclass, field
from typing import Optional, TypedDict

class JupyterCell(TypedDict, total=True):
    """Typed view of one notebook cell, declaring only the two keys used below."""

    # Cell kind; the code in this notebook compares it to 'markdown' and 'code'.
    cell_type: str
    # The cell's text, as a list of string fragments.
    source: list[str]


@dataclass
class ExerciseSection:
    start_idx: int
    end_idx: Optional[int] = None
    exclude: list[int] = field(default_factory=list)

    @classmethod
    def from_cells(cls, cells: list[JupyterCell]) -> list['ExerciseSection']:
        sections = []
        current: Optional[ExerciseSection] = None
        for idx, cell in enumerate(cells):
            match cell, current:
                case {'cell_type': 'markdown', 'source': source}, None:
                    text = ''.join(source)
                    if 'exercise' in text.strip('*#').lower():
                        current = ExerciseSection(start_idx=idx)
                case {'cell_type': 'markdown', 'source': source}, ExerciseSection():
                    text = ''.join(source)
                    if text.startswith('#'):
                        current.end_idx = idx
                        sections.append(current)
                        current = None
                    elif any([text.strip('*#').lower().startswith(exception) for exception in ['example', 'data']]):
                        current.exclude.append(idx)
        return sections

    def preview(self, cells: list[JupyterCell]) -> str:
        lines = []
        for cell in cells[self.start_idx:self.end_idx]:
            match cell:
                case {'cell_type': 'markdown', 'source': source}:
                    lines.extend('M | ' + line.strip() for line in source)
                case {'cell_type': 'code', 'source': source}:
                    if source:
                        lines.extend('C | ' + line for line in source)
                    else:
                        lines.append('C | ')
        return '\n'.join(lines)
                    


# Find every exercise section, then print a text preview of the second one.
sections = ExerciseSection.from_cells(cells=notebook['cells'])
second_section = sections[1]
print(second_section.preview(cells=notebook['cells']))

M | **Exercises**:  Using the data below, extract only the values that corresspond to each question
C | 
M | 1. The values that are less than 0
C | 
M | 2. The values that are greater than 3
C | 
M | 4. The values not equal to 7
C | 
M | 5. The values equal to 20
C | 
M | The values that are not missing
C | 


In [95]:
def studentize(cells: "list[JupyterCell]", section: "ExerciseSection") -> "list[JupyterCell]":
    """Return a copy of ``cells`` with the code cells of ``section`` blanked.

    Every cell in ``cells[section.start_idx:section.end_idx]`` whose
    ``cell_type`` is ``'code'`` is replaced by a copy whose ``source`` is an
    empty list. The input list and its cell dicts are left untouched, so the
    original notebook can still be inspected or re-processed afterwards.

    Only ``source`` is changed; other keys (e.g. ``outputs``) are carried over
    as-is, and ``section.exclude`` is not consulted.

    Args:
        cells: All cells of the notebook.
        section: The section whose code cells should be emptied; an
            ``end_idx`` of ``None`` means "through the last cell".

    Returns:
        A new list. Cells outside the section, and non-code cells inside it,
        are the same objects as in ``cells``.
    """
    new_cells = list(cells)
    # slice.indices() resolves a None end exactly the way slicing would.
    bounds = slice(section.start_idx, section.end_idx).indices(len(new_cells))
    for idx in range(*bounds):
        cell = new_cells[idx]
        if cell['cell_type'] == 'code':
            # Swap in a modified copy instead of mutating the shared dict, and
            # keep `source` a list to match the JupyterCell declaration.
            new_cells[idx] = {**cell, 'source': []}
    return new_cells


# Blank the code cells of every exercise section, one section at a time,
# feeding each pass the result of the previous one.
sections = ExerciseSection.from_cells(notebook['cells'])
new_cells = notebook['cells']
for section in sections:
    new_cells = studentize(cells=new_cells, section=section)
# `cells` ends up bound to the processed list as well.
cells = new_cells


[{'cell_type': 'code',
  'execution_count': None,
  'metadata': {'cell_id': '00000-d8c9f90f-d649-47b2-a772-a6639c594321',
   'deepnote_cell_type': 'code',
   'id': 'metric-level'},
  'outputs': [],
  'source': ['import numpy as np']},
 {'cell_type': 'markdown',
  'metadata': {'cell_id': '00001-08bea5dd-b612-4234-a812-1f203f7df899',
   'deepnote_cell_type': 'markdown',
   'id': 'sorted-signal'},
  'source': ['# Filtering Data With Logical Indexing\n',
   '\n',
   'Sometimes you want to remove certain values from your dataset.  In Numpy, this can be done with **Logical Indexing**, and in normal Python this is done with an **If Statement**']},
 {'cell_type': 'markdown',
  'metadata': {'cell_id': '00002-9e11b127-b196-47c8-9d0d-2b50736be4b3',
   'deepnote_cell_type': 'markdown',
   'id': 'adequate-commercial'},
  'source': ['### Step 1: Create a Logical Numpy Array\n',
   '\n',
   'We can convert all of the values in an array at once with a single logical expression.  This is broadcasting, 

In [13]:
# Build the student notebook: same top-level entries, processed cells swapped in.
new_notebook = {**notebook, 'cells': new_cells}
serialized = json.dumps(new_notebook, indent=4)
# write_text returns the number of characters written, shown as the cell output.
Path('newfile.ipynb').write_text(serialized)

26922