# Feature
Data2Data: Convert data to a different type or structure.



## Motivation
Python and various libs like Pandas excel at working with data, but remembering/looking up all the parameters and writing repetitive snippets is not efficient.

# Imports & Environment

In [1]:
import os
import sys
import json
from google.colab import _message

In [None]:
# %%snippet -s -r -e first_cell
# One-time setup: only runs when `c` has not been created in this kernel yet.
if 'c' not in globals():
  !pip install nbf
  from nbf import epc
  # `c` is whatever epc.do_setup returns; the rest of this notebook reads its
  # settings through c[...] / c.get(...).
  c = epc.do_setup({'resources':{'nbf':{}}, 'keyring':'bit.ly/3QPP1bs', 'use_google_drive':True})
  # Execute the notebooks_as_features notebook located in c's nb_dir.
  %run {c.get('nb_dir')}notebooks_as_features.ipynb
  logger = epc.get_logger(c.get('nb_name'))
else:
  # `c` already exists: just pull the latest changes in the project path.
  !cd {c['project_path']} && git pull

In [None]:
# c

# Structure

In [None]:
# '|'-separated words that text2title always lowercases; nb_name2module_name
# also drops them (plus a few extra words) when building a module name.
no_upper = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v|via|vs|so'
# Titles of top-level markdown headers whose cells get_json_for_python_export
# leaves out of the exported notebook.
skip_headers = ['Examples','EOF','Usage','Tests']

# Functions

In [None]:
def text2title(text:str)->str:
  '''Title-case `text`, treating underscores and hyphens as spaces.

  Words found in `no_upper` (compared case-insensitively) are lowercased.
  Every other word only gets its first character uppercased, so acronyms
  written in caps (and any other existing capitals) are preserved.'''
  always_lower = no_upper.split('|')
  words = []
  for word in text.replace('_',' ').replace('-',' ').split():
    lowered = word.lower()
    if lowered in always_lower:
      words.append(lowered)
    else:
      words.append(word[0].upper()+word[1:])
  return ' '.join(words)

def nb_name2module_name(nb_name):
  '''Derive a module name from a notebook name.

  Underscores and hyphens are treated as spaces and the name is lowercased.
  Words listed in `no_upper`, plus 'notebook', 'with' and 'is', are dropped;
  the remaining words are concatenated without any separator.'''
  dropped = (no_upper+'|notebook|with|is').split('|')
  normalized = nb_name.replace('_',' ').replace('-',' ').lower()
  kept = [word for word in normalized.split(' ') if word not in dropped]
  # the kept words contain no spaces, so joining them directly is enough
  return ''.join(kept)

def get_json_for_python_export():
  '''Return the active notebook's JSON without the cells under skipped headers.

  The notebook is requested from Colab and its cells are walked in order.
  A markdown cell whose first line starts with exactly one '#' is a top-level
  header: if its title (the first line with every '#' removed, stripped) is in
  `skip_headers`, that cell and all following cells are dropped until the next
  top-level header whose title is not in `skip_headers`. Cells before the
  first top-level header are kept.

  Returns the notebook dict with its 'cells' list replaced by the kept cells.'''
  activenb = _message.blocking_request('get_ipynb', request='', timeout_sec=5)['ipynb']
  out_cells = []
  collecting = True
  for cell in activenb['cells']:
    first_line = ''
    if cell['cell_type']=='markdown':
      source = cell.get('source') or ''
      # the notebook format allows `source` to be one string or a list of lines
      if isinstance(source, str):
        source = source.splitlines(True)
      # an empty markdown cell has no first line and is never a header
      first_line = source[0] if source else ''
    # slices instead of indexing so '' and a lone '#' cannot raise IndexError;
    # a lone '#' counts as a header with an empty title
    if first_line[:1]=='#' and first_line[1:2]!='#':
      collecting = first_line.replace('#','').strip() not in skip_headers
      if collecting:
        print('Collecting cells under header:',first_line)
      else:
        print('Skipping cells under header:',first_line)
    if collecting:
      out_cells.append(cell)
  activenb['cells']=out_cells
  return activenb

def nb2ipynb(nb_json=None):
  '''active notebook to `c[nb_dir]{c[running_nb]}.ipynb`'''
  out_name = c['nb_dir']+c['running_nb']+'.ipynb'
  print(out_name)
  if nb_json==None:
    nb_json = _message.blocking_request('get_ipynb', request='', timeout_sec=5)['ipynb']
  with open(out_name, 'w') as nb_file:
    nb_file.write(json.dumps(nb_json,indent=2))
  !ls -la {out_name}

def nb2py_prepend(module_path):
  '''Insert the `templates/nb2py_prepend` template into an exported module.

  The template is read from under `c['nb_dir']` and written back into the
  file at `module_path`, placed after that file's first two lines and
  preceded by a blank line. The docstring of the original states the purpose:
  making exported notebooks run nicely without ipython.'''
  template_path = c['nb_dir']+'/templates/nb2py_prepend'
  with open(template_path,'r') as template_file:
    prepend = template_file.read()
  with open(module_path, 'r') as module_file:
    lines = module_file.read().split('\n')
  # keep the first two lines of the export on top of the inserted template
  # (presumably the header lines nbconvert emits -- TODO confirm)
  lines = lines[:2] + ['\n'+prepend] + lines[2:]
  with open(module_path,'w') as module_file:
    module_file.write('\n'.join(lines))

def nb2py(nb_name:str=None,save_dir=None):
  '''converts a notebook to python, if nb_name is None uses active notebook and
  skips cells under 'Examples', 'Tests', etc. If save_dir is None uses project/project dir'''
  if save_dir==None: save_dir=f"{c['project_path']}{c['active_project']}"
  if nb_name==None:
    nb_name = c['running_nb']
    nb2ipynb(get_json_for_python_export())
  else: nb2ipynb()
  module_file_name = nb_name2module_name(nb_name)+'.py'
  !mkdir -p {save_dir}
  module_path = f"{save_dir}/{module_file_name}"
  command = f"jupyter nbconvert --to python --output-dir {save_dir} --output {module_file_name} {c['nb_dir']}{nb_name}.ipynb"
  !{command}
  print(command)
  nb2py_prepend(module_path)
  

# Tests

In [None]:
import pytest

In [None]:
# text2title('BDD is the awesome aND good')
# quick manual check; the parametrized test data expects 'bddtest' for this input
nb_name2module_name('BDD test a notebook')

In [None]:
%%snippet -w -e -t test_d2d_titlenames.py
# (input text, expected title) pairs for text2title
test_data = [
  ('test','Test'),
  ('test The thing','Test the Thing'),
  ('tEst_the_thing','TEst the Thing'), # what is expected here?
  ('test-is-the-thing','Test Is the Thing'),
  ('BDD is the awesome aND good','BDD Is the Awesome and Good'),
]

@pytest.mark.parametrize("text,expected", test_data)
def test_text2title(text,expected):
  '''text2title turns each input into the expected title.'''
  title = text2title(text)
  assert title==expected

# (notebook name, expected module name) pairs for nb_name2module_name
test_data = [
  ('BDD test a notebook','bddtest'),
  ('this is teh awesomer','thistehawesomer'),
  ('fun with notebooks','funnotebooks'),
]
@pytest.mark.parametrize("text,expected", test_data)
def test_nb_name2module_name(text,expected):
  '''nb_name2module_name turns each notebook name into the expected module name.'''
  module_name = nb_name2module_name(text)
  assert module_name==expected

@pytest.mark.constraint
def test_filesize_less_than_14kB():
  print('current test file',os.getenv('PYTEST_CURRENT_TEST'))
  # fpath = c['active_project']+'/'+os.getenv('PYTEST_CURRENT_TEST').split('::')[0]
  fpath = '/tmp/'+os.getenv('PYTEST_CURRENT_TEST').split('::')[0]
  file_size = os.path.getsize(fpath)/1024
  assert file_size < 14

In [None]:
# !pip install nbmake
# !pytest --nbmake {c['nb_dir']}d2d.ipynb

# if "ipykernel_launcher" in sys.argv[0] and c['nb_name']==c['running_nb']:
#   # nb2py() #c['running_nb'] # no tests now!
#   # nb2py(None,'/tmp/')
#   # !pytest -v {c['project_path']}{c['active_project']}/{c['nb_name']}.py #-m constraint
#   nb2py(c['running_nb'],'/tmp/')
#   !pytest -v /tmp/{c['running_nb']}.py #-m constraint

In [None]:
# !cat /tmp/d2d.py

In [None]:
# if "ipykernel_launcher" in sys.argv[0] and c['nb_name']==c['running_nb']:
  # run_nb('git_for_colab')
  # git_ppush('feat: snippets and tests')
  # drive_nb2repo_nb()
  # !cd {c['project_path']} && git commit -m "no idea"