In [1]:
# Uncomment to install the packages this notebook imports:
# %pip install doit python-dotenv nbformat pandas

In [2]:
# Load doit's IPython extension into this kernel.
# NOTE(review): presumably this is what registers the `%doit` magic used in the
# last cells of the notebook — confirm against the doit documentation.
from doit.tools import load_ipython_extension
load_ipython_extension()

In [3]:
import pandas as pd
from dotenv import load_dotenv
from doit.tools import run_once

In [4]:
# Pipeline overview:
# 1. Download the dataset from a URL and save it as data.csv
# 2. Process data.csv to produce the cleaned data (data_process.csv)
# 3. Generate report.ipynb with the code and figures

In [5]:
import os
import urllib.request

def task_download():
    """doit task: fetch the source dataset and store it locally as ``data.csv``.

    Returns:
        dict: task definition with a single Python action, ``data.csv`` as
        the only target, and ``uptodate`` set to ``[True]``.
    """
    def download():
        # Load .env first so that `data_url` can be looked up in the environment.
        load_dotenv('.env')
        source = os.environ['data_url']
        # Save without the row index so the file holds only the data columns.
        pd.read_csv(source).to_csv('data.csv', index=False)

    task = dict(actions=[download], targets=['data.csv'], uptodate=[True])
    return task

In [6]:
def task_process():
    """doit task: clean ``data.csv`` and write the result to ``data_process.csv``.

    The action drops every row that contains a missing value and saves the
    remaining rows without the DataFrame index, so the output has exactly the
    same columns as the input.

    Returns:
        dict: task definition with ``data.csv`` as file dependency,
        ``data_process.csv`` as target, and an extra ``uptodate`` check that
        requires the target to exist and to have a strictly newer
        modification time than ``data.csv``.
    """
    def process():
        raw = pd.read_csv('data.csv')
        cleaned = raw.dropna(axis=0)
        # index=False: without it the row index is written as an unnamed first
        # column, which shows up as "Unnamed: 0" when the file is read back.
        cleaned.to_csv('data_process.csv', index=False)

    return {
        'actions': [process],
        'targets': ['data_process.csv'],
        'file_dep': ['data.csv'],
        # Additional mtime-based freshness check on top of file_dep/targets.
        'uptodate': [lambda: os.path.exists('data_process.csv') and os.path.getmtime('data_process.csv') > os.path.getmtime('data.csv')]
    }

In [7]:
import nbformat as nbf

def gen_report():
    """Build a small EDA notebook and write it to ``report.ipynb``.

    The generated notebook contains, in order: a cell installing dtale, an
    import cell, an "EDA" markdown heading, a cell loading
    ``data_process.csv`` into ``df``, and a cell calling ``dtale.show(df)``.
    """
    nb = nbf.v4.new_notebook()
    code = nbf.v4.new_code_cell
    markdown = nbf.v4.new_markdown_cell

    # Cells are listed in the order they appear in the generated notebook.
    nb.cells.extend([
        code("%pip install dtale"),
        code("import pandas as pd\nimport dtale"),
        markdown("## EDA"),
        code("df = pd.read_csv('data_process.csv')"),
        code("dtale.show(df)"),
    ])

    with open('report.ipynb', 'w') as handle:
        nbf.write(nb, handle)

In [8]:
def task_generate_report():
    """doit task: (re)generate ``report.ipynb`` from ``data_process.csv``.

    Returns:
        dict: task definition running ``gen_report``, with
        ``data_process.csv`` as file dependency, ``report.ipynb`` as target,
        and an extra ``uptodate`` check based on modification times.
    """
    def report_is_newer():
        # Up to date only if the report exists and is strictly newer than its input.
        if not os.path.exists('report.ipynb'):
            return False
        return os.path.getmtime('report.ipynb') > os.path.getmtime('data_process.csv')

    return dict(
        actions=[gen_report],
        file_dep=['data_process.csv'],
        targets=['report.ipynb'],
        uptodate=[report_is_newer],
    )

In [9]:
# First run of the pipeline. In the recorded output each task is prefixed with
# "." — presumably doit's marker for "task was executed"; confirm in the doit docs.
%doit 

.  download
.  process
.  generate_report


In [10]:
# Second run with nothing changed. In the recorded output each task is prefixed
# with "--" — presumably doit's marker for "skipped, already up to date"; confirm.
%doit

-- download
-- process
-- generate_report
