---
# gokart task run
---
running sample task

In [1]:
import os

# Point gokart (and thunderbolt's default lookup) at the local workspace directory.
os.environ.update({'TASK_WORKSPACE_DIRECTORY': './resources'})

In [2]:
!cat ./task.py

# define tasks
import gokart
import luigi
from luigi.util import requires
from logging import getLogger

logger = getLogger(__name__)


class SampleTask(gokart.TaskOnKart):
    """Sample task that dumps a message containing its ``number`` parameter."""

    task_namespace = 'sample'
    # ``name`` is declared as a parameter but is not read by ``run``.
    name = luigi.Parameter()
    number = luigi.IntParameter()

    def run(self):
        """Build the sample message and persist it with ``dump``."""
        message = f'this is sample output. model number: {self.number}'
        self.dump(message)

        
@requires(SampleTask)
class SecondTask(gokart.TaskOnKart):
    """Follow-up task that appends a suffix built from ``param`` to SampleTask's output."""

    task_namespace = 'sample'
    param = luigi.Parameter()

    def run(self):
        """Load the required task's output, append the suffix, and dump the result."""
        upstream_output = self.load()
        suffix = f'add task: {self.param}'
        self.dump(upstream_output + suffix)
        
# Entry point: hand control to gokart's runner; the task name and its parameters
# are supplied on the command line (e.g. `python task.py sample.SampleTask --name=... --number=...`).
gokart.run()

In [3]:
# sample task run
# Remove any previous workspace first so the results below come only from these runs.
!rm -rf ./resources
# Run SampleTask for several (name, number) combinations.
# `2> /dev/null` discards stderr, which also hides error tracebacks if a run fails.
!python task.py sample.SampleTask --name='EXAMPLE' --number=1 --local-scheduler 2> /dev/null
!python task.py sample.SampleTask --name='EXAMPLE' --number=2 --local-scheduler 2> /dev/null
!python task.py sample.SampleTask --name='EXAMPLE' --number=3 --local-scheduler 2> /dev/null
!python task.py sample.SampleTask --name='TEMP' --number=1 --local-scheduler 2> /dev/null
!python task.py sample.SampleTask --name='TEMP' --number=2 --local-scheduler 2> /dev/null
# SecondTask is declared with @requires(SampleTask), so it is given --name/--number as well as its own --param.
# NOTE(review): './resource' (singular) differs from the './resources' workspace used everywhere else — confirm this is intentional.
!python task.py sample.SecondTask --name='TEMP' --number=2 --param='RUN' --local-scheduler --local-temporary-directory='./resource' 2> /dev/null

In [4]:
# List the files the runs above left under the workspace directory.
!tree ./resources/

[34m./resources/[00m
├── [34m__main__[00m
│   ├── SampleTask_84b0b9c5a39bce072271599c9f730660.pkl
│   ├── SampleTask_944fc52ef5011b71b5839f035f4d7e48.pkl
│   ├── SampleTask_d05a2ab961781d3d8eca3e2e5f0d608b.pkl
│   ├── SampleTask_d57cff8074e2560896974850e5d3174d.pkl
│   ├── SampleTask_e883bcfad65f5fb68259d1cd4691f384.pkl
│   └── SecondTask_ea1806322904199b2455d6e115c525ea.pkl
└── [34mlog[00m
    ├── [34mmodule_versions[00m
    │   ├── SampleTask_84b0b9c5a39bce072271599c9f730660.txt
    │   ├── SampleTask_944fc52ef5011b71b5839f035f4d7e48.txt
    │   ├── SampleTask_d05a2ab961781d3d8eca3e2e5f0d608b.txt
    │   ├── SampleTask_d57cff8074e2560896974850e5d3174d.txt
    │   ├── SampleTask_e883bcfad65f5fb68259d1cd4691f384.txt
    │   └── SecondTask_ea1806322904199b2455d6e115c525ea.txt
    ├── [34mprocessing_time[00m
    │   ├── SampleTask_84b0b9c5a39bce072271599c9f730660.pkl
    │   ├── SampleTask_944fc52ef5011b71b5839f035f4d7e48.pkl
    │   ├── SampleTask_d05a2ab961781d3d8eca3e2e5f0d6

---
# Init Thunderbolt
---
using thunderbolt

In [5]:
from thunderbolt import Thunderbolt

In [6]:
# 1st arg: gokart's output directory (default: $TASK_WORKSPACE_DIRECTORY)
# 2nd arg: `task_filters`, which restricts the tasks that are loaded (faster initialization)

tb = Thunderbolt() 

## Check tasks param
checking thunderbolt's task_id

In [7]:
# Tabulate the tasks thunderbolt found; the task_id column is the key later passed to tb.load().
df = tb.get_task_df()

In [8]:
import pandas as pd
# Raise the column display width so long cell values such as task_params are not cut off.
pd.set_option("display.max_colwidth", 200)
df

Unnamed: 0,task_id,task_name,last_modified,task_params
0,0,SampleTask,2020-02-13 14:17:13.704206,"{'name': 'TEMP', 'number': '1'}"
1,1,SampleTask,2020-02-13 14:17:15.728998,"{'name': 'TEMP', 'number': '2'}"
2,2,SecondTask,2020-02-13 14:17:17.735793,"{'name': 'TEMP', 'number': '2', 'param': 'RUN'}"
3,3,SampleTask,2020-02-13 14:17:07.787346,"{'name': 'EXAMPLE', 'number': '1'}"
4,4,SampleTask,2020-02-13 14:17:09.776602,"{'name': 'EXAMPLE', 'number': '2'}"
5,5,SampleTask,2020-02-13 14:17:11.699651,"{'name': 'EXAMPLE', 'number': '3'}"


## thunderbolt filter
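The 2nd argument, `task_filters`, accepts a str or a list of str. Filtering limits the tasks that are loaded, so initialization is faster.

`task_filters` matches task names by partial match. For example: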
 - 'Tag' -> HogeTag, NormalizeHogeTag, TagTask, ...
 - ['Train', 'Tag'] -> TrainModel, TrainData, HogeTag, NormalizeHogeTag, TagTask, ...

In [9]:
# Re-create the client keeping only tasks whose name contains 'Sample' (SecondTask no longer appears below).
tb = Thunderbolt(task_filters='Sample') 

In [10]:
# example: all_data=True
# With all_data=True the frame also includes the task_hash and task_log columns.

tb.get_task_df(all_data=True)

Unnamed: 0,task_id,task_name,last_modified,task_params,task_hash,task_log
0,0,SampleTask,2020-02-13 14:17:13.704206,"{'name': 'TEMP', 'number': '1'}",d57cff8074e2560896974850e5d3174d,{'file_path': ['./resources/__main__/SampleTask_d57cff8074e2560896974850e5d3174d.pkl']}
1,1,SampleTask,2020-02-13 14:17:15.728998,"{'name': 'TEMP', 'number': '2'}",d05a2ab961781d3d8eca3e2e5f0d608b,{'file_path': ['./resources/__main__/SampleTask_d05a2ab961781d3d8eca3e2e5f0d608b.pkl']}
2,3,SampleTask,2020-02-13 14:17:07.787346,"{'name': 'EXAMPLE', 'number': '1'}",e883bcfad65f5fb68259d1cd4691f384,{'file_path': ['./resources/__main__/SampleTask_e883bcfad65f5fb68259d1cd4691f384.pkl']}
3,4,SampleTask,2020-02-13 14:17:09.776602,"{'name': 'EXAMPLE', 'number': '2'}",84b0b9c5a39bce072271599c9f730660,{'file_path': ['./resources/__main__/SampleTask_84b0b9c5a39bce072271599c9f730660.pkl']}
4,5,SampleTask,2020-02-13 14:17:11.699651,"{'name': 'EXAMPLE', 'number': '3'}",944fc52ef5011b71b5839f035f4d7e48,{'file_path': ['./resources/__main__/SampleTask_944fc52ef5011b71b5839f035f4d7e48.pkl']}


---
# Data Load
---
using load method
- arg: thunderbolt's task_id
- return : data list

In [11]:
# task_id=3 is the SampleTask row with name='EXAMPLE', number='1' in the tables above.
# NOTE(review): task_ids come from thunderbolt's listing — confirm the id after re-running the notebook.
x = tb.load(task_id=3)
print(x)

this is sample output. model number: 1


### newest data load example

In [12]:
# Look up by task name; among the SampleTask rows this returned the one with the latest last_modified (TEMP, number 2).
tb.get_data('SampleTask')

'this is sample output. model number: 2'