---
# gokart task run
---
running sample task

In [1]:
import os
# Set gokart's TASK_WORKSPACE_DIRECTORY for this kernel; commands launched with
# `!` below run as child processes and inherit this environment variable.
os.environ['TASK_WORKSPACE_DIRECTORY'] = './resources'

In [2]:
# Print the task definitions that the rest of this notebook runs and inspects.
!cat ./example_task.py

# define tasks
import gokart
import luigi
from luigi.util import requires
from logging import getLogger

logger = getLogger(__name__)


class SampleTask(gokart.TaskOnKart):
    """Example task that dumps a short message containing its ``number`` parameter."""

    task_namespace = 'sample'
    model_name = luigi.Parameter()  # used as the directory part of the output path
    number = luigi.IntParameter()  # embedded in the dumped message

    def requires(self):
        """Declare that this task has no upstream dependencies.

        luigi discovers dependencies through ``requires``; a method named
        ``require`` is never called by the framework.
        """
        return []

    def output(self):
        """Return the target ``<model_name>/sample.pkl``."""
        return self.make_target(f'{self.model_name}/sample.pkl')

    def run(self):
        """Dump the sample message for this task's ``number``."""
        self.dump(f'this is sample output. model number: {self.number}')

        
@requires(SampleTask)
class SecondTask(gokart.TaskOnKart):
    """Example downstream task that appends ``param`` to SampleTask's output."""

    task_namespace = 'sample'
    param = luigi.Parameter()

    def output(self):
        """Return the target ``SECOND_TASK/task.pkl``."""
        return self.make_target('SECOND_TASK/task.pkl')

    def run(self):
        """Load the upstream output, append the ``param`` suffix, and dump the result."""
        upstream_text = self.load()
        suffix = f'add task: {self.param}'
        self.dump(upstream_text + suffix)
        
# Entry point: the script is invoked as `python example_task.py <task> --<param>=...`,
# and gokart.run() is what executes the requested task.
gokart.run()

In [3]:
# Run the sample tasks from the command line, once per parameter combination.
# stderr is redirected to /dev/null on every command, so error output is hidden.
!python example_task.py sample.SampleTask --model-name='EXAMPLE' --number=1 --local-scheduler 2> /dev/null
!python example_task.py sample.SampleTask --model-name='EXAMPLE' --number=2 --local-scheduler 2> /dev/null
!python example_task.py sample.SampleTask --model-name='EXAMPLE' --number=3 --local-scheduler 2> /dev/null
!python example_task.py sample.SampleTask --model-name='TEMP' --number=1 --local-scheduler 2> /dev/null
!python example_task.py sample.SampleTask --model-name='TEMP' --number=2 --local-scheduler 2> /dev/null
# SecondTask is declared with @requires(SampleTask), so SampleTask's parameters are passed here too.
# NOTE(review): --local-temporary-directory is './resource' (singular) while the workspace
# is './resources' — confirm this is intentional.
!python example_task.py sample.SecondTask --model-name='TEMP' --number=2 --param='RUN' --local-scheduler --local-temporary-directory='./resource' 2> /dev/null

In [4]:
# List the files the task runs above produced in the workspace directory.
!tree ./resources/

[0;34m./resources/[00m
├── [0;34mEXAMPLE[00m
│   ├── sample_222e2155cfed25bcda5234056cf1fa5c.pkl
│   ├── sample_9a30d39315e2db5a8db587544a3bd5c2.pkl
│   └── sample_af3d19290570cca7e6aa0a5a5dc534ea.pkl
├── [0;34mSECOND_TASK[00m
│   └── task_6cf070abfbefc917a198521a26818c7a.pkl
├── [0;34mTEMP[00m
│   ├── sample_c4cfb28a2370db69219de0acc802b721.pkl
│   └── sample_dc93d9e52a67bed92f72193bb7dd10a9.pkl
└── [0;34mlog[00m
    ├── [0;34mprocessing_time[00m
    │   ├── SampleTask_222e2155cfed25bcda5234056cf1fa5c.pkl
    │   ├── SampleTask_9a30d39315e2db5a8db587544a3bd5c2.pkl
    │   ├── SampleTask_af3d19290570cca7e6aa0a5a5dc534ea.pkl
    │   ├── SampleTask_c4cfb28a2370db69219de0acc802b721.pkl
    │   ├── SampleTask_dc93d9e52a67bed92f72193bb7dd10a9.pkl
    │   └── SecondTask_6cf070abfbefc917a198521a26818c7a.pkl
    ├── [0;34mtask_log[00m
    │   ├── SampleTask_222e2155cfed25bcda5234056cf1fa5c.pkl
    │   ├── SampleTask_9a30d39315e2db5a8db587544a3bd5c2.pkl
    │   ├── SampleTask_af3d

---
# Init Thunderbolt
---
using thunderbolt

In [5]:
from thunderbolt import Thunderbolt

In [6]:
# The 1st argument is gokart's TASK_WORKSPACE_DIRECTORY.
# It is read back from the environment so it matches the value set at the top of the notebook.

task_dir = os.environ['TASK_WORKSPACE_DIRECTORY']
tb = Thunderbolt(task_dir) 

100%|██████████| 6/6 [00:00<00:00, 5551.69it/s]


## Check tasks param
checking thunderbolt's task_id

In [7]:
# Get the task table; it is displayed in the next cell.
df = tb.get_task_df()

In [8]:
import pandas as pd
# Raise pandas' per-column display width to 200 characters so long cell values
# such as task_params are less likely to be truncated.
pd.set_option("display.max_colwidth", 200)
df

Unnamed: 0,task_id,task_name,last_modified,task_params
0,0,SampleTask,2019-08-18 01:57:03.552976,"{'model_name': 'EXAMPLE', 'number': '2'}"
1,1,SampleTask,2019-08-18 01:57:06.890364,"{'model_name': 'TEMP', 'number': '2'}"
2,2,SampleTask,2019-08-18 01:57:04.874456,"{'model_name': 'EXAMPLE', 'number': '3'}"
3,3,SampleTask,2019-08-18 01:57:05.879843,"{'model_name': 'TEMP', 'number': '1'}"
4,4,SampleTask,2019-08-18 01:57:02.499170,"{'model_name': 'EXAMPLE', 'number': '1'}"
5,5,SecondTask,2019-08-18 01:57:07.918226,"{'model_name': 'TEMP', 'number': '2', 'param': 'RUN'}"


## thunderbolt filter
The 2nd argument, `task_filters`, is a str or a list of str.  
Each filter is matched against task names by partial match, for example:
 - 'Tag' -> GaussTag, NormalizeGaussTag, MaxwellTag, TagSingle,...
 - ['Train', 'Tag'] -> TrainModel, TrainData, GaussTag, NormalizeGaussTag, MaxwellTag, TagSingle,...

In [9]:
# Rebind `tb` to an instance filtered by the partial task name 'Sample'.
tb = Thunderbolt(task_dir, task_filters='Sample') 

100%|██████████| 6/6 [00:00<00:00, 3927.25it/s]


In [10]:
# Pass all_data=True to request the full task table.
tb.get_task_df(all_data=True)

Unnamed: 0,last_modified,task_hash,task_id,task_log,task_name,task_params
0,2019-08-18 01:57:03.552976,9a30d39315e2db5a8db587544a3bd5c2,0,{'file_path': ['./resources/EXAMPLE/sample_9a30d39315e2db5a8db587544a3bd5c2.pkl']},SampleTask,"{'model_name': 'EXAMPLE', 'number': '2'}"
1,2019-08-18 01:57:06.890364,c4cfb28a2370db69219de0acc802b721,1,{'file_path': ['./resources/TEMP/sample_c4cfb28a2370db69219de0acc802b721.pkl']},SampleTask,"{'model_name': 'TEMP', 'number': '2'}"
2,2019-08-18 01:57:04.874456,af3d19290570cca7e6aa0a5a5dc534ea,2,{'file_path': ['./resources/EXAMPLE/sample_af3d19290570cca7e6aa0a5a5dc534ea.pkl']},SampleTask,"{'model_name': 'EXAMPLE', 'number': '3'}"
3,2019-08-18 01:57:05.879843,dc93d9e52a67bed92f72193bb7dd10a9,3,{'file_path': ['./resources/TEMP/sample_dc93d9e52a67bed92f72193bb7dd10a9.pkl']},SampleTask,"{'model_name': 'TEMP', 'number': '1'}"
4,2019-08-18 01:57:02.499170,222e2155cfed25bcda5234056cf1fa5c,4,{'file_path': ['./resources/EXAMPLE/sample_222e2155cfed25bcda5234056cf1fa5c.pkl']},SampleTask,"{'model_name': 'EXAMPLE', 'number': '1'}"


---
# Data Load
---
Use the `load` method to read task output:
- arg: thunderbolt's task_id
- return: a list of the loaded data

In [11]:
# load() returns a list of loaded data; print one element per line.
loaded_items = tb.load(task_id=3)
for item in loaded_items:
    print(item)

this is sample output. model number: 1


### newest data load example

In [12]:
tb = Thunderbolt(task_dir, task_filters='Second')
# sort_values() sorts ascending by default, so the most recently modified task
# is the LAST row; iloc[0] would select the oldest one instead.
newest_task_id = tb.get_task_df().sort_values('last_modified').task_id.iloc[-1]
tb.load(task_id=newest_task_id)

100%|██████████| 6/6 [00:00<00:00, 6403.52it/s]


['this is sample output. model number: 2add task: RUN']