In [1]:
# Load (or reload) the IPython extensions used by this notebook.
%reload_ext watermark
%reload_ext autoreload
# Autoreload mode 2: re-import edited modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline, below the cell that produced them.
%matplotlib inline
# Print the Python/IPython versions (-v) and the versions of the listed packages (-p).
%watermark -v -p numpy,pandas,matplotlib,sklearn,torch,torchvision,pytorch_lightning

CPython 3.6.9
IPython 7.16.1

numpy 1.18.5
pandas 1.0.4
matplotlib 3.2.1
sklearn 0.23.1
torch 1.6.0.dev20200609+cu101
torchvision 0.7.0.dev20200609+cu101
pytorch_lightning 0.8.5


In [2]:
import ast
import json
import os

import matplotlib.pyplot as plt
import pandas as pd

from k12libs.utils.nb_easy import k12ai_train_execute
from k12libs.utils.nb_easy import k12ai_get_data, k12ai_get_top_dir
from k12libs.utils.nb_easy import k12ai_print, k12ai_set_notebook

# Never truncate DataFrames when they are displayed: show every row and column.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Project-specific notebook setup.
# NOTE(review): presumably `cellw` controls the rendered cell width — confirm.
k12ai_set_notebook(cellw=90)

In [3]:
# Benchmark grid: every backbone is run with every batch size and input size.
backbones = [
    # ResNet / DenseNet family
    "resnet18",
    "resnet50",
    "densenet121",
    # Lightweight architectures
    "mobilenet_v2",
    "squeezenet1_0",
    "squeezenet1_1",
    "shufflenet_v2_x0_5",
    "shufflenet_v2_x1_0",
]

# Dataset names handed to the training service.
datasets = [
    "rmnist",
    "flowers",
    "rchestxray",
    "rDogsVsCats",
]

# Values passed as `batchsize` / `inputsize` to each training job.
batch_sizes = [8, 16, 32, 64]
input_sizes = [28, 32, 64, 128, 224]

## 记录

In [4]:
def _wait_for_result(key, max_try):
    """Poll the 'error' channel of one training job until it reports a final code.

    Args:
        key: job key, as returned by ``k12ai_train_execute``.
        max_try: maximum number of ``k12ai_get_data`` polls before giving up.

    Returns:
        The message payload (``data[0]['value']``) if the job reports code
        100003 (finish); ``None`` if it stops, errors or never answers.
    """
    for _ in range(max_try):
        data = k12ai_get_data(key, 'error', num=1, rm=True, waitcnt=2)
        if not data:
            continue
        try:
            result = data[0]['value']
            code = result['data']['code']
            finished = code == 100003                      # finish
            aborted = code == 100004 or code > 100100      # stop or error
        except (KeyError, IndexError, TypeError) as err:
            # Malformed message: report it and keep polling (best effort).
            print(err, key)
            continue
        if finished or aborted:
            print('-'*40)
            print(result)
            print('-'*40)
            return result if finished else None
        # Any other code is treated as "still running": poll again.
    # Make the timeout visible instead of silently dropping the run.
    print(f'timeout: no final status for {key} after {max_try} polls')
    return None


def mem_stat(dataset, force=False, max_try=1800):
    """Run the whole backbone x batch-size x input-size grid on one dataset.

    A training job is started for every combination of the module-level
    ``backbones``, ``batch_sizes`` and ``input_sizes``. The final report of
    every job that finishes successfully is collected, and the list is written
    as JSON to ``<top_dir>/assets/<dataset>_stats.json``.

    Args:
        dataset: dataset name passed to ``k12ai_train_execute``.
        force: when False (default) and the stats file already exists, nothing
            is run; when True the grid is re-run and the file overwritten.
        max_try: maximum number of polls per job before it is abandoned.

    Returns:
        None. The result is the JSON file written to disk.
    """
    stat_file = os.path.join(k12ai_get_top_dir(), 'assets', f'{dataset}_stats.json')
    if os.path.exists(stat_file) and not force:
        print(f'already exist: {stat_file}')
        return
    # Create the output directory up front so the final write cannot fail on a
    # missing folder after the whole grid has been trained.
    os.makedirs(os.path.dirname(stat_file), exist_ok=True)

    stats = []
    for net in backbones:
        for bs in batch_sizes:
            for ss in input_sizes:
                print(' '*20, net, bs, ss)
                keys = k12ai_train_execute(
                    framework='k12cv', task='cls',
                    network=net, dataset=dataset,
                    batchsize=bs, inputsize=ss, epoch_num=2, run_num=1)
                result = _wait_for_result(keys[0], max_try)
                if result is not None:
                    stats.append(result)
    with open(stat_file, 'w') as f:
        json.dump(stats, f)

In [None]:
# Record statistics for each dataset in turn; datasets whose stats file
# already exists are skipped because force is False.
for dataset in datasets:
    mem_stat(dataset=dataset, force=False)

/hzcsk12/hzcsnote/assets/rmnist_stats.json
already exist: /hzcsk12/hzcsnote/assets/rmnist_stats.json
/hzcsk12/hzcsnote/assets/flowers_stats.json
                     resnet18 8 28


## 分析mnist

### 读取信息

In [None]:
# Load the recorded rmnist reports, then show how many runs were recorded next
# to the size of the full backbone x batch-size x input-size grid.
stats_path = os.path.join(k12ai_get_top_dir(), 'assets', 'rmnist_stats.json')
with open(stats_path, 'r') as fp:
    items = json.load(fp)
expected_runs = len(backbones) * len(batch_sizes) * len(input_sizes)
len(items), expected_runs

### 提取信息

In [None]:
# k12ai_print(items[0])

In [None]:
# Flatten every raw job report into one flat record per training run.
stats = []
for el in items:
    data = el['data']['expand']
    environ = data['environ']
    container = data['resource']['containers'][0]  # only the first container is read
    stats.append({
        'dataset_name': environ['dataset_name'],
        'model_name': environ['model_name'],
        'batch_size': int(environ['batch_size']),
        # input_size arrives as a string; parse it as a literal rather than with
        # eval(), which would execute arbitrary code read from the stats file.
        # NOTE(review): assumes a plain tuple/list literal such as "(28, 28)" — confirm.
        'input_size': ast.literal_eval(environ['input_size'])[0],
        # Total uptime divided by the number of epochs of the run.
        'epoch_time': data['uptime'] / float(environ['num_epochs']),
        'cpu_memory_usage_MB': container['cpu_memory_usage_MB'],
        'gpu_memory_usage_MB': container['gpu_memory_usage_MB']
    })
stats[0], len(stats)

In [None]:
# Err: 1
# k12ai_train_execute(
#     framework='k12cv', task='cls',
#     network='resnet18', dataset='rmnist',
#     batchsize=32, inputsize=28, epoch_num=2, run_num=1)

In [None]:
# Err: 2
# k12ai_train_execute(
#     framework='k12cv', task='cls',
#     network='shufflenet_v2_x1_0', dataset='rmnist',
#     batchsize=32, inputsize=28, epoch_num=2, run_num=1)

In [None]:
# Err: 3
# k12ai_train_execute(
#     framework='k12cv', task='cls',
#     network='shufflenet_v2_x1_0', dataset='rmnist',
#     batchsize=32, inputsize=32, epoch_num=2, run_num=1)

### 图表显示

In [None]:
# One row per recorded rmnist training run.
df_mnist = pd.DataFrame(data=stats)

#### 整体分组表

In [None]:
# Mean of each metric per (model, batch size, input size) combination.
# The metric columns are listed explicitly: without `values`, pivot_table also
# tries to average the string column `dataset_name`, which older pandas drops
# silently but newer pandas rejects with a TypeError.
table_mnist = pd.pivot_table(
    df_mnist,
    index=['model_name', 'batch_size', 'input_size'],
    values=['cpu_memory_usage_MB', 'epoch_time', 'gpu_memory_usage_MB'],
)
table_mnist

#### 整体GPU表

In [None]:
# GPU memory for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_mnist.plot.bar(y='gpu_memory_usage_MB', figsize=(24, 12), fontsize='small');

#### 整体CPU表

In [None]:
# CPU memory for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_mnist.plot.bar(y='cpu_memory_usage_MB', figsize=(24, 12), fontsize='small');

#### 整体时间消耗表

In [None]:
# Time per epoch for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_mnist.plot.bar(y='epoch_time', figsize=(24, 12), fontsize='small');

#### 模型GPU表

In [None]:
# One GPU-memory bar chart per backbone, with a red reference line at 2000 MB.
models_grouped = df_mnist.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['gpu_memory_usage_MB'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='gpu_memory_usage_MB',
                    figsize=(12, 8), fontsize=12)
    xmin, xmax = ax.get_xlim()
    ax.hlines(y=2000, xmin=xmin, xmax=xmax, colors='red')

#### 模型CPU表

In [None]:
# One CPU-memory bar chart per backbone, with a red reference line at 2000 MB.
models_grouped = df_mnist.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['cpu_memory_usage_MB'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='cpu_memory_usage_MB',
                    figsize=(12, 8), fontsize=12)
    xmin, xmax = ax.get_xlim()
    ax.hlines(y=2000, xmin=xmin, xmax=xmax, colors='red')

#### 模型时间消耗表

In [None]:
# One epoch-time bar chart per backbone, with reference lines at 60 / 120 / 180.
# NOTE(review): epoch_time is presumably in seconds — confirm the unit of `uptime`.
models_grouped = df_mnist.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['epoch_time'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='epoch_time',
                    figsize=(12, 8), fontsize=12)
    # Read the limits once so every reference line spans the same range.
    xmin, xmax = ax.get_xlim()
    for level, colour in ((60, 'green'), (120, 'blue'), (180, 'red')):
        ax.hlines(y=level, xmin=xmin, xmax=xmax, colors=colour)

## 分析chestxray

### 读取信息

In [None]:
# Load the recorded rchestxray reports, then show how many runs were recorded
# next to the size of the full backbone x batch-size x input-size grid.
stats_path = os.path.join(k12ai_get_top_dir(), 'assets', 'rchestxray_stats.json')
with open(stats_path, 'r') as fp:
    items = json.load(fp)
expected_runs = len(backbones) * len(batch_sizes) * len(input_sizes)
len(items), expected_runs

### 提取信息

In [None]:
# k12ai_print(items[0])

In [None]:
# Flatten every raw job report into one flat record per training run.
stats = []
for el in items:
    data = el['data']['expand']
    environ = data['environ']
    container = data['resource']['containers'][0]  # only the first container is read
    stats.append({
        'dataset_name': environ['dataset_name'],
        'model_name': environ['model_name'],
        'batch_size': int(environ['batch_size']),
        # input_size arrives as a string; parse it as a literal rather than with
        # eval(), which would execute arbitrary code read from the stats file.
        # NOTE(review): assumes a plain tuple/list literal such as "(28, 28)" — confirm.
        'input_size': ast.literal_eval(environ['input_size'])[0],
        # Total uptime divided by the number of epochs of the run.
        'epoch_time': data['uptime'] / float(environ['num_epochs']),
        'cpu_memory_usage_MB': container['cpu_memory_usage_MB'],
        'gpu_memory_usage_MB': container['gpu_memory_usage_MB']
    })
stats[0], len(stats)

### 图表显示

In [None]:
# One row per recorded rchestxray training run.
df_xray = pd.DataFrame(data=stats)

#### 低于2G组合

In [None]:
# Keep only the runs whose GPU memory usage stayed below 2000 MB.
df_gpu2G = df_xray[df_xray['gpu_memory_usage_MB'] < 2000]
# The metric columns are listed explicitly: without `values`, pivot_table also
# tries to average the string column `dataset_name`, which older pandas drops
# silently but newer pandas rejects with a TypeError.
pd.pivot_table(
    df_gpu2G,
    index=['model_name', 'batch_size', 'input_size'],
    values=['cpu_memory_usage_MB', 'epoch_time', 'gpu_memory_usage_MB'],
)

#### 整体分组表

In [None]:
# Mean of each metric per (model, batch size, input size) combination.
# The metric columns are listed explicitly: without `values`, pivot_table also
# tries to average the string column `dataset_name`, which older pandas drops
# silently but newer pandas rejects with a TypeError.
table_xray = pd.pivot_table(
    df_xray,
    index=['model_name', 'batch_size', 'input_size'],
    values=['cpu_memory_usage_MB', 'epoch_time', 'gpu_memory_usage_MB'],
)
table_xray

#### 整体GPU表

In [None]:
# GPU memory for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_xray.plot.bar(y='gpu_memory_usage_MB', figsize=(24, 12), fontsize='small');

#### 整体CPU表

In [None]:
# CPU memory for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_xray.plot.bar(y='cpu_memory_usage_MB', figsize=(24, 12), fontsize='small');

#### 整体时间消耗表

In [None]:
# Time per epoch for every (model, batch size, input size) combination; the
# trailing semicolon hides the Axes repr.
table_xray.plot.bar(y='epoch_time', figsize=(24, 12), fontsize='small');

#### 模型GPU表

In [None]:
# One GPU-memory bar chart per backbone, with a red reference line at 2000 MB.
models_grouped = df_xray.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['gpu_memory_usage_MB'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='gpu_memory_usage_MB',
                    figsize=(12, 8), fontsize=12)
    xmin, xmax = ax.get_xlim()
    ax.hlines(y=2000, xmin=xmin, xmax=xmax, colors='red')

#### 模型CPU表

In [None]:
# One CPU-memory bar chart per backbone, with a red reference line at 2000 MB.
models_grouped = df_xray.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['cpu_memory_usage_MB'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='cpu_memory_usage_MB',
                    figsize=(12, 8), fontsize=12)
    xmin, xmax = ax.get_xlim()
    ax.hlines(y=2000, xmin=xmin, xmax=xmax, colors='red')

#### 模型时间消耗表

In [None]:
# One epoch-time bar chart per backbone, with reference lines at 5 / 10 / 15.
# NOTE(review): epoch_time is presumably in seconds — confirm the unit of `uptime`.
models_grouped = df_xray.groupby(by='model_name')
for model_name, model_df in models_grouped:
    # Aggregate only the plotted metric: averaging the string columns
    # (dataset_name, model_name) is rejected by newer pandas releases.
    table = pd.pivot_table(
        model_df, index=['batch_size', 'input_size'], values=['epoch_time'])
    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes" state.
    ax = table.plot(kind='bar', title=f'{model_name}', y='epoch_time',
                    figsize=(12, 8), fontsize=12)
    # Read the limits once so every reference line spans the same range.
    xmin, xmax = ax.get_xlim()
    for level, colour in ((5, 'green'), (10, 'blue'), (15, 'red')):
        ax.hlines(y=level, xmin=xmin, xmax=xmax, colors=colour)