In [1]:
import glob
import json
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
from functools import reduce
from pylab import rcParams
rcParams['figure.figsize'] = 10,10

In [2]:
# benchmarking throughput

target_ids = [
    '30I8399019000',
    '30I8399019001',
    '30I8399019002',
    '30I8399019003',
    '30I8399019004',
    '30I8399019005',
    '30I8399019006',
    '30I8399019007',
    '30I8399019008',
    '30I8399019009',
    '30I8399019010',
    '30I8399019011',
    '30I8399019012',
    '30I8399019013',
]
base_dir = '../'
download_dir = 'tmp/collect-all'
benchmark_dir = 'tmp/benchmark_throughput'

def parse_elapsed(lines):
    ret = {}
    for x in lines:
        key, value = x.split('\t')
        m, s = value.replace('s', '').split('m')
        ret[key] = float(m)*60 + float(s)
    return ret

def file_stat(files):
    ret = {}
    ret['file_num'] = len(files)
    ret['file_size'] = reduce(lambda a,b: a+b ,map(os.path.getsize, files))

    return ret

cmd_rm_stat = 'cd {base_dir} && mkdir -p {benchmark_dir} && rm -f {benchmark_dir}/stat-{id}.json'
cmd_calc_stat = 'cd {base_dir} && time python3 lastomesh.py --local-scheduler --workers 4 DownloadShizuokaPCD --product-id {id} --output-dir {benchmark_dir} --work-dir {download_dir}/{id}'

throughputs = []
for id in target_ids:

    # ファイル数, 容量
    las_files = list(glob.glob(os.path.join(base_dir, download_dir, id, '*.las')))
    fstat = file_stat(las_files)
    !{cmd_rm_stat.format(base_dir=base_dir, benchmark_dir=benchmark_dir, id=id)}

    # 読み込み&特徴量算出 時間計測
    output = !{cmd_calc_stat.format(base_dir=base_dir, download_dir=download_dir, benchmark_dir=benchmark_dir, id=id)}
    elapsed = parse_elapsed(output[-3:])

    # 点の数
    filepath = os.path.join(base_dir, benchmark_dir, 'stat-{}.json'.format(id))
    with open(filepath, 'r') as f:
        pcd_stat = json.load(f)

    # 結果まとめる
    record = dict(fstat, **elapsed, point_num=pcd_stat['shape']['value'][0], id=id)
    print(record)
    throughputs.append(record)

df_throughputs = pd.DataFrame(throughputs)
df_throughputs

{'file_num': 4, 'file_size': 641946890, 'real': 6.325, 'user': 15.429, 'sys': 3.01, 'point_num': 18880735, 'id': '30I8399019000'}
{'file_num': 4, 'file_size': 635859292, 'real': 6.885, 'user': 15.734, 'sys': 2.71, 'point_num': 18701690, 'id': '30I8399019001'}
{'file_num': 4, 'file_size': 641868078, 'real': 6.522, 'user': 15.373, 'sys': 2.933, 'point_num': 18878420, 'id': '30I8399019002'}
{'file_num': 4, 'file_size': 649677538, 'real': 6.626, 'user': 15.565, 'sys': 2.798, 'point_num': 19108110, 'id': '30I8399019003'}
{'file_num': 4, 'file_size': 648882006, 'real': 6.93, 'user': 16.052, 'sys': 3.017, 'point_num': 19084710, 'id': '30I8399019004'}
{'file_num': 4, 'file_size': 637587546, 'real': 6.204, 'user': 15.104, 'sys': 2.795, 'point_num': 18752520, 'id': '30I8399019005'}
{'file_num': 3, 'file_size': 605604597, 'real': 6.918, 'user': 14.539, 'sys': 2.899, 'point_num': 17811860, 'id': '30I8399019006'}
{'file_num': 3, 'file_size': 618411615, 'real': 6.065, 'user': 14.802, 'sys': 3.032, '

Unnamed: 0,file_num,file_size,real,user,sys,point_num,id
0,4,641946890,6.325,15.429,3.01,18880735,30I8399019000
1,4,635859292,6.885,15.734,2.71,18701690,30I8399019001
2,4,641868078,6.522,15.373,2.933,18878420,30I8399019002
3,4,649677538,6.626,15.565,2.798,19108110,30I8399019003
4,4,648882006,6.93,16.052,3.017,19084710,30I8399019004
5,4,637587546,6.204,15.104,2.795,18752520,30I8399019005
6,3,605604597,6.918,14.539,2.899,17811860,30I8399019006
7,3,618411615,6.065,14.802,3.032,18188535,30I8399019007
8,4,629775264,6.124,15.161,2.858,18522750,30I8399019008
9,4,650957842,6.307,15.473,3.023,19145765,30I8399019009


In [3]:
throughput_point = df_throughputs['point_num'].sum() / df_throughputs['real'].sum()
file_size_sum = df_throughputs['file_size'].sum() / (1024 ** 3)
elapsed_per_1gb = df_throughputs['real'].sum() / file_size_sum
print(throughput_point, elapsed_per_1gb, file_size_sum, df_throughputs['point_num'].sum())

2913049.7523390194 10.841062682215895 8.380174772813916 264650570


In [4]:
# benchmarking downsampling

target_projects = {
    'mms':[
        '30I8399019000',
        '30I8399019001',
        '30I8399019002',
        '30I8399019003',
        '30I8399019004',
        '30I8399019005',
        '30I8399019006',
        '30I8399019007',
        '30I8399019008',
        '30I8399019009',
    ],
    'construction': [
        '28XXX00040001',
        '29D6152011105',
        '29K2033011103',
        '29K3481011101',
        '29W9350011101',
        '30D0230011102',
        '30D0721011102',
        '30D3703011102',
        '30D7318011101',
        '30K2664011102',
        '31K2650011102',
    ],
    'building': [
        '01R0107011318',
        '01R0107021318',
        '01R0107031318',
        '01R0107041318',
        '01R0107051318',
        '01R0107061318',
        '01R0107071318',
        '29XXX00010002',
        '30XXX03010001',
        '31XXX07010001',
    ],
    'terrain': [
        '28XXX00030001',
        '28XXX00030002',
        '28XXX00030003',
        '28XXX00030004',
        '30XXX00010001',
        '30XXX00010002',
        '30XXX00010003',
        '30XXX00010004',
        '30XXX00010005',
        '30XXX00010062',
    ],
}
base_dir = '../'
download_dir = 'tmp/collect-all'
benchmark_dir = 'tmp/benchmark_throughput'

cmd_rm_ply = 'cd {base_dir} && mkdir -p {benchmark_dir} && rm -f {benchmark_dir}/pcd-{id}.ply'
cmd_calc_ply = 'cd {base_dir} && time python3 lastomesh.py --local-scheduler --workers 4 CreateMeshFromLasData --product-id {id} --output-dir {benchmark_dir} --work-dir {download_dir}/{id} --output-filename pcd-{id}.ply --skip-meshing true'

downsampling = []
for group, target_ids in target_projects.items():
    for id in target_ids:
        print(id)
        # 読み込み&特徴量算出 時間計測
        !{cmd_rm_ply.format(base_dir=base_dir, benchmark_dir=benchmark_dir, id=id)}
        output = !{cmd_calc_ply.format(base_dir=base_dir, download_dir=download_dir, benchmark_dir=benchmark_dir, id=id)}
        elapsed = parse_elapsed(output[-3:])

        # ファイル数, 容量
        las_files = list(glob.glob(os.path.join(base_dir, download_dir, id, '*.las')))
        fstat = file_stat(las_files)

        ply_files = list(glob.glob(os.path.join(base_dir, benchmark_dir, 'pcd-{}.ply'.format(id))))
        fstat_ply = file_stat(ply_files)

        # 結果まとめる
        record = dict(ply_size=fstat_ply['file_size'], **fstat, **elapsed, id=id, group=group)
        # print(record)
        downsampling.append(record)

df_downsampling = pd.DataFrame(downsampling)
df_downsampling

30I8399019000
30I8399019001
30I8399019002
30I8399019003
30I8399019004
30I8399019005
30I8399019006
30I8399019007
30I8399019008
30I8399019009
28XXX00040001
29D6152011105
29K2033011103
29K3481011101
29W9350011101
30D0230011102
30D0721011102
30D3703011102
30D7318011101
30K2664011102
31K2650011102
01R0107011318
01R0107021318
01R0107031318
01R0107041318
01R0107051318
01R0107061318
01R0107071318
29XXX00010002
30XXX03010001
31XXX07010001
28XXX00030001
28XXX00030002
28XXX00030003
28XXX00030004
30XXX00010001
30XXX00010002
30XXX00010003
30XXX00010004
30XXX00010005
30XXX00010062


Unnamed: 0,ply_size,file_num,file_size,real,user,sys,id,group
0,73627212,4,641946890,6.049,5.441,1.537,30I8399019000,mms
1,75580581,4,635859292,6.057,5.464,1.522,30I8399019001,mms
2,75400437,4,641868078,6.106,5.488,1.548,30I8399019002,mms
3,77291247,4,649677538,6.279,5.56,1.649,30I8399019003,mms
4,73451226,4,648882006,6.164,5.602,1.49,30I8399019004,mms
5,75293463,4,637587546,6.092,5.46,1.56,30I8399019005,mms
6,75284391,3,605604597,6.048,5.4,1.576,30I8399019006,mms
7,74826147,3,618411615,5.985,5.436,1.478,30I8399019007,mms
8,75318006,4,629775264,6.057,5.495,1.492,30I8399019008,mms
9,77331153,4,650957842,6.206,5.605,1.531,30I8399019009,mms


In [5]:
downsampling_mean = df_downsampling.mean()
downsampling_sum = df_downsampling.sum()

reduce_ratio = downsampling_sum['ply_size'] / downsampling_sum['file_size']
elapsed_per_1gb = downsampling_sum['real'] / (downsampling_sum['file_size'] / 1024**3 )
print(elapsed_per_1gb, reduce_ratio, downsampling_sum['ply_size'] / 1024**2 / 40)

13.36488471343121 0.12300424984069563 71.78042516708373


In [6]:
df_downsampling['file_size_mb'] = df_downsampling['file_size'] / 1024**2
df_downsampling['ply_size_mb'] = df_downsampling['ply_size'] / 1024**2

for key, downsampling_sum in df_downsampling.groupby('group').sum().iterrows():
    reduce_ratio = downsampling_sum['ply_size'] / downsampling_sum['file_size']
    elapsed_per_1gb = downsampling_sum['real'] / (downsampling_sum['file_size'] / 1024**3 )
    print(key, elapsed_per_1gb, reduce_ratio)

df_downsampling.groupby('group').mean()

building 14.959669971046694 0.12398559741321595
construction 16.580541003364356 0.12533997234114727
mms 10.304802129184049 0.11844909872480014
terrain 11.17500190382415 0.12464337712348633


Unnamed: 0_level_0,ply_size,file_num,file_size,real,user,sys,file_size_mb,ply_size_mb
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
building,90282880.0,4.0,728172400.0,10.1451,9.2756,1.7277,694.439277,86.100469
construction,66560880.0,3.272727,531042700.0,8.200273,7.617273,1.512818,506.441815,63.477403
mms,75340390.0,3.8,636057100.0,6.1043,5.4951,1.5383,606.591288,71.850191
terrain,62228680.0,2.9,499253800.0,5.196,4.8301,1.2887,476.125556,59.345897


In [7]:
base_dir = '../'
download_dir = 'tmp/collect-all'
benchmark_dir = 'tmp/benchmark_throughput'

cmd_rm_ply = 'cd {base_dir} && mkdir -p {benchmark_dir} && rm -f {benchmark_dir}/mesh-{id}.ply'
cmd_calc_ply = 'cd {base_dir} && time python3 lastomesh.py --local-scheduler --workers 4 CreateMeshFromLasData --product-id {id} --output-dir {benchmark_dir} --work-dir {download_dir}/{id} --output-filename mesh-{id}.ply --simplify-type vertex-clustering'

meshing = []
for group, target_ids in target_projects.items():
    for id in target_ids:
        print(id)
        # 読み込み&特徴量算出 時間計測
        !{cmd_rm_ply.format(base_dir=base_dir, benchmark_dir=benchmark_dir, id=id)}
        output = !{cmd_calc_ply.format(base_dir=base_dir, download_dir=download_dir, benchmark_dir=benchmark_dir, id=id)}
        elapsed = parse_elapsed(output[-3:])

        # ファイル数, 容量
        las_files = list(glob.glob(os.path.join(base_dir, download_dir, id, '*.las')))
        fstat = file_stat(las_files)

        ply_files = list(glob.glob(os.path.join(base_dir, benchmark_dir, 'mesh-{}.ply'.format(id))))
        fstat_ply = file_stat(ply_files)

        # 結果まとめる
        record = dict(ply_size=fstat_ply['file_size'], **fstat, **elapsed, id=id, group=group)
        # print(record)
        meshing.append(record)

df_meshing = pd.DataFrame(meshing)
df_meshing

30I8399019000
30I8399019001
30I8399019002
30I8399019003
30I8399019004
30I8399019005
30I8399019006
30I8399019007
30I8399019008
30I8399019009
28XXX00040001
29D6152011105
29K2033011103
29K3481011101
29W9350011101
30D0230011102
30D0721011102
30D3703011102
30D7318011101
30K2664011102
31K2650011102
01R0107011318
01R0107021318
01R0107031318
01R0107041318
01R0107051318
01R0107061318
01R0107071318
29XXX00010002
30XXX03010001
31XXX07010001
28XXX00030001
28XXX00030002
28XXX00030003
28XXX00030004
30XXX00010001
30XXX00010002
30XXX00010003
30XXX00010004
30XXX00010005
30XXX00010062


Unnamed: 0,ply_size,file_num,file_size,real,user,sys,id,group
0,10981199,4,641946890,38.633,210.969,10.619,30I8399019000,mms
1,10606062,4,635859292,30.777,155.412,8.399,30I8399019001,mms
2,10048693,4,641868078,22.377,93.551,6.369,30I8399019002,mms
3,9963153,4,649677538,24.105,107.598,6.521,30I8399019003,mms
4,10368664,4,648882006,25.248,115.607,6.717,30I8399019004,mms
5,10217828,4,637587546,22.994,98.924,6.038,30I8399019005,mms
6,8947008,3,605604597,22.548,98.743,6.24,30I8399019006,mms
7,9325417,3,618411615,26.818,128.235,7.696,30I8399019007,mms
8,9164972,4,629775264,25.999,122.753,7.832,30I8399019008,mms
9,9690015,4,650957842,24.103,107.275,6.695,30I8399019009,mms


In [8]:
meshing_mean = df_meshing.mean()
meshing_sum = df_meshing.sum()

reduce_ratio = meshing_sum['ply_size'] / meshing_sum['file_size']
elapsed_per_1gb = meshing_sum['real'] / (meshing_sum['file_size'] / 1024**3 )
print(elapsed_per_1gb, reduce_ratio, meshing_sum['file_size'] / 1024**2 / 40, meshing_sum['ply_size'] / 1024**2 / 40)
meshing_sum

64.45968515485137 0.016578674243309163 583.5605294942856 9.674659919738769


ply_size                                             405784648
file_num                                                   143
file_size                                          24476302631
real                                                   1469.38
user                                                   7104.51
sys                                                    381.814
id           30I839901900030I839901900130I839901900230I8399...
group        mmsmmsmmsmmsmmsmmsmmsmmsmmsmmsconstructioncons...
dtype: object

In [9]:
df_meshing['file_size_mb'] = df_meshing['file_size'] / 1024**2
df_meshing['ply_size_mb'] = df_meshing['ply_size'] / 1024**2

for key, meshing_sum in df_meshing.groupby('group').sum().iterrows():
    reduce_ratio = meshing_sum['ply_size'] / meshing_sum['file_size']
    elapsed_per_1gb = meshing_sum['real'] / (meshing_sum['file_size'] / 1024**3 )
    print(key, elapsed_per_1gb, reduce_ratio)

df_meshing.groupby('group').mean()

building 70.19562402230672 0.01806078839745644
construction 50.06770839239708 0.016481949425397158
mms 44.49922924589507 0.015613852307253385
terrain 98.36281410164722 0.0157593490596977


Unnamed: 0_level_0,ply_size,file_num,file_size,real,user,sys,file_size_mb,ply_size_mb
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
building,13151370.0,4.0,728172400.0,47.6041,228.93,10.614,694.439277,12.542121
construction,8752619.0,3.272727,531042700.0,24.762091,94.526909,5.997,506.441815,8.347148
mms,9931301.0,3.8,636057100.0,26.3602,123.9067,7.3126,606.591288,9.471227
terrain,7867915.0,2.9,499253800.0,45.7354,253.6348,13.6581,476.125556,7.503429
