In [1]:
from itertools import chain
from collections import Counter
import re
import subprocess
import os
from pathlib import Path

import pandas as pd
import plotly.express as px
from pandas import Series, DataFrame
import cufflinks as cf
import lunchbox.tools as lbt
import numpy as np

import rolling_pin.tools as rpt
from rolling_pin.blob_etl import BlobETL

# Notebook-wide cufflinks setup, run once after the imports.
# go_offline() switches cufflinks to plotly's offline mode so figures render
# in the notebook without a plotly account.
cf.go_offline()
# NOTE(review): set_config_file presumably persists these settings to the
# user's cufflinks config file, so they outlive this kernel - confirm.
cf.set_config_file(theme='henanigans', colorscale='henanigans')

In [29]:
def get_disk_data(root, exclude_regex='/proc|/sys|/dev|/tmp|/mnt'):
    """
    Collect the size of every file found beneath a directory.

    Args:
        root (str or Path): Directory to scan.
        exclude_regex (str, optional): Pattern forwarded unchanged to
            rolling_pin.tools.directory_to_dataframe as its exclude_regex
            argument. Default: '/proc|/sys|/dev|/tmp|/mnt'.

    Returns:
        DataFrame: Rows of the directory listing whose filepath passes
        os.path.isfile, reindexed from 0, with two added columns:
        size_bytes (os.stat(...).st_size) and size_mb (size_bytes / 2**20).
    """
    listing = rpt.directory_to_dataframe(root, exclude_regex=exclude_regex)

    # Cast to bool so the mask is still a boolean indexer when the listing is
    # empty (apply on an empty column yields a non-boolean dtype, which pandas
    # would otherwise interpret as a list of column labels).
    is_file = listing.filepath.apply(os.path.isfile).astype(bool)

    # reset_index (not in-place) hands back a fresh frame, so the column
    # assignments below never write into a slice of `listing`.
    files = listing[is_file].reset_index(drop=True)

    # A file can vanish or be unreadable between listing and stat; the third
    # argument to lbt.try_ is the fallback value (presumably returned when
    # os.stat raises - confirm against lunchbox docs).
    files['size_bytes'] = files.filepath.apply(
        lambda y: lbt.try_(lambda x: os.stat(x).st_size, y, 0)
    )
    files['size_mb'] = files.size_bytes / 2**20
    return files

def get_sunburst_diagram(data, level):
    """
    Build a plotly sunburst figure of disk usage aggregated by path prefix.

    Each filepath is truncated to its first `level` pathlib parts (for an
    absolute POSIX path the first part is the root '/'), size_mb is summed per
    truncated path, and the totals are drawn as a sunburst. The input frame is
    read only; it is never modified.

    Args:
        data (DataFrame): Frame with `filepath` and `size_mb` columns, such as
            the one returned by get_disk_data.
        level (int): Number of leading path parts to keep when grouping.

    Returns:
        Figure: The object returned by plotly.express.sunburst, sized
        1000 x 1000 with a dark background.
    """
    # Build the grouping key as a standalone array rather than a temporary
    # column on `data`, so the caller's frame (and any 'group' column it may
    # already carry) is left untouched even if a later step raises.
    group_key = data.filepath \
        .apply(lambda x: Path(*Path(x).parts[:level]).as_posix()) \
        .to_numpy()
    totals = data.size_mb.groupby(group_key).sum()

    # BlobETL turns the {path: size} mapping into one column per path segment
    # plus a 'value' column (presumably splitting keys on '/' - confirm against
    # rolling_pin docs). Paths shallower than the deepest one leave NaN in the
    # trailing segment columns; those become the '-->' placeholder.
    grp = BlobETL(totals.to_dict()).to_dataframe().fillna('-->')

    # NOTE(review): the 0.001 offset presumably keeps zero-byte groups from
    # producing zero-weight sectors, which plotly cannot color-average.
    grp['size_mb'] = grp['value'] + 0.001
    del grp['value']

    # Every remaining column is a path segment level, outermost first.
    cols = grp.drop('size_mb', axis=1).columns.tolist()
    colorscale = [
        (0.00, '#242424'),
        (1.00, '#8AD9E3'),
    ]
    fig = px.sunburst(
        grp,
        path=cols,
        values='size_mb',
        color='size_mb',
        color_continuous_scale=colorscale,
    )
    fig.layout.paper_bgcolor = '#181818'
    fig.layout.font.color = '#A4A4A4'
    fig.layout.width = 1000
    fig.layout.height = 1000
    return fig

In [30]:
# Disk usage under /home/ubuntu/pdm, grouped by the first 10 path parts.
# NOTE(review): LogRuntime presumably logs the elapsed time of the block at
# warn level - confirm against lunchbox docs.
with lbt.LogRuntime(level='warn'):
    data = get_disk_data('/home/ubuntu/pdm')
    fig = get_sunburst_diagram(data, 10)
# Bare last expression so the notebook renders the figure.
fig

In [7]:
# Disk usage under /usr, grouped by the first 5 path parts.
# Rebinds the notebook-level `data` and `fig` names.
with lbt.LogRuntime(level='warn'):
    data = get_disk_data('/usr')
    fig = get_sunburst_diagram(data, 5)
# Bare last expression so the notebook renders the figure.
fig

In [9]:
# Disk usage under /lib, grouped by the first 5 path parts.
# Rebinds the notebook-level `data` and `fig` names.
with lbt.LogRuntime(level='warn'):
    data = get_disk_data('/lib')
    fig = get_sunburst_diagram(data, 5)
# Bare last expression so the notebook renders the figure.
fig

In [49]:
# Disk usage under /home/ubuntu, grouped by the first 5 path parts.
# Passing exclude_regex='/rolling-pin' replaces get_disk_data's default
# pattern ('/proc|/sys|/dev|/tmp|/mnt') rather than adding to it.
with lbt.LogRuntime(level='warn'):
    data = get_disk_data('/home/ubuntu', exclude_regex='/rolling-pin')
    fig = get_sunburst_diagram(data, 5)
# Bare last expression so the notebook renders the figure.
fig