In [None]:
# Run this notebook with access to the `strawb.Config.pandas_file_sync_db` file,
# i.e. on an LRZ VM with the DSS mounted,
# or download `/dss/strawb/raw_module_data/pandas_file_sync_db.gz` to your local machine
# and adapt `pandas.read_pickle(strawb.Config.pandas_file_sync_db)` accordingly.

import plotly.offline
# Initialise plotly's offline rendering inside the notebook.
# NOTE(review): `connected=True` presumably loads plotly.js from the web instead of
# embedding it in the notebook - confirm against the plotly documentation.
plotly.offline.init_notebook_mode(connected=True)

import strawb

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio

import pandas

import numpy as np
import datetime

# Use the "plotly_white" template as the default for the plotly figures.
pio.templates.default = "plotly_white"

In [None]:
# Load the file-sync database and build the complete path of every file.
# NOTE: unpickling can execute arbitrary code - only load this file from a trusted source.
# The `outPath` column is renamed to `fullPath` and the file name is appended to it
# (presumably `outPath` holds the directory of the file - verify against the DB schema).
pd_sync = (
    pandas.read_pickle(strawb.Config.pandas_file_sync_db)
    .rename(columns={"outPath": "fullPath"})
    .assign(fullPath=lambda frame: frame["fullPath"] + '/' + frame['filename'])
)

In [None]:
# Display the file-sync table (bare last expression -> rich notebook output).
pd_sync

## File size by deviceCode

In [None]:
# One marker per file: `fileSize` over `dateFrom`, coloured by device.
# Hovering a marker additionally shows the file name and the data product code.
# Hint: pass `pd_sync[pd_sync['fileSize'] > 5e8]` instead of `pd_sync`
# to restrict the plot to large files.
fig_1 = px.scatter(
    pd_sync,
    x='dateFrom',
    y='fileSize',
    color='deviceCode',
    hover_data=['filename', 'dataProductCode'],
    opacity=.5,
)
fig_1.show()

## File size by dataProductCode

In [None]:
# One marker per file: `fileSize` over `dateFrom`, coloured by data product code.
# Hovering a marker additionally shows the file name and the device code.
# Hint: pass `pd_sync[pd_sync['fileSize'] > 5e8]` instead of `pd_sync`
# to restrict the plot to large files.
fig_2 = px.scatter(
    pd_sync,
    x='dateFrom',
    y='fileSize',
    color='dataProductCode',
    hover_data=['filename', 'deviceCode'],
    opacity=.5,
)
fig_2.show()

## Total File Size per Day and Module

In [None]:
# Calculate the bin edges: one bin per calendar day.
# The edges are aligned to midnight and reach up to the midnight *after* the
# latest file. Without that extra edge every file later than the last edge
# (up to almost a full day of data) would be dropped from the histogram, and
# data spanning less than one day would produce no bin at all.
first_day = pandas.Timestamp(pd_sync['dateFrom'].min()).normalize()
last_day = pandas.Timestamp(pd_sync['dateFrom'].max()).normalize()
bins = pandas.date_range(start=first_day,
                         end=last_day + pandas.Timedelta(days=1),
                         freq='D',
                         ).to_numpy('datetime64')

# Exclude log files ('LF'): they are not produced by the module itself but by
# the ONC DAQ system. The filter does not depend on the device, so apply it once.
pd_module_data = pd_sync[pd_sync['dataProductCode'] != 'LF']

# Calculate for each module the total (uncompressed) file size per day (bin).
# Iterate over the devices of the unfiltered table, so a device that only has
# log files still gets an (all-zero) column.
size_per_day_by_dev = {}
for dev_i in pd_sync['deviceCode'].unique():
    pd_dev_i = pd_module_data[pd_module_data['deviceCode'] == dev_i]

    # weighting each file by its size turns the per-day counts into per-day sums
    hist, bin_edges = np.histogram(pd_dev_i['dateFrom'].to_numpy('datetime64'),
                                   bins=bins,
                                   weights=pd_dev_i['uncompressedFileSize'])

    size_per_day_by_dev[dev_i] = hist

# Convert it to a DataFrame: one column per module, indexed by the start of each day.
size_per_day = pandas.DataFrame(size_per_day_by_dev, index=bins[:-1])

In [None]:
# Scatter plot of the daily totals, one colour per module, with a violin
# marginal on the y-axis.
# The y-range is fitted to the non-zero totals with a 10 % margin on both sides;
# masking the zeros keeps empty days from dictating the lower limit.
nonzero_per_day = size_per_day[size_per_day != 0]
y_lower = nonzero_per_day.min().min() * .9
y_upper = nonzero_per_day.max().max() * 1.1

px.scatter(
    size_per_day,
    marginal_y="violin",
    labels={
        "index": "Date",
        "value": "File Size per Day (Byte)",
        "variable": "Module",
    },
    range_y=(y_lower, y_upper),
)

In [None]:
# Line plot of the daily totals, one line per module, on a logarithmic y-axis.
# Lower y-limit: fixed at 49e3 (alternative: smallest non-zero total * .9).
# Upper y-limit: largest non-zero total plus a 10 % margin.
# Optional tweaks that are currently not used: `markers=True`,
# `line_shape="spline"` (default is 'linear'), and `fig.update_layout(...)`
# with 'showline': True, 'ticks': 'outside', 'linecolor': 'rgb(36,36,36)'
# for both axes.
nonzero_totals = size_per_day[size_per_day != 0]
y_limits = (49e3, nonzero_totals.max().max() * 1.1)

fig = px.line(
    size_per_day,
    y=size_per_day.keys(),
    log_y=True,
    range_y=y_limits,
    labels={
        "index": "Date",
        "value": "File Size per Day (Byte)",
        "variable": "Module",
    },
    render_mode="svg",
)
fig.show()