<img src="https://raw.github.com/nsdf-fabric/tracy-stats/master/header2.png" width="100%">

![prisms](https://raw.github.com/nsdf-fabric/tracy-stats/master/logo.png)

<center>
<img src="https://nationalsciencedatafabric.org/assets/images/logo.png" /><br>
<A href="https://nationalsciencedatafabric.org">National Science Data Fabric  </A>
</center>

*This work was funded in part by NSF OAC award 2138811, NSF CI CoE award 2127548, DoE award DE-FE0031880, and the Intel oneAPI Centers of Excellence at University of Utah, the Exascale Computing Project (17-SC-20-SC), a collaborative effort of the DoE and the NNSA, and UT-Battelle, LLC under contract DE-AC05-00OR22725.  Results presented in this paper were obtained in part using the Chameleon, CloudLab, CloudBank, Fabric, and ACCESS testbeds supported by the National Science Foundation.*


# Clone files needed for the visualization

In [None]:
!if [ ! -d ./tracy-stats ] ; then git clone https://github.com/nsdf-fabric/tracy-stats ; fi

# Define some utility code

In [110]:
from glob import glob
import os,sys,pickle
from datetime import datetime, timezone
import pandas as p
from IPython.display import display, Markdown,HTML,Javascript
import ipywidgets as widgets

import plotly.express as px
pd.options.plotting.backend = "plotly"

# experiment start date
start_date=pd.to_datetime("2023-11-08 13:44:31", format="%Y-%m-%d %H:%M:%S")

# experiment end date
end_date  =pd.to_datetime("2023-11-14 07:32:08", format="%Y-%m-%d %H:%M:%S")

# current date
every_n_minutes = pd.date_range(start_date, end_date, freq='10 min')
options = [(it.strftime('%Y-%m-%d %H:%M:%S'), it) for it in every_n_minutes]
index = (0, len(options)-1)
slider = widgets.SelectionSlider(
    options=options,
    orientation='horizontal',
   layout={'width': '500px'}
)

In [111]:
display(slider)

SelectionSlider(layout=Layout(width='500px'), options=(('2023-11-08 13:44:31', Timestamp('2023-11-08 13:44:31'…

In [121]:
current_date=pd.to_datetime(slider.value, format='%Y-%m-%d %H:%M:%S')
# print("current_date=",current_date)

def FilterDates(df):
    df["date"]=pd.to_datetime(df["date"], format='%Y-%m-%d %H:%M:%S')
    return df[df["date"]<=current_date]

def GetFiles():
    return FilterDates(pd.read_csv("tracy-stats/files.csv"))

def GetFields():
    return FilterDates(pd.read_csv("tracy-stats/fields.csv"))

def GetAllDics():
    return FilterDates(pd.read_csv("tracy-stats/all_dicts.csv"))

def GetAllPars():
    return FilterDates(pd.read_csv("tracy-stats/all_pars.csv"))

def GetDics():
    with open("tracy-stats/dics.pickle","rb") as f:  p = pickle.load(f)
    ret=[]
    for it, df in p:
        df=FilterDates(df)
        if len(df): ret.append([it,df])
    return ret

def GetPars():
    with open("tracy-stats/pars.pickle","rb") as f: p = pickle.load(f)
    ret=[]
    for it, df in p:
        df=FilterDates(df)
        if len(df): ret.append([it,FilterDates(df)])
    return ret

df=GetFiles()

df=df.sort_values(by=['date'])
# df.set_index("date",inplace=True)

# cumulative number of files
df['num_files']=[I for I,it in enumerate(df.iterrows())]

# compute cumulative sum
df['cumsum'   ]  =df['size'].cumsum()

# I want the extension in a single col
df['ext'      ]=[os.path.splitext(it)[-1] for it in df['path']]

# I want the dirname in a single col
df['dirname'  ]=[os.path.dirname(it) for it in df['path']]

# show the files
df

Unnamed: 0,date,path,size,num_files,cumsum,ext,dirname
1,2023-11-08 13:44:31,/nfs/chess/raw/2023-3/id3a/berman-3804-a/align...,167682,0,167682,.log,/nfs/chess/raw/2023-3/id3a/berman-3804-a/align...
92,2023-11-08 13:56:30,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,517,1,168199,.json,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...
93,2023-11-08 13:56:45,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,4378,2,172577,.txt,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...
112,2023-11-08 13:56:46,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,7902,3,180479,.mcs,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...
111,2023-11-08 13:56:46,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,7059,4,187538,.dat,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...
...,...,...,...,...,...,...,...
1319,2023-11-09 00:24:23,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,8388814,5974,1257346250495,.tif,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...
1320,2023-11-09 00:24:25,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,8388814,5975,1257354639309,.tif,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...
1321,2023-11-09 00:24:27,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,8388814,5976,1257363028123,.tif,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...
1322,2023-11-09 00:24:29,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,8388814,5977,1257371416937,.tif,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...


# Plot the experiment storage activity

In [122]:
import plotly.graph_objects as go

fig = go.Figure()

# plot single file size by time
fig.add_trace(go.Scatter(x=df['date'], y=df['size'     ], name="size"))

# plot cumulative number of files by time
fig.add_trace(go.Scatter(x=df['date'], y=df['num_files'], name="num_files", yaxis="y2"))

# plot cumulative sum by time
fig.add_trace(go.Scatter(x=df['date'], y=df['cumsum'   ], name="cumsum"   , yaxis="y3"))

# show all the plots
fig.update_layout(
    showlegend=False,
    xaxis =dict(domain=[0.0, 0.85]),
    yaxis =dict(title="size"     ,side="right"                                           ,visible=True,titlefont=dict(color="blue" ),tickfont=dict(color="blue" )),
    yaxis2=dict(title="num_files",anchor="free",overlaying="y",side="right",position=0.90,visible=True,titlefont=dict(color="red"  ),tickfont=dict(color="red"  )),
    yaxis3=dict(title="cumsum"   ,anchor="free",overlaying="y",side="right",position=0.95,visible=True,titlefont=dict(color="green"),tickfont=dict(color="green")),
)
fig.layout.dragmode='pan'
fig.show(config=dict({'scrollZoom': True,'displayModeBar': False}))

# Show Type (Near Field vs Far Field) and dataset size

In [123]:
import h5py
fields=GetFields()
fields

Unnamed: 0,date,scan,info,type
0,2023-11-08 13:56:49,1,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,far-field
1,2023-11-08 13:56:50,1,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,far-field
2,2023-11-08 13:58:38,2,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,far-field
3,2023-11-08 13:58:38,2,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,far-field
4,2023-11-08 14:02:20,3,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg-de...,far-field
...,...,...,...,...
4617,2023-11-09 00:24:23,11,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,near-field
4618,2023-11-09 00:24:25,11,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,near-field
4619,2023-11-09 00:24:27,11,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,near-field
4620,2023-11-09 00:24:29,11,/nfs/chess/raw/2023-3/id3a/berman-3804-a/mg1y-...,near-field


# Plot the scan numbers with acquisition type (Far Field  vs Near Field)

In [124]:
fig = px.scatter(fields, x="date", y="scan", hover_data=['info'], symbol='type',color='type')
fig.layout.dragmode='pan'
fig.update_yaxes(type="linear")
fig.update_layout(hovermode='x unified', legend=dict(title= None), hoverlabel=dict(bgcolor='rgba(255,255,255,0.45)'))
fig.update_traces(marker=dict(size=9,line=dict(width=1)),selector=dict(mode='markers'))
fig.show(config=dict({'scrollZoom': True}))

# Extract Sensor Metadata: DICS and PAR FILES

In [125]:
import pandas as pd
import json
import os,sys
import glob
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

# ////////////////////////////////////////////////////////////////
def MultipleLines(df,x,ys,title=""):
    fig = go.Figure()
    fig.layout.dragmode='pan'
    kwargs={}
    for I,y in enumerate(ys):
        fig.add_trace(go.Scatter(x=df[x], y=df[y], name=y, yaxis=f"y{I+1}"))
        kwargs[f"yaxis{I+1}"]=dict(
            title=y,
            side="right",
            overlaying="y" if I else None,
            anchor="free"  if I else None,
            autoshift=True  if I else None
            )
    fig.update_layout(showlegend=True,title=title, **kwargs)
    return fig

dics=GetDics()
pars=GetPars()

all_dics=GetAllDics()
all_pars=GetAllPars()

# show them
MultipleLines(all_dics,"date",["TEN","load_newtons","ome","ramsx"    ],title="dic.par").show(config=dict({'scrollZoom': True}))
MultipleLines(all_pars,"date",["initial_load_newtons","initial_screw"],title="*.par"  ).show(config=dict({'scrollZoom': True}))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Plot Strain Stress

In [126]:
for filename,it in dics:
    fig = px.line(it, x="TEN", y="load_newtons", title='ToDO', markers=True, height=500,text=range(it.shape[0]))
    fig.layout.dragmode='pan'
    fig.update_layout(title=filename)
    fig.update_traces(textposition='top center')
    fig.show(config=dict({'scrollZoom': True}))

# Plot Time histogram

In [127]:
import plotly.express as px
fig =px.histogram(df, x="date")
fig.layout.dragmode='pan'
fig.update_layout(bargap=0.2)
fig.show(config=dict({'scrollZoom': True}))

In [128]:
fig=px.pie(df, names='ext', title='Number of Files per Extension')
fig.layout.dragmode='pan'
fig.show(config=dict({'scrollZoom': True}))

# Plot total size per extension

99% of the big files are `HDF5` for Far Field

In [129]:
fig=px.bar(df.groupby(['ext'])['size'].sum())
fig.layout.dragmode='pan'
fig.show(config=dict({'scrollZoom': True}))