In [45]:
# HEADER for HTML pages
from datetime import datetime
from IPython.display import display, Markdown,HTML
# Page banner: a static HTML template whose "{todays_date}" placeholder is
# filled with the current local time, truncated to whole seconds.
banner_template = """
<section style="background-image: url('https://raw.github.com/nsdf-fabric/nsdf-web/master/assets/images/header.background.gif');background-size: 100%;background-position: center;">
<h1 style="font-family: helvetica;color: white;font-size: 3.25rem;text-align: center;"><br>gopalan stats</h1>
<h2 style="font-family: helvetica;color: white;font-size: 1.25rem;text-align: center;font-weight: normal;">Last Updated {todays_date} - by <a href='https://nationalsciencedatafabric.org/'> National Science Data Fabric </a><br><br><br><br></h2>
</section>
"""
last_updated = datetime.now().isoformat(sep=" ", timespec="seconds")
display(HTML(banner_template.replace("{todays_date}", last_updated)))

In [None]:
from glob import glob
import os
import sys
from datetime import datetime, timezone
import pandas as pd
import plotly.express as px
pd.options.plotting.backend = "plotly"

# TO CHANGE AS NEEDED
DIR="/nfs/chess/id4b/2023-3/gopalan-3842-a"


def scan_files(root):
    """Collect ``(mtime, path, size)`` for every regular file under ``root``.

    Paths come from a recursive ``glob`` of ``{root}/**/*``. Each path is
    stat'ed exactly once; entries that cannot be stat'ed (broken symlink,
    file removed while the scan is running, ...) and entries that are not
    regular files are skipped instead of aborting the scan.

    Args:
        root: directory to scan.

    Returns:
        A list of ``(mtime, path, size)`` tuples in ``glob`` order, where
        ``mtime`` is ``datetime.fromtimestamp(st_mtime)`` (time of the most
        recent content modification) and ``size`` is ``st_size`` in bytes.
    """
    from stat import S_ISREG  # local import so this cell stays self-contained

    records = []
    for filename in glob(f'{root}/**/*', recursive=True):
        try:
            info = os.stat(filename)
        except OSError:
            # The entry disappeared or is unreadable: leave it out of the stats.
            continue
        if not S_ISREG(info.st_mode):
            continue
        records.append((datetime.fromtimestamp(info.st_mtime), filename, info.st_size))
    return records


def build_file_table(records):
    """Turn ``(mtime, path, size)`` records into the per-file statistics table.

    Args:
        records: iterable of ``(mtime, path, size)`` tuples.

    Returns:
        A DataFrame sorted by ``date`` with the columns
        ``date, path, size, num_files, cumsum, ext, dirname``.
        ``num_files`` and ``cumsum`` are running totals that both include the
        current row (the first row has ``num_files == 1``).
    """
    table = pd.DataFrame(records, columns=['date', 'path', 'size'])
    # Stable sort: files sharing an mtime keep their scan order on every run.
    table = table.sort_values(by=['date'], kind='mergesort')
    # Explicit integer dtype so an empty scan still yields a numeric column.
    table['size'] = table['size'].astype('int64')
    table['num_files'] = list(range(1, len(table) + 1))
    table['cumsum'] = table['size'].cumsum()
    table['ext'] = [os.path.splitext(path)[-1] for path in table['path']]
    table['dirname'] = [os.path.dirname(path) for path in table['path']]
    return table


files = scan_files(DIR)
df = build_file_table(files)

In [None]:
import plotly.graph_objects as go

def right_axis(title, color, position=None):
    """Build the layout dict for one colour-coded y axis drawn on the right.

    The axis title is given as ``title=dict(text=..., font=...)``; the legacy
    ``titlefont`` attribute is deprecated and rejected by recent plotly
    releases.

    Args:
        title: axis title text.
        color: colour applied to both the title and the tick labels.
        position: when given, the axis is a free axis overlaid on "y" and
            placed at this x position (paper coordinates); when omitted the
            axis is the primary y axis.

    Returns:
        A dict suitable for ``fig.update_layout(yaxis=...)``.
    """
    axis = dict(
        title=dict(text=title, font=dict(color=color)),
        tickfont=dict(color=color),
        side="right",
        visible=True,
    )
    if position is not None:
        axis.update(anchor="free", overlaying="y", position=position)
    return axis


# Three curves over time, each on its own y axis: per-file size, running
# file count and cumulative size.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['date'], y=df['size'], name="size"))
fig.add_trace(go.Scatter(x=df['date'], y=df['num_files'], name="num_files", yaxis="y2"))
fig.add_trace(go.Scatter(x=df['date'], y=df['cumsum'], name="cumsum", yaxis="y3"))

fig.update_layout(
    showlegend=False,
    # The plot area stops at 0.85 to leave room for the two extra axes.
    xaxis=dict(domain=[0.0, 0.85]),
    yaxis=right_axis("size", "blue"),
    yaxis2=right_axis("num_files", "red", position=0.90),
    yaxis3=right_axis("cumsum", "green", position=0.95),
    dragmode='pan',
)
fig.show(config={'scrollZoom': True, 'displayModeBar': False})

In [None]:
header=['date', 'temperature',"sample", "color", "basename","size"]


def parse_cbf_path(path, root):
    """Split a .cbf path into the fields plotted in the temperature chart.

    Example (with ``root`` = ``/nfs/chess/id4b/2023-3/gopalan-3842-a``):
        ``<root>/aw6M/KNO_GSO/sample1/200/KNO_GSO_019/KNO_GSO_PIL10_019_01239.cbf``
        -> ``(200, "aw6M/KNO_GSO/sample1",
              "aw6M/KNO_GSO/sample1/200/KNO_GSO_019",
              "KNO_GSO_PIL10_019_01239.cbf")``

    Args:
        path: "/"-separated path of the file.
        root: the scanned root directory; its components are stripped from
            the front of ``path`` to build ``color``.

    Returns:
        ``(temperature, sample, color, basename)``, or ``None`` when the
        third-from-last path component is missing or is not an integer.
    """
    parts = path.split("/")
    try:
        temperature = int(parts[-3])
    except (IndexError, ValueError):
        return None
    sample = "/".join(parts[-6:-3])
    color = "/".join(parts[len(root.split("/")):-1])
    return temperature, sample, color, parts[-1]


rows=[]
# .cbf files that do not follow the expected layout; kept for inspection.
unparsed_cbf_paths=[]
cbf_files = df.loc[df['ext']==".cbf", ['date', 'path', 'size']]
for date, path, size in cbf_files.itertuples(index=False, name=None):
    parsed = parse_cbf_path(path, DIR)
    if parsed is None:
        # One stray file must not abort the whole report.
        unparsed_cbf_paths.append(path)
        continue
    temperature, sample, color, basename = parsed
    rows.append([date, temperature, sample, color, basename, size])

if unparsed_cbf_paths:
    print(f"Skipped {len(unparsed_cbf_paths)} .cbf file(s) without an integer temperature directory")

In [None]:
# Temperature of every .cbf file over time: one line colour per directory
# ("color"), one marker symbol per sample.
fields = pd.DataFrame(rows, columns=header)
fields = fields.sort_values(by=['date'])
fig = px.line(
    fields,
    x="date",
    y="temperature",
    color='color',
    symbol='sample',
    hover_data=['basename', 'size'],
)

# Optional per-line text labels, currently disabled:
#for data in fig.data:
#    fig.add_scatter(x=[data.x[0]], y=[data.y[0]],mode='text',showlegend=False, text=[data.y[-1]], textposition='top right')

fig.update_yaxes(type="linear")
fig.update_layout(showlegend=False, dragmode='pan')
fig.show(config={'scrollZoom': True})

# Time histogram

In [None]:
import plotly.express as px
# Number of files per modification-time bin.
fig = px.histogram(df, x="date")
fig.update_layout(dragmode='pan', bargap=0.2)
fig.show(config={'scrollZoom': True})

In [None]:
# Share of files for each extension (one slice per distinct 'ext' value).
fig = px.pie(df, names='ext', title='Number of Files per Extension')
fig.update_layout(dragmode='pan')
fig.show(config={'scrollZoom': True})

# Total size per extension

In [None]:
# Sum of file sizes for each extension, drawn as one bar per extension.
size_by_ext = df.groupby(['ext'])['size'].sum()
fig = px.bar(size_by_ext)
fig.update_layout(dragmode='pan')
fig.show(config={'scrollZoom': True})