In [None]:
from maggma.stores.advanced_stores import MongograntStore
from maggma.stores.advanced_stores import Sort
from typing import List
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from datetime import timedelta, date, datetime
from monty.json import MontyDecoder
from datetime import timedelta
# configuration stuff
from sys import platform
import maggma
# On Linux platforms only: switch on the ``use_xvfb`` option of plotly's orca config.
if platform in ("linux", "linux2"):
    import plotly.io as pio

    pio.orca.config.use_xvfb = True
import plotly.graph_objs as go
import plotly.offline as py
import plotly.express as px
import plotly.graph_objects as go
import plotly

In [None]:
# Name of the plotly renderer; it is passed as ``renderer=`` to the ``fig.show(...)``
# calls in the plotting cells of this notebook.
renderer = "notebook" # change to pdf for live viewing

In [None]:
# Store backed by the "gdrive" collection (opened with the "rw" mongogrant role).
gdrive_mongo_store = MongograntStore(
    collection_name="gdrive",
    mongogrant_spec="rw:knowhere.lbl.gov/mp_core_mwu",
)
gdrive_mongo_store.connect()

# Store backed by the "tasks" collection (opened with the "ro" mongogrant role).
tasks_mongo_store = MongograntStore(
    collection_name="tasks",
    mongogrant_spec="ro:mongodb04.nersc.gov/mp_emmet_prod",
)
tasks_mongo_store.connect()

In [None]:
import plotly.graph_objects as go

# Pie chart comparing the document count of the gdrive store with that of the tasks store.
df = pd.DataFrame(
    {
        "title": np.array(["Total in Gdrive", "Total Tasks"]),
        "count": np.array([gdrive_mongo_store.count(), tasks_mongo_store.count()]),
    }
)
fig = px.pie(df, names="title", values="count", title="Tasks & GDrive")
fig.show(renderer=renderer)

print("WARNING: This Pie chart might not reflect the actual progress since there are tasks that belong to an deprecated material")

In [None]:
# Bar chart of two counts taken from the gdrive store:
#   * documents whose "error" field is null
#   * documents whose "nomad_updated" field is not null
num_without_error = gdrive_mongo_store.count(criteria={"error": {"$eq": None}})
num_nomad_updated = gdrive_mongo_store.count(criteria={"nomad_updated": {"$ne": None}})

df = pd.DataFrame(
    {
        "type": np.array(["Total in Gdrive", "Total in NOMAD"]),
        "count": np.array([num_without_error, num_nomad_updated]),
    }
)
fig = px.bar(df, x="type", y="count", color="type", title="Num uploaded to Gdrive and NOMAD")
fig.show(renderer=renderer)





In [None]:
# Total size of all gdrive documents whose "error" field is null.
all_content_gdrive = gdrive_mongo_store.query(criteria={"error": None}, properties={"file_size": 1})
gdrive_size = sum(doc["file_size"] for doc in all_content_gdrive)
print(f"GDrive: {gdrive_size} bytes = {gdrive_size*1e-6} mb = {gdrive_size*1e-9} gb")


# Same total, restricted to documents that also have a non-null "nomad_updated" field.
all_content_nomad = gdrive_mongo_store.query(
    criteria={"$and": [{"error": None}, {"nomad_updated": {"$ne": None}}]},
    properties={"file_size": 1},
)
nomad_size = sum(doc["file_size"] for doc in all_content_nomad)
print(f"Nomad: {nomad_size} bytes = {nomad_size*1e-6} mb = {nomad_size*1e-9} gb")


# The plotted values are byte totals scaled by 1e-9 (i.e. gigabytes). The column is
# named accordingly because plotly express uses the column name as the axis title;
# naming it "bytes" labelled the y axis with the wrong unit.
df = pd.DataFrame(
    {
        "title": np.array(["GDrive Upload GB", "Nomad Upload GB"]),
        "gigabytes": np.array([gdrive_size * 1e-9, nomad_size * 1e-9]),
    }
)
fig = px.bar(df, y="gigabytes", x="title", color="title", title="GDrive & NOMAD by bytes")
fig.show(renderer=renderer)

In [None]:
# Pie chart splitting the gdrive documents by whether their "error" field is null.
num_success = gdrive_mongo_store.count(criteria={"error": {"$eq": None}})
num_failed = gdrive_mongo_store.count(criteria={"error": {"$ne": None}})

df = pd.DataFrame(
    {
        "title": np.array(["Success", "Failed"]),
        "count": np.array([num_success, num_failed]),
    }
)
fig = px.pie(df, names="title", values="count", title="GDrive Upload Status")
fig.show(renderer=renderer)



In [None]:
def find_dates_btw(start_dt, end_dt):
    """
    List every calendar day from ``start_dt`` through the day *after* ``end_dt``.

    Each entry is a naive ``datetime`` at midnight. The extra trailing day makes the
    last entry an upper bound that lies after every timestamp on ``end_dt``'s own day.

    The span is computed on calendar dates rather than on the raw datetimes, so the
    result does not depend on the time-of-day of either argument. (Computing it on the
    raw datetimes dropped the trailing day whenever ``end_dt``'s time-of-day was
    earlier than ``start_dt``'s.)

    Args:
        start_dt: datetime of the first record, or None to fall back to ``end_dt``.
        end_dt: datetime of the last record, or None to fall back to ``start_dt``.

    Returns:
        List of midnight datetimes in ascending order; empty if both arguments are
        None or if ``end_dt`` lies more than one calendar day before ``start_dt``.
    """
    # A missing bound collapses the range onto the bound that is present.
    if start_dt is None:
        start_dt = end_dt
    if end_dt is None:
        end_dt = start_dt
    if start_dt is None:
        return []

    first_day = start_dt.date()
    # One day past the end so the final bucket covers all of end_dt's day.
    last_day = end_dt.date() + timedelta(days=1)

    dates = []
    for offset in range((last_day - first_day).days + 1):
        day = first_day + timedelta(days=offset)
        dates.append(datetime(day.year, day.month, day.day))
    return dates

def find_earliest_date(store, field):
    """
    Return the value of ``field`` on the first record (ascending by ``field``)
    among the store's records whose "error" is null.
    """
    no_error = {"error": {"$eq": None}}
    oldest_first = {field: maggma.core.store.Sort.Ascending}
    matches = list(store.query(criteria=no_error, sort=oldest_first, limit=1))
    return matches[0][field]

def find_latest_date(store, field):
    """
    Return the value of ``field`` on the first record (descending by ``field``)
    among the store's records whose "error" is null.
    """
    no_error = {"error": {"$eq": None}}
    newest_first = {field: maggma.core.store.Sort.Descending}
    matches = list(store.query(criteria=no_error, sort=newest_first, limit=1))
    return matches[0][field]

In [None]:
def _cumulative_counts(dates, field_name):
    """
    Map each date to the number of gdrive records whose ``field_name`` is <= that date.

    The first date is not queried; it is always assigned 0.
    """
    result = dict()
    for idx, day in enumerate(dates):
        if idx == 0:
            result[day] = 0
        else:
            result[day] = gdrive_mongo_store.count(criteria={field_name: {"$lte": day}})
    return result


def make_time_series_data(field_name):
    """
    Build a cumulative per-day series for ``field_name`` over the gdrive store.

    The day range runs between the earliest and latest ``field_name`` values found
    among records whose "error" is null (see ``find_earliest_date`` /
    ``find_latest_date``), expanded to days by ``find_dates_btw``.

    Returns:
        dict mapping each day to the count of records with ``field_name`` <= that day
        (the first day is fixed at 0).
    """
    dates = find_dates_btw(find_earliest_date(gdrive_mongo_store, field_name),
                           find_latest_date(gdrive_mongo_store, field_name))
    return _cumulative_counts(dates, field_name)


def make_time_series_data_nomad(field_name="nomad_updated"):
    """
    Build a cumulative per-day series for ``field_name`` over the gdrive store.

    Unlike ``make_time_series_data``, the day range is bounded by the records whose
    ``field_name`` is not null (no "error" filter is applied).

    Returns:
        dict mapping each day to the count of records with ``field_name`` <= that day
        (the first day is fixed at 0).

    Raises:
        IndexError: if no record has a non-null ``field_name``.
    """
    def _boundary(order):
        # First value of field_name under the given sort order, among non-null values.
        docs = gdrive_mongo_store.query(criteria={field_name: {"$ne": None}},
                                        sort={field_name: order}, limit=1)
        return list(docs)[0][field_name]

    start = _boundary(maggma.core.store.Sort.Ascending)
    end = _boundary(maggma.core.store.Sort.Descending)
    return _cumulative_counts(find_dates_btw(start, end), field_name)

In [None]:
# Build the two date -> count dicts plotted in the next cell.
# Each call issues one count query against the gdrive store per date after the first.
last_updated_data = make_time_series_data("last_updated")
nomad_updated_data = make_time_series_data_nomad()

In [None]:
# All dates that occur in either series.
Xs = set(last_updated_data) | set(nomad_updated_data)

# One line+marker trace per series, added in the order last_updated, nomad_updated.
fig = go.Figure()
for trace_name, series in (
    ("last_updated", last_updated_data),
    ("nomad_updated", nomad_updated_data),
):
    fig.add_trace(
        go.Scatter(
            x=list(series.keys()),
            y=list(series.values()),
            mode='lines+markers',
            name=trace_name,
        )
    )

# Titles, font and a logarithmic y axis.
label_font = dict(family="Franklin Gothic", size=14, color="#0d0d0d")
fig.update_layout(
    title="GDrive Upload Status",
    xaxis_title="Time",
    yaxis_title="# Submission",
    font=label_font,
    yaxis_type="log",
)
fig.show(renderer=renderer)