In [1]:
import logging
import os
import json
from pathlib import Path
import plotly
from mpcite.models import ConnectionModel
from mpcite.doi_builder import DoiBuilder
import pandas as pd
from typing import Union, List, Dict
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import maggma
from datetime import timedelta, date, datetime
from monty.json import MontyDecoder

  from tqdm.autonotebook import tqdm


In [2]:
# Configuration: rebuild the DoiBuilder from ./files/config.json, resolved
# against the current working directory of the kernel.
config_file = Path(os.getcwd()) / "files" / "config.json"
assert config_file.exists(), "input config file does not exist"
# Read inside a `with` block so the file handle is closed as soon as the JSON
# has been decoded (a bare `config_file.open("r")` is never closed explicitly).
with config_file.open("r") as config_handle:
    # MontyDecoder is what turns the JSON document into the DoiBuilder object.
    bld: DoiBuilder = json.load(config_handle, cls=MontyDecoder)
bld

<mpcite.doi_builder.DoiBuilder at 0x7ff874a0d310>

In [3]:
# Pull both stores off the builder, then open their connections so the
# cells below can query them.
doi_store, materials_store = bld.doi_store, bld.materials_store
doi_store.connect()
materials_store.connect()

In [4]:
def draw_pie_graph(labels:list, values:list, name:str):
    """Render a donut-style pie chart and display it.

    Args:
        labels: slice labels, passed to the pie trace as ``labels``.
        values: slice sizes, passed to the pie trace as ``values``.
        name: used both as the trace name and as the text of a single
            arrow-less annotation added to the layout.

    Returns:
        None. The figure is displayed with ``show()`` and not returned.
    """
    # hole=0.5 turns the pie into a ring.
    donut = go.Pie(labels=labels, values=values, hole=0.5, name=name)
    caption = {"text": name, "font_size": 20, "showarrow": False}

    chart = go.Figure(data=[donut])
    chart.update_layout(annotations=[caption])
    chart.show()

In [10]:
def make_doi_status_pie_chart_data(doi_store):
    """Count the documents in ``doi_store`` for each distinct ``status`` value.

    Args:
        doi_store: object exposing ``distinct(field)`` and
            ``count(criteria=...)``.

    Returns:
        dict mapping every value returned by ``distinct("status")`` to the
        number of documents whose ``status`` equals it, in the order the
        values were returned.
    """
    return {
        status: doi_store.count(criteria={"status": status})
        for status in doi_store.distinct("status")
    }
# Render the per-status document counts as a donut chart.
data = make_doi_status_pie_chart_data(doi_store=doi_store)
draw_pie_graph(labels=list(data), values=list(data.values()), name="Status")


In [6]:
def make_doi_valid_pie_chart_data(doi_store):
    """Count the documents in ``doi_store`` for each distinct ``valid`` value.

    Args:
        doi_store: object exposing ``distinct(field)`` and
            ``count(criteria=...)``.

    Returns:
        dict mapping every value returned by ``distinct("valid")`` to the
        number of documents whose ``valid`` field equals it.
    """
    flags = list(doi_store.distinct("valid"))
    # One count query per distinct value, issued in the order they came back.
    tallies = [doi_store.count(criteria={"valid": flag}) for flag in flags]
    return dict(zip(flags, tallies))
# Render the document counts per `valid` value as a donut chart.
data = make_doi_valid_pie_chart_data(doi_store)
draw_pie_graph(labels=list(data), values=list(data.values()), name="Valid")

## Draw time-dependent graphs

In [7]:
def find_dates_btw(start_dt, end_dt):
    """Return one midnight ``datetime`` per calendar day from start to end.

    The range is inclusive on both ends and is computed on calendar dates, not
    on elapsed 24-hour periods. The day containing ``end_dt`` is therefore
    always included, even when its time of day is earlier than that of
    ``start_dt`` (Jan 1 23:00 -> Jan 3 01:00 yields Jan 1, Jan 2 and Jan 3).

    Args:
        start_dt: ``datetime`` falling on the first day of the range.
        end_dt: ``datetime`` falling on the last day of the range.

    Returns:
        List of naive ``datetime`` objects at 00:00, one per day, in ascending
        order. Empty when ``end_dt`` is earlier than ``start_dt``.
    """
    if end_dt < start_dt:
        # A reversed range has no days in it.
        return []

    first_day = start_dt.date()
    # Subtracting dates (not datetimes) counts calendar days, so a partial
    # final day is not truncated away.
    day_count = (end_dt.date() - first_day).days + 1
    midnight = datetime(first_day.year, first_day.month, first_day.day)
    return [midnight + timedelta(days=offset) for offset in range(day_count)]

def find_earliest_date(store, field):
    """Return the ``field`` value of the first document when sorted ascending.

    Queries ``store`` with an empty filter, sorted by ``field`` in ascending
    order and limited to a single document.

    Args:
        store: object exposing ``query(criteria=..., sort=..., limit=...)``.
        field: name of the document key to sort by and to return.

    Raises:
        IndexError: if the query yields no documents.
    """
    ascending = maggma.core.store.Sort.Ascending
    first_docs = list(store.query(criteria={}, sort={field: ascending}, limit=1))
    return first_docs[0][field]

def find_latest_date(store, field):
    """Return the ``field`` value of the first document when sorted descending.

    Queries ``store`` with an empty filter, sorted by ``field`` in descending
    order and limited to a single document.

    Args:
        store: object exposing ``query(criteria=..., sort=..., limit=...)``.
        field: name of the document key to sort by and to return.

    Raises:
        IndexError: if the query yields no documents.
    """
    newest_first = {field: maggma.core.store.Sort.Descending}
    cursor = store.query(criteria={}, sort=newest_first, limit=1)
    return list(cursor)[0][field]



In [8]:
def make_time_series_data(field_name, store=None):
    """Build a cumulative document count over time for one date field.

    For every day returned by ``find_dates_btw`` between the earliest and the
    latest value of ``field_name``, the result holds the number of documents
    whose ``field_name`` is ``<=`` that day's midnight. The first day is pinned
    to 0 without querying.

    A closing point keyed by the latest timestamp itself is appended when that
    timestamp is not already the last midnight. Without it the series would
    stop at 00:00 of the final day and never include documents stamped later
    that day.

    Args:
        field_name: name of the date field to query and count on.
        store: store to read from; defaults to the notebook-level
            ``doi_store`` when omitted.

    Returns:
        dict mapping ``datetime`` -> cumulative count, in ascending time order.
    """
    if store is None:
        # Fall back to the notebook-level DOI store.
        store = doi_store

    earliest = find_earliest_date(store, field_name)
    latest = find_latest_date(store, field_name)
    days = find_dates_btw(earliest, latest)

    result = {}
    for position, day in enumerate(days):
        if position == 0:
            # Starting point of the curve; no query is issued for it.
            result[day] = 0
        else:
            result[day] = store.count(criteria={field_name: {"$lte": day}})

    if days and latest != days[-1]:
        # Close the series at the real latest timestamp so the final value
        # covers every document up to and including the newest one.
        result[latest] = store.count(criteria={field_name: {"$lte": latest}})
    return result

In [11]:
# Cumulative per-day counts for each timestamp field on the DOI records.
last_updated_data = make_time_series_data("last_updated")
created_at_data = make_time_series_data("created_at")
last_validated_on_data = make_time_series_data("last_validated_on")
elsevier_updated_on_data = make_time_series_data("elsevier_updated_on")
# The series used to be called `elsevier_created_on_data` although it is built
# from the `elsevier_updated_on` field; the old name is kept as an alias in
# case other cells still refer to it.
elsevier_created_on_data = elsevier_updated_on_data

# Flat reference line at `materials_store.count()`, with one point per distinct
# `created_at` value in the DOI store.
total = materials_store.count()
total_data = {i: total for i in doi_store.distinct("created_at")}

# Legend label -> series, in drawing order. Every field-based trace is labelled
# with the exact field it was computed from.
series_by_name = {
    "last_updated": last_updated_data,
    "created_at": created_at_data,
    "last_validated_on": last_validated_on_data,
    "elsevier_updated_on": elsevier_updated_on_data,
    "Total": total_data,
}

fig = go.Figure(
    data=[
        go.Scatter(
            x=list(series.keys()),
            y=list(series.values()),
            mode="lines+markers",
            name=label,
        )
        for label, series in series_by_name.items()
    ]
)

fig.update_layout(
    title="MPCite Status",
    xaxis_title="Time",
    yaxis_title="# Submission",
    font=dict(
        family="Franklin Gothic",
        size=14,
        color="#0d0d0d"
    ),
    # Log scale on the y axis.
    yaxis_type="log"
)

fig.show()