In [1]:
from models import *
from utility import *
import logging
import os
import yaml
from pathlib import Path
import plotly
from adapter import OstiMongoAdapter
import pandas as pd
from typing import Union, List, Dict
import plotly.express as px
import plotly.graph_objects as go

  from tqdm.autonotebook import tqdm


In [2]:
# configuration stuff
# The config file is resolved relative to the current working directory:
# <parent of cwd>/files/config.yaml
config_file = Path(os.path.abspath('')).parent / "files" / "config.yaml"
# read_text() opens and closes the file itself, so no file handle is left
# dangling (the previous bare open() was never closed).
config = yaml.load(config_file.read_text(), Loader=yaml.SafeLoader)

# prepare
# Build the Mongo adapter and the OSTI connection models from the parsed config.
oma = OstiMongoAdapter.from_config(config)
elink = ConnectionModel.parse_obj(config["osti"]["elink"])
explorer = ConnectionModel.parse_obj(config["osti"]["explorer"])
osti = OSTIModel(elink=elink, explorer=explorer)
# Both stores must be connected before any query()/count() call below.
oma.materials_store.connect()
oma.doi_store.connect()

In [3]:
def mongo_to_panda(mongo_store, 
                   criteria:Union[dict, None] = None, 
                   properties:Union[Dict, List, None]=None,
                   skip:int=0, limit:int=0, no_id=True):
    """Run a query against a store and return the matching documents as a DataFrame.

    Args:
        mongo_store: object exposing ``query(criteria=, properties=, skip=, limit=)``
            that yields one dict per document.
        criteria: query filter; ``None`` (the default) is sent as an empty dict,
            i.e. no filtering.
        properties: projection forwarded unchanged to ``query``.
        skip: number of documents to skip, forwarded unchanged.
        limit: maximum number of documents, forwarded unchanged.
        no_id: when True, drop the ``_id`` column from the result if it exists.

    Returns:
        pandas.DataFrame with one row per returned document. The frame is empty
        when the query matches nothing.
    """
    # Avoid a shared mutable default argument; build a fresh dict per call.
    if criteria is None:
        criteria = {}
    records = list(mongo_store.query(criteria=criteria, properties=properties, skip=skip, limit=limit))
    df = pd.DataFrame(records)
    # An empty result or a projection that excludes "_id" has no such column;
    # only drop it when it is actually there instead of raising KeyError.
    if no_id and '_id' in df.columns:
        df = df.drop(columns='_id')
    return df

In [4]:
def make_doi_status():
    """Build a donut chart of how many DOI records have each ``status`` value.

    Returns:
        plotly.graph_objects.Figure containing a single Pie trace.
    """
    df = mongo_to_panda(oma.doi_store)
    # Take labels and values from the same value_counts() result so they are
    # guaranteed to line up. unique() orders by first appearance while
    # value_counts() orders by frequency, so mixing them can mislabel slices.
    # Missing values are excluded by value_counts().
    status_counts = df.status.value_counts()
    labels = status_counts.index.tolist()
    values = status_counts.tolist()
    fig=go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5, name="status")])
    fig.update_layout(annotations=[dict(text="Status", font_size=20, showarrow=False)])
    return fig
def make_doi_valid():
    """Build a donut chart of how many DOI records have each ``valid`` value.

    Returns:
        plotly.graph_objects.Figure containing a single Pie trace.
    """
    df = mongo_to_panda(oma.doi_store)
    # Labels and values both come from one value_counts() call so each label is
    # paired with its own count. unique() and value_counts() use different
    # orderings (first appearance vs. frequency) and must not be zipped.
    # Missing values are excluded by value_counts().
    valid_counts = df.valid.value_counts()
    labels = valid_counts.index.tolist()
    values = valid_counts.tolist()
    fig=go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5, name="valid")])
    fig.update_layout(annotations=[dict(text="Valid", font_size=20, showarrow=False)])
    return fig
def make_doi_last_updated():
    """Build a donut chart of DOI records grouped by the day they were last updated.

    Each ``last_updated`` value is reduced to a ``year/month/day`` string (no
    zero padding) before counting.

    Returns:
        plotly.graph_objects.Figure containing a single Pie trace.
    """
    df = mongo_to_panda(oma.doi_store)
    # only keep the day part of each timestamp
    update_days = df.last_updated.map(lambda t: f"{t.year}/{t.month}/{t.day}")
    # Labels and values are read from the same value_counts() result so every
    # day is paired with its own count. unique() follows first-appearance order
    # and would not match value_counts()' frequency order.
    day_counts = update_days.value_counts()
    labels = day_counts.index.tolist()
    values = day_counts.tolist()
    fig=go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5, name="last_updated")])
    fig.update_layout(annotations=[dict(text="Last Updated", font_size=20, showarrow=False)])
    return fig
def make_doi_bibtex():
    """Build a donut chart of DOI records with and without a bibtex entry.

    A record counts as "Does not have Bibtex" when its ``bibtex`` value is
    missing (None or NaN); every other record counts as "Has Bibtex".

    Returns:
        plotly.graph_objects.Figure containing a single Pie trace.
    """
    df = mongo_to_panda(oma.doi_store)
    # notna() treats both None and NaN as missing. The previous `== None`
    # comparison let NaN (e.g. a document lacking the key) count as "Has Bibtex".
    bibtex_bool = df.bibtex.notna().map({True: "Has Bibtex", False: "Does not have Bibtex"})
    # Labels and values are taken from one value_counts() result so they stay
    # paired. unique() and value_counts() order their results differently.
    bibtex_counts = bibtex_bool.value_counts()
    labels = bibtex_counts.index.tolist()
    values = bibtex_counts.tolist()
    fig=go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5, name="bibtex")])
    fig.update_layout(annotations=[dict(text="bibtex", font_size=20, showarrow=False)])
    return fig

In [5]:
def make_doi_citation_created():
    """Build a donut chart of DOI citation progress across all materials.

    Slices:
        * "DOI Citation Not Created": materials count minus DOI record count.
        * "DOI Citation Pending": DOI records whose status is "PENDING".
        * "DOI Citation Completed": every DOI record that is not "PENDING".

    Returns:
        plotly.graph_objects.Figure containing a single Pie trace.
    """
    labels = ["DOI Citation Not Created", "DOI Citation Pending", "DOI Citation Completed"]
    # Ask the store once for the total and reuse it for both derived slices.
    doi_total = oma.doi_store.count()
    # Let the store count the pending records instead of pulling every pending
    # document into memory just to take its length.
    pending_len = oma.doi_store.count(criteria={"status":"PENDING"})
    values = [oma.materials_store.count() - doi_total,
              pending_len,
              doi_total - pending_len
             ]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5)])
    fig.update_layout(annotations=[dict(text="DOI Citation", font_size=20, showarrow=False)])
    return fig

In [19]:
# Render the DOI citation progress donut chart.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours; re-run the notebook top to bottom on a fresh kernel before sharing.
fig = make_doi_citation_created()
fig.show()

In [6]:
# Pull every document from the DOI store (no filter) into memory.
cursor = list(oma.doi_store.query())
# `cursor` is already a list, so it can be handed to DataFrame directly.
df = pd.DataFrame(cursor)
# Bare last expression: show the frame as the cell output.
df

Unnamed: 0,_id,material_id,doi,bibtex,status,valid,last_updated,created_at,last_validated_on
0,5ed7ed19f3951ad25c70899d,mp-1173151,10.17188/1479772,"@article{osti_1479772,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.221
1,5ed7ed19f3951ad25c70899e,mp-1194956,10.17188/1479783,"@article{osti_1479783,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
2,5ed7ed19f3951ad25c70899f,mp-1204405,10.17188/1479784,"@article{osti_1479784,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
3,5ed7ed19f3951ad25c7089a0,mp-556728,10.17188/1479785,"@article{osti_1479785,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
4,5ed7ed19f3951ad25c7089a1,mp-542374,10.17188/1479786,"@article{osti_1479786,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
5,5ed7ed19f3951ad25c7089ab,mp-979270,10.17188/1479802,"@article{osti_1479802,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
6,5ed7ed19f3951ad25c7089ac,mp-1197073,10.17188/1479803,"@article{osti_1479803,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
7,5ed7ed19f3951ad25c7089ad,mp-1147638,10.17188/1479804,"@article{osti_1479804,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
8,5ed7ed19f3951ad25c7089ae,mp-1176596,10.17188/1479805,"@article{osti_1479805,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.222
9,5ed7ed19f3951ad25c7089af,mp-30656,10.17188/1479826,"@article{osti_1479826,\n title = {Mate...",COMPLETED,True,2020-06-03 11:34:32.838,2020-06-03 11:34:32.838,2020-06-04 17:23:36.223
