Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create new dedicated data-library website #168

Open
tkittel opened this issue Mar 8, 2024 · 2 comments
Open

Create new dedicated data-library website #168

tkittel opened this issue Mar 8, 2024 · 2 comments
Assignees

Comments

@tkittel
Copy link
Member

tkittel commented Mar 8, 2024

As discussed with @MilanKlausz, the data library page in the wiki needs a serious overhaul and upgrade (cf. also #123).

We are considering several ideas:

  • Generate the DB, plot files, etc. in GitHub CI.
  • Make the user experience more interactive, most likely with a wonderful mix of JavaScript and various other technologies (to be investigated by Milan).
@tkittel
Copy link
Member Author

tkittel commented Mar 8, 2024

For reference, here is the script we discussed today:

#!/usr/bin/env python3

import NCrystal as NC
import pathlib

def createDBEntry( filelistentry, *, plotfolder = None ):
    """Return the database record (a plain dict) for one file-list entry.

    An ``Entry`` is constructed from ``filelistentry`` (forwarding
    ``plotfolder``), and its properties are copied into a dictionary.
    The ``'extra_keywords'`` field is always an empty string.
    """
    entry = Entry( filelistentry, plotfolder = plotfolder )
    return {
        'key': entry.key,
        'shortkey': entry.shortkey,
        'safekey': entry.safekey,
        'ncmat_header': entry.ncmat_header,
        'dump': entry.dump,
        'ncmat_contents': entry.textData.rawData,
        'plot_filename_xsect': entry.plot_filename_xsect,
        'extra_keywords': '',
    }

class Entry:
    """Collected data-library information for a single file-list entry.

    Constructing an instance loads the material with ``NC.load``, fetches its
    text data with ``NC.createTextData``, extracts the leading comment header,
    captures the output of ``nctool -d <key>`` and saves a plot of the material
    as ``<safekey>.png`` inside the plot folder. The results are exposed
    through the read-only properties below.
    """

    @property
    def loaded_mat( self ):
        """Object returned by ``NC.load(key)``."""
        return self.__mat

    @property
    def key( self ):
        """Full key of the entry (``filelistentry.fullKey``)."""
        return self.__key

    @property
    def shortkey( self ):
        """The key with a leading ``'stdlib::'`` removed, if present."""
        return self.__shortkey

    @property
    def safekey( self ):
        """The key with ``/`` and ``:`` replaced by ``_`` and ``.`` by ``d``."""
        return self.__safekey

    @property
    def filelistentry( self ):
        """The file-list entry object passed to the constructor."""
        return self.__fe

    @property
    def ncmat_header( self ):
        """List of cleaned-up header comment lines (see ``_extract_header``)."""
        return self.__ncmathdr

    @property
    def textData( self ):
        """Object returned by ``NC.createTextData(key)``."""
        return self.__td

    @property
    def dump( self ):
        """Decoded standard output of ``nctool -d <key>``."""
        return self.__dump

    @property
    def plot_filename_xsect( self ):
        """File name (without directory) of the saved plot."""
        return self.__plot_xsect_file

    @staticmethod
    def _extract_header( lines ):
        """Return the comment header found before the first ``@`` line.

        Only lines starting with ``#`` that appear before the first line
        starting with ``@`` are considered. The leading ``#`` is removed, the
        indentation (in spaces) shared by all non-blank lines is stripped, and
        blank lines at the beginning and end are dropped.

        An empty list is returned when there are no such lines or when all of
        them are blank. If no ``@`` line is present at all, every ``#`` line
        is treated as part of the header.
        """
        hdr = []
        for line in lines:
            if not line:
                continue
            if line[0] == '@':
                break
            if line[0] == '#':
                hdr.append( line[1:] )
        # Common indentation is measured only on lines containing something
        # other than spaces; a header without any such line is left as-is
        # here and is then emptied by the blank-line trimming below.
        indents = [ len(e) - len(e.lstrip(' ')) for e in hdr if e.strip(' ') ]
        if indents:
            nstrip = min(indents)
            hdr = [ e[nstrip:] for e in hdr ]
        while hdr and not hdr[0].strip():
            del hdr[0]
        while hdr and not hdr[-1].strip():
            del hdr[-1]
        return hdr

    def __init__( self, filelistentry, *, plotfolder = None ):
        """Gather all information for ``filelistentry``.

        Parameters:
            filelistentry: object providing a ``fullKey`` attribute, such as
                the items yielded by ``NC.browseFiles()``.
            plotfolder: directory in which the plot file is written (a
                ``pathlib.Path`` or anything accepted by its constructor).
                Defaults to the current working directory.

        Raises:
            subprocess.CalledProcessError: if ``nctool -d <key>`` exits with a
                non-zero status.
        """
        import subprocess

        key = filelistentry.fullKey
        stdlib_prefix = 'stdlib::'
        shortkey = ( key[len(stdlib_prefix):]
                     if key.startswith(stdlib_prefix)
                     else key )
        #Fixme: we need to check against clashes. Perhaps also better use some
        #generic urlencode function or some such?
        self.__safekey = key.replace('/','_').replace(':','_').replace('.','d')
        self.__key = key
        self.__shortkey = shortkey
        if plotfolder is None:
            plotfolder = pathlib.Path('.').absolute()
        else:
            plotfolder = pathlib.Path(plotfolder)

        self.__mat = NC.load(key)
        self.__fe = filelistentry
        self.__td = NC.createTextData(key)
        self.__ncmathdr = self._extract_header( self.__td )

        proc = subprocess.run( ['nctool','-d',key],
                               capture_output=True, check=True )
        self.__dump = proc.stdout.decode()

        #Generate plots:
        self.__mat.plot(do_show=False)
        import matplotlib.pyplot as plt

        self.__plot_xsect_file = '%s.png'%self.__safekey
        try:
            plt.savefig( plotfolder.joinpath(self.__plot_xsect_file) )
        finally:
            # Close the current figure so figures do not accumulate (or get
            # drawn on again) when many entries are processed in one run.
            plt.close()

def create_DB_contents( plotfolder ):
    """Create the plot folder and return the list of database records.

    ``plotfolder`` must not exist yet (``RuntimeError`` is raised otherwise);
    it is created including any missing parents. Only the entries from
    ``NC.browseFiles()`` whose name starts with ``'Ac'`` or whose full key
    contains ``'gasmix::BF3'`` are processed, each via ``createDBEntry``.
    """
    plotdir = pathlib.Path(plotfolder)
    if plotdir.exists():
        raise RuntimeError(f'Plot folder already exists: {plotdir}')
    plotdir.mkdir(parents=True)

    def is_selected( fe ):
        # Hard-coded selection of the entries to include.
        return fe.name.startswith('Ac') or 'gasmix::BF3' in fe.fullKey

    records = []
    for fe in filter( is_selected, NC.browseFiles() ):
        print(f"Processing {fe.fullKey}")
        records.append( createDBEntry( fe, plotfolder = plotdir ) )
    return records

def create_DB( outfolder ):
    """Generate the database folder and return its path.

    ``outfolder`` must not exist yet (``RuntimeError`` is raised otherwise).
    It is created, the plots are written to its ``plots`` subfolder by
    ``create_DB_contents``, and the resulting records are pretty-printed and
    written as JSON to ``db.json`` inside the folder.
    """
    import json
    import pprint

    target = pathlib.Path(outfolder)
    if target.exists():
        raise RuntimeError(f'Folder already exists: {target}')
    target.mkdir(parents=True)

    records = create_DB_contents( target / 'plots' )
    pprint.pprint(records)

    with ( target / 'db.json' ).open('wt') as stream:
        json.dump( records, stream )
    return target

# Script entry point: generate the database in ./autogen_db. create_DB raises
# a RuntimeError if that folder already exists.
if __name__=='__main__':
    create_DB('./autogen_db')

@MilanKlausz MilanKlausz self-assigned this Mar 12, 2024
@tkittel
Copy link
Member Author

tkittel commented Apr 5, 2024

For reference, here are instructions from @marquezj for the new EXFOR interface, which we will eventually need when adding EXFOR data points.

https://nds.iaea.org/exfor/x4guide/API/

import numpy as np
import NCrystal as NC
import pandas as pd
from urllib.request import urlopen, Request
import io
import matplotlib.pyplot as plt

# EXFOR dataset IDs whose data points are overlaid on the NCrystal curve.
x4ids = [11762002, 11355002]

# Every request is sent with a browser-like User-Agent header.
request_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
}

for x4id in x4ids:
    # Download the dataset as CSV text and parse it into a DataFrame.
    csv_url = f'https://nds.iaea.org/exfor/x4get?DatasetID={x4id}&op=csv'
    with urlopen(Request(url=csv_url, data=None, headers=request_headers)) as response:
        csv_text = response.read().decode('utf-8')
    table = pd.read_csv(io.StringIO(csv_text))
    # The energy column is labelled in MeV; scale by 1e6 to match the eV axis.
    # NOTE(review): column names are hard-coded; confirm they are the same
    # for every dataset.
    x4_energy = 1e6 * table['EN (MEV) 1.1'].values
    x4_xsect = table['DATA (B) 0.1'].values
    plt.loglog(x4_energy, x4_xsect, '.', label=f'X4: {x4id}')

# NCrystal curve for nickel: sum of the scattering and absorption cross sections.
ni_scatter = NC.createScatter('Ni_sg225.ncmat')
ni_absorption = NC.createAbsorption('Ni_sg225.ncmat')
energy_grid = np.geomspace(1e-4, 5, 1000)
total_xsect = ( ni_scatter.crossSectionNonOriented(energy_grid)
                + ni_absorption.crossSectionNonOriented(energy_grid) )

# Return values are bound to a throwaway name (presumably to avoid echoing
# them in an interactive session).
a = plt.loglog(energy_grid, total_xsect, label='NCrystal')
a = plt.legend()
a = plt.xlabel('Energy [eV]')
a = plt.ylabel('Total cross section per atom [b]')

"the column selection in pandas might require some massaging... I am not sure they use the same units always"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants