Example: a reader that loads data tables from the QGT database

<a target="_blank" href="https://colab.research.google.com/github/qgtcollab/QGT-database/blob/main/Reader.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [16]:
import sys,os
import numpy as np
import pandas as pd
import torch
# Make float64 torch's default floating-point dtype.
torch.set_default_dtype(torch.float64)



# Global configuration dict: load_config() merges a config file's `conf` into it,
# and _READER looks up conf['datasets'][reaction] for the csv files and filters.
conf = {}
# NOTE(review): `options` is not referenced by any code visible in this notebook — confirm it is still needed.
options = {}


def load_config(fname):
    """Run a Python-syntax configuration file and merge its ``conf`` into the global ``conf``.

    The file is executed one line at a time in a fresh namespace, so every
    statement in it must fit on a single line.

    Parameters
    ----------
    fname : str
        Path of the configuration file.

    Raises
    ------
    SystemExit
        If any line fails to execute (the offending line is printed first).
    KeyError
        If the file never defines a ``conf`` variable.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(fname) as handle:
        lines = handle.readlines()

    namespace = {}
    for line in lines:
        try:
            # NOTE(review): exec runs arbitrary code from the file; only load trusted configs.
            exec(line, namespace)
        except Exception:
            # `Exception` (not a bare except) lets Ctrl-C still interrupt the load.
            print('ERR at the input.py. Look for %s'%line)
            sys.exit()

    conf.update(namespace['conf'])

def isnumeric(value):
    """Return True when ``int(value)`` succeeds, False otherwise.

    Numbers and numeric strings are accepted; ``None``, non-numeric strings,
    NaN and infinities are rejected because ``int()`` cannot convert them.
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (None, list, ...);
        # ValueError: unparsable string or NaN; OverflowError: +/-inf.
        return False
    return True


class _READER:
    """Base class that loads the csv tables configured for a reaction.

    The tables are described by the global ``conf``:
    ``conf['datasets'][reaction]['csv']`` maps a data-set key to a file name and
    the optional ``conf['datasets'][reaction]['filters']`` lists query strings.
    Subclasses must provide ``modify_table(tab)``, which is applied to every
    table right after it is read.
    """

    def apply_cuts(self,tab):
        """Apply each configured filter of the current reaction to ``tab``.

        A filter that cannot be evaluated is skipped and reported, the
        remaining filters are still applied. Relies on ``self.reaction``,
        which is set by ``load_data_sets``.

        Returns the filtered table.
        """
        if 'filters' in conf['datasets'][self.reaction]:
            for cut in conf['datasets'][self.reaction]['filters']:
                try:
                    tab=tab.query(cut)
                except Exception as err:
                    # Keep the best-effort behaviour (skip the cut) but make it visible:
                    # a silently ignored cut leaves unwanted points in the data set.
                    print('WARNING: filter %r skipped for %s (%s)'%(cut,self.reaction,err))
        return tab

    def load_data_sets(self,reaction,verb=True):
        """Read every csv table configured for ``reaction``.

        Parameters
        ----------
        reaction : str
            Key into ``conf['datasets']``.
        verb : bool
            When true, print one line per table being loaded.

        Returns
        -------
        dict or None
            ``None`` if the reaction is not configured. Otherwise a dict mapping
            each data-set key to a dict of columns; a column whose first entry
            passes ``isnumeric`` is converted to a ``torch.tensor``, any other
            column stays a plain list. Tables left empty by ``modify_table``
            are omitted.
        """
        self.reaction=reaction
        if reaction not in conf['datasets']: return None
        csv_files=conf['datasets'][reaction]['csv']
        TAB={}
        for k,fname in csv_files.items():
            if verb: print('loading %s data sets %s'%(reaction,k))
            if fname.startswith('./'):
                # Names starting with './' are read from the local file system.
                tab=pd.read_csv(fname)
            else:
                # Everything else is fetched relative to $FITPACK; strip a trailing
                # slash from the base so the joined location has no '//' in it.
                base=os.environ['FITPACK'].rstrip('/')
                tab=pd.read_csv('%s/%s?raw=true'%(base,fname))
            tab=self.modify_table(tab)
            if tab.index.size==0: continue
            columns=tab.to_dict(orient='list')
            # Only the first entry decides whether a column is numeric.
            TAB[k]={name:(torch.tensor(vals) if isnumeric(vals[0]) else vals)
                    for name,vals in columns.items()}

        return TAB

In [21]:
class READER(_READER):
    """Reader that adds the derived columns xB, xi, Units, beamcharge and idx to each table."""

    # Fallback proton mass used when ``self.aux`` does not provide ``M_proton``.
    # Value in GeV — assumes W is in GeV and Q2 in GeV^2; TODO confirm against the tables.
    M_PROTON=0.93827208816

    def __init__(self):
        # Placeholder for an auxiliary-constants object; may be replaced by one exposing M_proton.
        self.aux=[]

    def get_xB(self,tab):
        """Ensure ``tab`` has an ``xB`` column.

        If it is missing it is computed as ``Q2/(W**2-M_proton**2+Q2)`` from the
        ``W`` and ``Q2`` columns. Prints a message and exits when neither ``xB``
        nor ``W`` is available.
        """
        if 'xB' not in tab.columns:
            if 'W' in tab.columns:
                # self.aux defaults to a plain list, which has no M_proton attribute;
                # fall back to the class constant instead of raising AttributeError.
                m_proton=getattr(self.aux,'M_proton',self.M_PROTON)
                tab['xB']=tab['Q2']/(tab['W']**2-m_proton**2+tab['Q2'])
            else:
                print('cannot retrieve xB values')
                sys.exit()
        return tab

    def get_units(self,tab):
        """Add a ``Units`` column holding a per-row multiplicative factor.

        The factor is 0.389379e6 for every row when the first entry of the
        ``units`` column is a string containing 'nb', and 1 otherwise (including
        when the column is missing or the table is empty).
        """
        units=np.ones(len(tab.index))
        if 'units' in tab.columns and len(tab.index)>0:
            first=tab['units'].values[0]
            # Only the first row is inspected; a non-string entry (e.g. NaN) means "no conversion".
            if isinstance(first,str) and 'nb' in first:
                # presumably (hbar*c)^2 in nb*GeV^2, i.e. a GeV^-2 -> nb conversion — TODO confirm
                units*=0.389379e6
        tab['Units']=pd.Series(units,index=tab.index)
        return tab

    def get_beam_charge(self,tab):
        """Ensure ``tab`` has a ``beamcharge`` column ('+', '-' or '+/-').

        An existing column is left untouched. Otherwise the value is derived
        from the first entry of the ``lepton`` column; an unrecognised lepton
        prints a message and exits.
        """
        if 'beamcharge' in tab.columns:
            # Nothing to derive, so the 'lepton' column is not required here.
            return tab
        if len(tab.index)==0:
            # No first row to inspect: add an empty column rather than raising IndexError.
            tab['beamcharge']=pd.Series(index=tab.index,dtype=object)
            return tab
        charges={'e+':'+','mu+':'+',
                 'e-':'-','mu-':'-',
                 'e+/e-':'+/-','mu+/mu-':'+/-'}
        beam=tab['lepton'].values[0]
        if beam not in charges:
            print('cannot retrieve beam charge')
            sys.exit()
        tab['beamcharge']=charges[beam]
        return tab

    def get_idx(self,tab):
        """Store the current index of ``tab`` in an ``idx`` column."""
        tab['idx']=pd.Series(tab.index,index=tab.index)
        return tab

    def modify_table(self,tab):
        """Add all derived columns to ``tab`` and apply the configured cuts.

        ``xi`` is computed as ``xB/(2-xB)``. ``idx`` is filled after the cuts,
        so it holds the original row labels of the surviving rows.
        """
        tab=self.get_xB(tab)
        tab['xi']=tab['xB']/(2-tab['xB'])
        tab=self.get_units(tab)
        tab=self.get_beam_charge(tab)
        tab=self.apply_cuts(tab)
        tab=self.get_idx(tab)

        return tab

In [23]:
# Base location the reader prepends to every csv name that does not start with './'.
os.environ['FITPACK'] = 'https://github.com/qgtcollab/QGT-database/blob/main/Tables/'

# Describe the 'dvcs' reaction: one cut and two csv tables.
conf['datasets'] = {
    'dvcs': {
        'filters': ['t<0.25*Q2'],
        'csv': {
            'Data1': 'Ref_18_Table_1.csv',
            'Data2': 'Ref_18_Table_2.csv',
        },
    },
}

# Load both tables; the result maps each data-set key to a dict of columns.
tab=READER().load_data_sets('dvcs')

print(tab)

loading dvcs data sets Data1
loading dvcs data sets Data2
{'Data1': {'Unnamed: 0': tensor([   0,    1,    2,  ..., 1930, 1931, 1932]), 'xB': tensor([0.1260, 0.1260, 0.1260,  ..., 0.4750, 0.4750, 0.4750]), 'Q2': tensor([1.1100, 1.1100, 1.1100,  ..., 3.7700, 3.7700, 3.7700]), 't': tensor([-0.1100, -0.1100, -0.1100,  ..., -0.4500, -0.4500, -0.4500]), 'phi': tensor([ 68.3300,  82.3500,  97.1300,  ..., 322.7700, 337.6800, 352.5500]), 'deg/rad': ['deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'deg', 'd

In [27]:
# List the column names available in the 'Data1' data set.
tab['Data1'].keys()

dict_keys(['Unnamed: 0', 'xB', 'Q2', 't', 'phi', 'deg/rad', 'value', 'stat_u', 'syst_u', 'col', 'obs', 'units', 'hadron', 'lepton', 'E_lepton', 'E_hadron', 'xi', 'Units', 'beamcharge', 'idx'])