# Read Converted Hologram Data Format

- author Sylvie Dagoret-Campagne
- creation date 2024-09-23
- last update : 2024-09-27 : add csv
- last update : 2024-09-30 : v4 extended version
- affiliation : IJCLab
- Kernel @usdf **w_2024_16**
- Office emac : mamba_py311
- Home emac : base (conda)
- laptop : conda_py310


**Goal**: Notebook to read back the converted files and check their format.

In [None]:
import warnings
# Clear the warning filters currently installed, then ignore every warning
# raised from here on so the notebook output stays uncluttered.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from platform import python_version
# Show which Python interpreter version the running kernel uses.
print(python_version())

In [None]:
import os

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
# Box style: rounded corners, white fill, 10 % opacity.
# NOTE(review): presumably passed as `bbox=` to matplotlib text calls - confirm at the call sites.
props = {"boxstyle": "round", "facecolor": "white", "alpha": 0.1}
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
# Display every column (no limit) and up to 100 rows when a DataFrame is shown.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
import pickle
from collections import OrderedDict

# Notebook-wide plot defaults: small figures with extra-large axis labels,
# titles and tick labels.
plt.rcParams.update({
    "figure.figsize": (4, 3),
    "axes.labelsize": 'xx-large',
    "axes.titlesize": 'xx-large',
    "xtick.labelsize": 'xx-large',
    "ytick.labelsize": 'xx-large',
})


# new color correction model
import pickle


from astropy.table import Table
from astropy.io import fits

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
import ipywidgets as widgets
%matplotlib widget

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

### Load Holo fit results

In [None]:
# Key into atmfilenamesdict selecting which version of the results is read.
version_results = "v4"

In [None]:
# Result file name (.npy) for each version tag of the fit results.
# One entry per line; the key is the value expected in `version_results`.
atmfilenamesdict = {
    "v1": "data/spectro/auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_testWithMaskedEdges_newBoundaries_newPolysRescaled_newFitBounds_adjustA1_lockedOrder2_removeThroughputTails_2.npy",
    "v2": "auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_PeekFinder.npy",
    "v3": "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z.npy",
    "v4": "u_dagoret_auxtel_atmosphere_202301_v3.1.0_doSensorFlat_rebin2_lockedOrder2_FixA1_FixA2_FitAngstrom_WithGaia_freePressure_newThroughput6_BG40Scaled1.09_AtmoFitPressureA2_SpecErr_PeekFinder_20240924T161119Z_spectrfullextend.npy",
}

In [None]:
# File name of the results for the selected version.
atmfilename = atmfilenamesdict[version_results]

## Read back the converted files

In [None]:
# One switch per file format: the cell handling a given format does nothing
# when its flag is False.
flag_HDF5 = True
flag_PARQUET = True
flag_FITS = True
flag_SQL = True
flag_CSV = True

In [None]:
# Strip the trailing ".npy" from the selected file name. The remainder is the
# root to which each format-specific extension (.csv, .sql, .hdf5, ...) is
# appended in the cells that follow.
root_match = re.search(r"(.*)[.]npy$", atmfilename)
if root_match is None:
    # Fail loudly. Previously the message was not an f-string (the file name
    # was never shown) and input_fn_root was left as an empty list, which then
    # formatted into paths like "[].csv" and silently skipped every reader.
    raise ValueError(
        f"error in extracting root for filename {atmfilename}: "
        "expected a name ending in '.npy'"
    )
input_fn_root = root_match.group(1)

In [None]:
# Read the CSV version of the table back into df_spec, when enabled and the
# file is present on disk.
if flag_CSV:
    input_fn = f"{input_fn_root}.csv"
    if os.path.exists(input_fn):
        try:
            df_spec = pd.read_csv(input_fn)
            print(f"============= Read csv file {input_fn} ============")
        except Exception as err:
            # Report the exception type, its args and its message, one per
            # line, then mark the table as unavailable.
            print(type(err), err.args, err, sep="\n")
            df_spec = None
        finally:
            # Preview the first row whenever a table is available.
            if df_spec is not None:
                print(df_spec.head(n=1))

In [None]:
from sqlalchemy import create_engine
# 'sqlite://' without a path creates an in-memory SQLite database; it is not
# connected to any file on disk.
engine = create_engine('sqlite://', echo=False)

if flag_SQL:
    input_fn = f"{input_fn_root}.sql"
    if os.path.exists(input_fn):
        try:
            with engine.begin() as connection:
                # NOTE(review): this WRITES the current df_spec into a table named
                # after input_fn in the in-memory engine; nothing is read from the
                # .sql file although the message below says "Read". Confirm whether
                # a read-back (e.g. pd.read_sql on a file-backed engine) was intended.
                df_spec.to_sql(input_fn,con=connection,if_exists='replace')   
                print(f"============= Read sql file {input_fn} ============")
        except Exception as inst:
            print(type(inst))    # the exception type
            print(inst.args)     # arguments stored in .args
            print(inst)   
            # NOTE(review): on failure this discards whatever df_spec held before
            # this cell ran - confirm that is wanted.
            df_spec = None
        finally:
            # Preview the first row whenever a table is available.
            if df_spec is not None:
                print(df_spec.head(n=1))

In [None]:
# Read the HDF5 version of the table (stored under key 'data') back into
# df_spec, when enabled and the file is present on disk.
if flag_HDF5:
    input_fn = f"{input_fn_root}.hdf5"
    if os.path.exists(input_fn):
        try:
            # NOTE(review): data_columns is normally a to_hdf (write) option;
            # confirm it is needed on read.
            df_spec = pd.read_hdf(input_fn, key='data', data_columns=True)
            print(f"============= Read hdf5 file {input_fn} ============")
        except Exception as err:
            # Report the exception type, its args and its message, one per
            # line, then mark the table as unavailable.
            print(type(err), err.args, err, sep="\n")
            df_spec = None
        finally:
            # Preview the first row whenever a table is available.
            if df_spec is not None:
                print(df_spec.head(n=1))

In [None]:
# not working with pyarrow, so try fastparquet instead
#! pip install fastparquet

#import pyarrow.dataset as ds
#parquet_format = ds.ParquetFileFormat()
#file_options = parquet_format.make_write_options(coerce_timestamps='us', allow_truncated_timestamps=True)


# Read the gzip-suffixed Parquet version of the table back into df_spec with
# the fastparquet engine, when enabled and the file is present on disk.
if flag_PARQUET:
    input_fn = f"{input_fn_root}.parquet.gzip"
    if os.path.exists(input_fn):
        try:
            df_spec = pd.read_parquet(input_fn, engine='fastparquet')
            print(f"============= Read parquet file {input_fn} ============")
        except Exception as err:
            # Report the exception type, its args and its message, one per
            # line, then mark the table as unavailable.
            print(type(err), err.args, err, sep="\n")
            df_spec = None
        finally:
            # Preview the first row whenever a table is available.
            if df_spec is not None:
                print(df_spec.head(n=1))
 

In [None]:
# Open the FITS version of the table, when enabled and the file is present on
# disk, and wrap the data of extension 1 in a table HDU.
if flag_FITS:
    input_fn = f"{input_fn_root}.fits"
    if os.path.exists(input_fn):
        # fits.info() prints the HDU summary itself and returns None, so it is
        # called directly; wrapping it in print() added a stray "None" line.
        fits.info(input_fn)
        with fits.open(input_fn) as hdulist:
            # TableHDU is reached through the already-imported `fits` module;
            # the bare name was never imported and raised NameError.
            # NOTE(review): TableHDU is astropy's ASCII-table HDU class. If the
            # file was written as a binary table, BinTableHDU is probably the
            # class that was meant - confirm against the writer notebook.
            hdu = fits.TableHDU(data=hdulist[1].data)
            
