# Prueba de representación

## Paquetes

In [11]:
import numpy as np
import re
import pandas as pd
from importlib import reload
import os
import sys
import inspect

# Root of the project checkout. The default is the original local path; set
# the DQM_NMF_ROOT environment variable to run this notebook from another
# machine without editing the cell.
parentdir = os.environ.get('DQM_NMF_ROOT', r'F:\Clase\Universidad\04 - Cuarto\TFG\DQM-DC NMF')

# Make the project-local `utils` package importable. The membership check
# keeps sys.path from growing by one duplicate entry each time the cell is
# re-executed.
if parentdir not in sys.path:
    sys.path.append(parentdir)


import utils.df_utils as df_utils
import utils.plot_utils as plot_utils

# Centre-justify column headers in pandas' text rendering of DataFrames.
pd.options.display.colheader_justify = 'center'

import functools

import matplotlib.pyplot as plt
# %matplotlib qt
# Global matplotlib defaults for every figure in this notebook:
# 300 dpi, a (3, 2) figure size and font size 6.
plt.rcParams.update({
    'figure.dpi': 300,
    'figure.figsize': (3, 2),
    'font.size': 6,
})

import datetime
import logging
# --- Logging setup ----------------------------------------------------------
# DEBUG and above goes to a per-day log file under `{parentdir}/tmp`;
# INFO and above is echoed to the notebook's stdout.

# Take ONE timezone-aware UTC timestamp and reuse it for both the file name
# and the run banner, so the two can never disagree around midnight.
# (`datetime.utcnow()` is deprecated since Python 3.12.)
run_started = datetime.datetime.now(datetime.timezone.utc)
timestamp = run_started.strftime('%Y%m%d')
filename=f'{parentdir}/tmp/read_data_{timestamp}.log'

# FileHandler does not create missing directories; make sure the log folder
# exists so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# logging.basicConfig() is a no-op when the root logger already has handlers,
# which is the case every time this cell is re-run. Detach and close the old
# handlers first so the configuration below is applied and the previously
# opened log file handle is not leaked.
root_logger = logging.getLogger()
for stale_handler in list(root_logger.handlers):
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')

# File output: full detail, appended to the day's log file.
file_handler = logging.FileHandler(filename=filename, mode='a+')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)

# Console output: INFO and above. No formatter is set here, so basicConfig()
# assigns it the `format` string passed below.
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)

# The handlers have to be at a root level since they are the final output
# NOTE(review): the `format` string below starts with '[' that is never
# closed; it is kept byte-identical to preserve the console layout. Confirm
# whether '[' was intended.
logging.basicConfig(
    level=logging.DEBUG, 
    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        stream_handler
    ]
)

# Banner separating consecutive runs inside the same daily log file.
logging.info(f'\n\n\nRun at {run_started.strftime("%H-%M-%S")}')

[{3291413173.py:52} INFO - 


Run at 10-53-02


## Código

### Lectura del dataset

Creamos una tabla de archivos.

In [12]:
# Folder that is listed to build the table of available CSV files.
csv_dir = f'{parentdir}/data/csv'

# Collect the stem (file name without extension) of every regular file.
# - os.listdir() returns entries in arbitrary order, so the stems are sorted:
#   the reshape below relies on a stable, alphabetical ordering.
# - os.path.splitext() strips only the final extension, so a stem that itself
#   contains a dot survives and `stem + '.csv'` still points at the file.
names = np.array(sorted(
    os.path.splitext(f)[0]
    for f in os.listdir(csv_dir)
    if os.path.isfile(os.path.join(csv_dir, f))
))

cols =['chi2','eta','phi','pt']
inds = ['A','B','C','D']

# Fail with an explicit message instead of an opaque reshape error when the
# folder does not contain exactly one file per (index, column) pair.
if names.size != len(cols) * len(inds):
    raise ValueError(
        f'Expected {len(cols) * len(inds)} files in {csv_dir}, found {names.size}'
    )

# Sorted stems are laid out one row per entry of `cols`; transposing gives
# one row per entry of `inds` and one column per entry of `cols`.
names_df = pd.DataFrame(names.reshape(len(cols), len(inds)).T, index=inds, columns=cols)
df_utils.df_pprint(names_df,head=-1)

Unnamed: 0,chi2,eta,phi,pt
A,GlbMuon_Glb_chi2OverDf_MuonCert_Labeled_UL2018A_Reduced,GlbMuon_Glb_eta_MuonCert_Labeled_UL2018A_Reduced,GlbMuon_Glb_phi_MuonCert_Labeled_UL2018A_Reduced,GlbMuon_Glb_pt_MuonCert_Labeled_UL2018A_Reduced
B,GlbMuon_Glb_chi2OverDf_MuonCert_Labeled_UL2018B_Reduced,GlbMuon_Glb_eta_MuonCert_Labeled_UL2018B_Reduced,GlbMuon_Glb_phi_MuonCert_Labeled_UL2018B_Reduced,GlbMuon_Glb_pt_MuonCert_Labeled_UL2018B_Reduced
C,GlbMuon_Glb_chi2OverDf_MuonCert_Labeled_UL2018C_Reduced,GlbMuon_Glb_eta_MuonCert_Labeled_UL2018C_Reduced,GlbMuon_Glb_phi_MuonCert_Labeled_UL2018C_Reduced,GlbMuon_Glb_pt_MuonCert_Labeled_UL2018C_Reduced


Escogemos un observable en un periodo.

In [13]:
# Column of `names_df` to inspect; row 'A' is fixed in the lookup below.
observable = 'eta'

# Look up the file stem in the table and load the matching CSV.
csv_stem = names_df.at['A', observable]
file = pd.read_csv(f'{parentdir}/data/csv/' + csv_stem + '.csv')

# Replace the 'histo' column with the result of running df_utils.str2arr on
# each of its entries.
file = file.assign(histo=file['histo'].apply(df_utils.str2arr))

df_utils.df_pprint(file)

Unnamed: 0,fromrun,fromlumi,labels,hname,histo,entries,Xbins,Xmin,Xmax
0,315489,707,1,GlbMuon_Glb_eta,[ 0. 0. 0. 0. 2. 0. 3. 2. 3. 10. 33. 35. 44. 36. 47. 43. 35. 45.  47. 38. 38. 39. 34. 40. 35. 39. 37. 38. 47. 36. 41. 26. 38. 33. 34. 32.  26. 29. 31. 34. 44. 39. 42. 38. 29. 23. 29. 29. 31. 31. 41. 29. 29. 27.  28. 30. 45. 40. 38. 32. 37. 35. 30. 33. 35. 37. 32. 36. 39. 28. 32. 33.  42. 58. 38. 47. 28. 57. 34. 40. 47. 48. 56. 47. 47. 54. 37. 34. 49. 44.  12. 10. 3. 1. 0. 2. 0. 0. 0. 0.],3046.0,100,-3.0,3.0
1,316060,547,1,GlbMuon_Glb_eta,[ 0. 0. 0. 1. 1. 2. 3. 7. 6. 18. 62. 66. 56. 55. 64. 55. 59. 46.  65. 45. 64. 50. 66. 53. 47. 66. 59. 60. 51. 52. 46. 49. 44. 59. 49. 56.  53. 48. 65. 52. 62. 54. 48. 48. 61. 42. 43. 57. 56. 51. 59. 46. 55. 48.  38. 39. 53. 59. 45. 41. 40. 52. 55. 44. 47. 48. 54. 49. 54. 63. 40. 60.  63. 62. 53. 62. 59. 57. 60. 46. 48. 74. 67. 78. 76. 56. 56. 70. 48. 40.  18. 9. 6. 4. 0. 0. 0. 0. 0. 0.],4423.0,100,-3.0,3.0
2,316060,548,1,GlbMuon_Glb_eta,[ 0. 0. 0. 1. 1. 2. 4. 2. 6. 22. 44. 63. 66. 48. 69. 67. 42. 50.  39. 59. 57. 52. 66. 54. 60. 50. 54. 61. 61. 64. 55. 51. 56. 55. 51. 34.  61. 52. 55. 46. 64. 52. 48. 60. 55. 47. 40. 54. 80. 60. 53. 73. 52. 49.  50. 54. 62. 62. 55. 48. 67. 58. 65. 52. 59. 55. 47. 52. 47. 54. 57. 54.  52. 54. 64. 57. 65. 68. 52. 67. 50. 76. 73. 73. 63. 67. 55. 75. 62. 50.  22. 14. 6. 6. 4. 2. 0. 0. 0. 0.],4642.0,100,-3.0,3.0


Guardamos los histogramas de las primeras filas en una carpeta.

In [20]:
# Re-import plot_utils so edits to the module are picked up without
# restarting the kernel.
reload(plot_utils)

# Plot the first rows of `file`, at most 20, never more than the table holds.
# A plain loop replaces the side-effect-only list comprehension; the original
# bound its throwaway list to `_`, which rebinds IPython's last-output
# variable.
# NOTE(review): plot_df2hist presumably writes each figure under `path` —
# confirm against utils/plot_utils.py.
plot_dir = f'{parentdir}/graphs/test_all_{observable}'
n_plots = min(20, len(file))
for row in range(n_plots):
    plot_utils.plot_df2hist(file, row, path=plot_dir)


In [15]:
# Histogram of the 'labels' column over the bin edges 0, 0.5 and 1;
# prints the (counts, bin_edges) tuple returned by np.histogram.
label_counts, label_edges = np.histogram(file['labels'], bins=(0, 0.5, 1))
print((label_counts, label_edges))

(array([  643, 51261], dtype=int64), array([0. , 0.5, 1. ]))


