In [None]:
import gzip
import shutil
from pathlib import Path
import tempfile
from glob import glob
import os
import logging
import pandas as pd
import numpy as np
from datetime import datetime
from utils import run_manager
from utils import file_handler


In [None]:
# save info in a dataframe
from tqdm import tqdm
color_list = ['BLU', 'NERO', 'ROSSO']

rows=[]
for color in color_list:
    #List the files to parse
    path = f'/group/Muography/abiolchi/test/RAW_GZ/{color}/'
    file_list = glob(f"{path}/SLOWCONTROL*.gz")
    print(f"Number of files to decompress and parse:{len(file_list)}")

    #Loop over the files
    for file in tqdm(file_list):

        #print(f"Processing file: {file}")
        decompressed_file, err = file_handler.decompress(file)
        if err is None:
            #print(f"Decompressed file: {decompressed_file}")
            slowcontrol_data = file_handler.parse_slow_control(decompressed_file, target_run=12345)
            timestamp = slowcontrol_data[0]['timestamp'] 
            run = slowcontrol_data[0]['run']+1 
            day = slowcontrol_data[0]['day'] 
            hour = slowcontrol_data[0]['hour']
            temperature = slowcontrol_data[0]['temperature'] 
            trigger_rate = slowcontrol_data[0]['tr'] 
            accidental_rate = slowcontrol_data[0]['ar']
            orientation = run_manager.get_orientation(run, color)
            rows.append({
                "hodoscope": color,
                "run": run,
                "timestamp": timestamp,
                "day": day,
                "hour": hour,
                "temperature": temperature,
                "trigger_rate": trigger_rate,
                "accidental_rate": accidental_rate,
                "orientation": orientation,
            })
        else:
            print(f"Error decompressing file: {err}")

df = pd.DataFrame(rows)
df.to_csv("run_index.csv", index=True)
df.to_pickle("data_scan.pkl")
print(df)





Number of files to decompress and parse:10


100%|██████████| 10/10 [00:00<00:00, 1812.73it/s]


Number of files to decompress and parse:24


100%|██████████| 24/24 [00:00<00:00, 2104.78it/s]


Number of files to decompress and parse:11


100%|██████████| 11/11 [00:00<00:00, 1321.00it/s]

   hodoscope    run      timestamp         day   hour  temperature  \
0        BLU  10089  1663863806075  2022-09-22  18:23         22.7   
1        BLU  10081  1663826992630  2022-09-22  08:09         17.6   
2        BLU  10080  1663822958694  2022-09-22  07:02         17.8   
3        BLU  10087  1663853236993  2022-09-22  15:27         23.2   
4        BLU  10084  1663839121954  2022-09-22  11:32         20.3   
5        BLU  10088  1663858553653  2022-09-22  16:55         23.3   
6        BLU  10083  1663835067114  2022-09-22  10:24         18.8   
7        BLU  10085  1663842394942  2022-09-22  12:26         22.2   
8        BLU  10086  1663847863467  2022-09-22  13:57         22.7   
9        BLU  10082  1663831053489  2022-09-22  09:17         17.7   
10      NERO   2507  1572143168320  2019-10-27  03:26         16.0   
11      NERO  10089  1630138733053  2021-08-28  10:18         26.2   
12      NERO   2501  1572121917353  2019-10-26  22:31         17.4   
13      NERO  11429 




In [8]:
# inquiry

def select_by_date(df, start_day, end_day) -> pd.DataFrame:
    # Convert input to datetime.date
    start_day = pd.to_datetime(start_day).date()
    end_day   = pd.to_datetime(end_day).date()

    df = df.copy()
    df["day"] = pd.to_datetime(df["day"]).dt.date  # force conversion always

    print("Start:", start_day, "End:", end_day)


    mask = (df["day"] >= start_day) & (df["day"] <= end_day)
    print("Rows selected:", mask.sum(), "out of", len(df))

    return df[(df["day"] >= start_day) & (df["day"] <= end_day)]


result = select_by_date(df, "2020-10-20", "2022-10-30")
print(result)

df.isna().any() 
df.isna().sum()        # count per column





Start: 2020-10-20 End: 2022-10-30
Rows selected: 32 out of 45
   hodoscope    run      timestamp         day   hour  temperature  \
0        BLU  10089  1663863806075  2022-09-22  18:23         22.7   
1        BLU  10081  1663826992630  2022-09-22  08:09         17.6   
2        BLU  10080  1663822958694  2022-09-22  07:02         17.8   
3        BLU  10087  1663853236993  2022-09-22  15:27         23.2   
4        BLU  10084  1663839121954  2022-09-22  11:32         20.3   
5        BLU  10088  1663858553653  2022-09-22  16:55         23.3   
6        BLU  10083  1663835067114  2022-09-22  10:24         18.8   
7        BLU  10085  1663842394942  2022-09-22  12:26         22.2   
8        BLU  10086  1663847863467  2022-09-22  13:57         22.7   
9        BLU  10082  1663831053489  2022-09-22  09:17         17.7   
11      NERO  10089  1630138733053  2021-08-28  10:18         26.2   
13      NERO  11429  1656384927187  2022-06-28  04:55         29.2   
14      NERO  10081  1630104

hodoscope          0
run                0
timestamp          0
day                0
hour               0
temperature        0
trigger_rate       1
accidental_rate    1
orientation        0
dtype: int64