In [1]:
# Default imports and basic settings
import os
import sys
import pathlib

for i in ['CERNBOX_HOMEDIR', 'HOME']:
    if i in os.environ.keys():
        datadir = pathlib.Path(os.environ[i]) / "data"
        break
else:
    datadir = 'data/'
print(f"Using directory '{datadir}' for data files")   

sys.path.append(f"{os.environ['PWD']}/O2 Playground")
print(f"Activating TRD SWAN stuff from {os.environ['PWD']}/O2 Playground")
import trdswanutils as trd

Using directory '/eos/user/t/tdietel/data' for data files
Activating TRD SWAN stuff from /home/tdietel/O2 Playground
Username: tdietel
Role: tdietel (other roles: trddrop)
Full name: Thomas Dietel
Email: Tom.Dietel@cern.ch
Certificate DN: OU=tdietel, CN=tdietel, CN=Users, O=AliEn2, C=ch (expiring on 2022-05-29T12:25:40+02:00[Europe/Zurich], which is in 30d 19:28)
Connected from: 188.184.38.69
Home directory: /alice/cern.ch/user/t/tdietel/


In [2]:
# run = 504419
# # run = 504428 # suggested by Archita on MM, 20 April 2022
# # filepattern = f"/alice/data/2021/OCT/{run}/raw"

# outdir = datadir / f"pilot/{run}"

# from pathlib import Path
# Path(outdir).mkdir(parents=True, exist_ok=True)
# %cd $outdir

Runs - Pilot Beam
==============

Anastasia wrote on 16 Feb 2022 in an email:
> some time ago we discussed which run is the best and Ole recommended 505673 (2h) or 505658 (2h), magnets +/+ (for more info you can see [Taku’s table](https://docs.google.com/spreadsheets/d/1Nea1mitHrerJJQkL1TVOypeTxvLRTbPdYlj8r0K2c5g/edit#gid=0))


In [3]:
# Read Taku's table

import pandas as pd
import numpy as np
taku_table = pd.read_csv("https://docs.google.com/spreadsheets/d/"
            "1Nea1mitHrerJJQkL1TVOypeTxvLRTbPdYlj8r0K2c5g"
            "/gviz/tq?tqx=out:csv")

# display(pilot_runs.columns)

taku_table = taku_table.loc[pd.notna(taku_table['Run '])]

all_pilot_runs = pd.DataFrame( {
    'run': taku_table['Run '].astype(int),
    'year': 2021,
    'period': 'OCT',
    'start': taku_table['Start'],
    'end': taku_table['End'],
    'duration': taku_table['End-Start'],
    'TRD': taku_table['TRD'] == 'O'
})

all_pilot_runs['ctf_pattern'] = all_pilot_runs['run'].apply(
    lambda run: f"/alice/data/2021/OCT/{run}/raw/0???/o2_ctf_*.root")
all_pilot_runs['rawtf_pattern'] = all_pilot_runs['run'].apply(
    lambda run: f"/alice/data/2021/OCT/{run}/raw/0???/o2_rawtf_*.tf")


all_pilot_runs

# Only use pilot runs recommended by Ole
pilot_runs = all_pilot_runs.loc[[21,23]]
pilot_runs


Unnamed: 0,run,year,period,start,end,duration,TRD,ctf_pattern,rawtf_pattern
21,505658,2021,OCT,2:55:45,5:01:00,2:05:15,True,/alice/data/2021/OCT/505658/raw/0???/o2_ctf_*....,/alice/data/2021/OCT/505658/raw/0???/o2_rawtf_...
23,505673,2021,OCT,6:44:27,8:53:09,2:08:42,True,/alice/data/2021/OCT/505673/raw/0???/o2_ctf_*....,/alice/data/2021/OCT/505673/raw/0???/o2_rawtf_...


Noise Runs
=========


In [4]:
# check_run_list = pd.concat(check_runs)
noise_runs = pd.DataFrame( {
    "run": [ 504419, 504428 ],
    "year": 2021, 
    "period": "OCT_TRD",
} )

noise_runs['ctf_pattern'] = noise_runs['run'].apply(
    lambda run: f"/alice/data/2021/OCT_TRD/{run}/raw/o2_ctf_*.root")
noise_runs['rawtf_pattern'] = noise_runs['run'].apply(
    lambda run: f"/alice/data/2021/OCT/{run}/raw/o2_rawtf_*.tf")

noise_runs

Unnamed: 0,run,year,period,ctf_pattern,rawtf_pattern
0,504419,2021,OCT_TRD,/alice/data/2021/OCT_TRD/504419/raw/o2_ctf_*.root,/alice/data/2021/OCT/504419/raw/o2_rawtf_*.tf
1,504428,2021,OCT_TRD,/alice/data/2021/OCT_TRD/504428/raw/o2_ctf_*.root,/alice/data/2021/OCT/504428/raw/o2_rawtf_*.tf


Query file catalog
==============

For each run in `check_runs`, we query the file catalog to get the number and size of TFs and CTFs.

This can take a long time, maybe we should implement caching.

In [5]:
check_runs = pd.concat([pilot_runs, noise_runs], ignore_index=True, sort=False)
# display(check_runs)

print("Determining available statistics for requested runs")
def file_summary(row):
    print(f"AliEn ls: {row[0]}")
    fileinfo = trd.alien_ls(row[0])
#     print(len(fileinfo), sum([x.size for x in fileinfo]))
    return len(fileinfo), sum([x.size for x in fileinfo])

check_runs[['ctf_files','ctf_size']] = check_runs[['ctf_pattern']].apply(
    file_summary, axis='columns', result_type='expand')

check_runs[['rawtf_files','rawtf_size']] = check_runs[['rawtf_pattern']].apply(
    file_summary, axis='columns', result_type='expand')

check_runs[['run', 'ctf_files', 'ctf_size', 'rawtf_files', 'rawtf_size']]

Determining available statistics for requested runs
AliEn ls: /alice/data/2021/OCT/505658/raw/0???/o2_ctf_*.root
AliEn ls: /alice/data/2021/OCT/505673/raw/0???/o2_ctf_*.root
AliEn ls: /alice/data/2021/OCT_TRD/504419/raw/o2_ctf_*.root
AliEn ls: /alice/data/2021/OCT_TRD/504428/raw/o2_ctf_*.root
AliEn ls: /alice/data/2021/OCT/505658/raw/0???/o2_rawtf_*.tf
AliEn ls: /alice/data/2021/OCT/505673/raw/0???/o2_rawtf_*.tf
AliEn ls: /alice/data/2021/OCT/504419/raw/o2_rawtf_*.tf
AliEn ls: /alice/data/2021/OCT/504428/raw/o2_rawtf_*.tf


Unnamed: 0,run,ctf_files,ctf_size,rawtf_files,rawtf_size
0,505658,1258,2410795864591,170746,338661042440398
1,505673,1218,2245653671863,165743,327205659762528
2,504419,1297,655511204728,2,47937776
3,504428,38,23111247743,0,0


In [16]:
def get_file_list(run, filetype, outfile=None, maxfiles=None):
    if isinstance(outfile,str):
        outfile = pathlib.Path(outfile)
    pattern = check_runs.loc[check_runs['run']==504419][filetype+'_pattern']
    fileinfo = trd.alien_ls(pattern)

    if outfile is None:
        return fileinfo
    else:
        outdir = outfile.parent #pathlib.Path(outdir)
        outdir.mkdir(parents=True, exist_ok=True)
        print(f"Write list of {len(fileinfo)} files to {outfile}")
        with open(outfile,"w") as of:
            for f in fileinfo:
                of.write(str(f.path)+"\n")

Export information
===============

In the following, we export the collected information to create data sets. This will be a rather manual process, so we will not automate it, and prevent the notebook from executing it automatically.

In [7]:
# Make sure the notebook does not proceed beyond this point automatically
assert(False)

AssertionError: 

Create directory for output files
-------------------------------------

Create a directory where all the files will be created

In [14]:
outdir = datadir / "noise3"
outdir.mkdir(parents=True, exist_ok=True)
%cd {outdir}
%ls -l

/eos/home-t/tdietel/data/noise3
total 0


Create file list
----------------

The file list is saved in a text file that can be used by e.g. the `o2-ctf-reader-workflow`.

In [17]:
get_file_list(504419, 'rawtf', outfile="files.txt", maxfiles=100)
%ls -l

Write list of 2 files to files.txt
total 1
-rw-r--r--. 1 tdietel 1395 134 Apr 28 17:07 files.txt


## Script to convert timeframe to digits/tracklets

Note that you will have to run this script on lxplus for the actual conversion.

In [18]:
%%genfile -m 0755 ctf2trd.sh
#!/bin/sh

eval $(alienv printenv VO_ALICE@O2::nightly-20220124-1)
o2-ctf-reader-workflow --onlyDet TRD --ctf-input files.txt \
| o2-trd-digittracklet-writer \
| o2-dpl-run --run --batch


Generated file 'ctf2trd.sh'


Create a Makefile
-----------------

The Makefile provides a few handy shortcuts.

In [26]:
%%genfile Makefile

help: Makefile ## Print this help text
    @perl -nle 'printf("  %-20s %s\n",$$1,$$2) if /^(\S+):.*##\s*(.*)/' $^

ctf2trd: trddigits.root trdtracklets.root ## Convert CTF to digits, tracklets files

trddigits.root trdtracklets.root: ctf2trd.sh files.txt
    ./ctf2trd.sh
    
clean: ## Clean up
    rm -f trdtracklets.root trddigits.root
    rm -f core_dump_*

Generated file 'Makefile'


In [24]:
%ls -l

total 2
-rwxr-xr-x. 1 tdietel 1395 187 Apr 28 17:08 [0m[01;32mctf2trd.sh[0m*
-rw-r--r--. 1 tdietel 1395 134 Apr 28 17:07 files.txt
-rw-r--r--. 1 tdietel 1395 178 Apr 28 17:09 Makefile
