In [1]:
import os
import ROOT as r
import uproot
import time
import argparse
import pandas as pd



Welcome to JupyROOT 6.24/07


In [2]:
class runInfo:
    
    def __init__(self):
        
        self.run = -1
        self.file = -1
        self.rawDirectory = None
        self.offlineDirectory = None
        self.daqFile = None
        self.trigFile = None
        self.matchFile = None
        self.offlineFile = None
        self.totalEvents = 0
        self.unmatchedEvents = 0
        self.startTime = -1
        self.unmatchedBoards = 0
    
    def __str__(self):
        out = 'run: {0} file: {1}\n\t\
               daqFile: {2}\n\t\
               trigFile: {3}\n\t\
               matchFile: {4}\n\t\
               offlineFile: {5}'.format(self.run, self.file, self.daqFile, self.trigFile, self.matchFile, self.offlineFile)
        return out
        
        
    

In [3]:
rawDirectories = ['1000', '1100']
rawSubDirectories = ['0000', '0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009']

In [4]:
class fileChecker():
    
    def __init__(self):
        self.min_run = 1022
        self.max_run = 1123
        self.rawDir = ''
        self.offlineDir = ''
        
        self.parse_args()
        if self.args.rawDir: self.rawDir = self.args.rawDir
        if self.args.offlineDir: self.offlineDir = self.args.offlineDir
            
        self.initializePlots()

        self.daqFiles = {}
        self.trigFiles = {}
        self.matchedFiles = {}
        self.offlineFiles = {}
        
        self.runInfos = pd.DataFrame(columns=['run', 'file', 'rawDir', 'offlineDir', 'daqFile', 'trigFile',
                                  'matchFile', 'offlineFile', 'totalEvents', 'unmatchedEvents', 
                                  'startTime', 'unmatchedBoards'])
        #self.runInfos.set_index(['run', 'file'], inplace=True)
        
        self.debug = False
        
    def parse_args(self):
        parser=argparse.ArgumentParser()
        parser.add_argument("-r", "--rawDir", type=str, default = '/store/user/milliqan/run3/', help="Raw data directory")
        parser.add_argument("-o", "--offlineDir", type=str, default = '/store/user/milliqan/trees/v33/', help="Offline data directory")
        self.args = parser.parse_args(args=[])

    def initializePlots(self):

        bins = self.max_run - self.min_run
        self.h_total = r.TH1F("h_total", "Total Number of Events in Run", bins, self.min_run, self.max_run)
        self.h_unmatched = r.TH1F("h_unmatched", "Number of Unmatched Events in Run", bins, self.min_run, self.max_run)
        self.h_startTimes = r.TH1F("h_startTimes", "Start Times of Runs", bins, self.min_run, self.max_run)
        self.h_boardUnmatched = r.TH1F("h_boardUnmatched", "Number of Boards Unmatched", bins, self.min_run, self.max_run)

        self.c1  = r.TCanvas("c1", "c1", 800,800)
        
        
    def checkOfflineFiles(self, fileList):

        total_unmatched = 0
        for events in uproot.iterate(

            #files
            fileList,

            #branches
            ['runNumber', 'fileNumber', 'boardsMatched'],

            #cut
            #cut="",

            how="zip",

            step_size=1000,

            num_workers=8,

            ):

            unmatchedCut = events[:, "boardsMatched"] == 0
            unmatched = events[unmatchedCut, "boardsMatched"]
            self.h_boardUnmatched.Fill(events[0, 'runNumber'], len(unmatched))
            
            self.runInfos['unmatchedBoards'].loc[(self.runInfos['run'] == events[0, 'runNumber']) & (self.runInfos['file'] == events[0, 'fileNumber'])] += len(unmatched)
    
    def getRunFile(self, filename):
        runNum = filename.split('Run')[-1].split('.')[0]
        fileNum = filename.split('.')[1].split('_')[0]
        return runNum, fileNum
    
    
    '''def printFileCounts(self):
        #fout = open('fileCounts.csv', 'w')
        print("{0:<10} {1:>8} {2:>8} {3:>8} {4:>10} {5:>15} {6:>15} {7:>15} {8:>15}".format('Run', 'DAQ Files', 'Trig Files', 'Matched Files', 'Unmatched Files', 'Total Events', 'Unmatched Events', 'Frac Unmatched', 'Start Time'))
        for key, value in self.daqFiles.items():
            numDAQ = len(self.daqFiles[key].files)
            numTrig = 0
            numMatched = 0
            boardsUnmatched = 0
            missing = ''
            totalEvents, unmatched, frac = 0, 0, 0
            if key in self.trigFiles.keys(): numTrig = len(self.trigFiles[key].files)
            if key in self.matchedFiles.keys(): 
                numMatched = len(self.matchedFiles[key].files)
                totalEvents = self.matchedFiles[key].totalEvents
                unmatched = self.matchedFiles[key].unmatchedEvents
                startTime = self.matchedFiles[key].startTime
                if totalEvents != 0: frac = unmatched/totalEvents 
            if key in self.offlineFiles.keys():
                boardsUnmatched = self.offlineFiles[key].unmatchedBoards
            if numDAQ != numMatched: missing = 'x'

            print("{0:<10} {1:>8} {2:>8} {3:>8} {4:>10} {5:>15} {6:>15} {7:>15.3f} {8:>15}".format(key, numDAQ, numTrig, numMatched, missing, totalEvents, unmatched, frac, startTime))
            #fout.write('{0},{1},{2},{3},{4},{5},{6},{7},{8}\n'.format(key, numDAQ, numTrig, numMatched, missing, totalEvents, unmatched, startTime, boardsUnmatched))
        #fout.close()'''
    
    def saveJson(self):
        self.runInfos.to_json('checkMatching.json', orient = 'split', compression = 'infer', index = 'true')
    
    
    def checkMatchedFiles(self, fileList):
        total_unmatched = 0
        for events in uproot.iterate(

            #files
            fileList,

            #branches
            ['runNum', 'eventNum', 'trigger', 'startTime'],

            #cut
            #cut="",

            #needs to be 1000 events for file number to be correct
            step_size=1000,

            num_workers=8,
            
        ):

            unmatchedCut = events[:, "trigger"] == -1
            unmatched = events[unmatchedCut, "trigger"]
            self.h_unmatched.Fill(events[0, 'runNum'], len(unmatched))
            self.h_total.Fill(events[0, 'runNum'], len(events))
            thisbin = self.h_startTimes.FindBin(events[0, 'runNum'])
            self.h_startTimes.SetBinContent(thisbin, events[0, 'startTime'])
            
            fileNum = events[0, 'eventNum']/1000 + 1
            self.runInfos['startTime'].loc[(self.runInfos['run'] == events[0, 'runNum']) & (self.runInfos['file'] == fileNum)] = events[0, 'startTime']
            
            self.runInfos['totalEvents'].loc[(self.runInfos['run'] == events[0, 'runNum']) & (self.runInfos['file'] == fileNum)] += len(events)
            self.runInfos['unmatchedEvents'].loc[(self.runInfos['run'] == events[0, 'runNum']) & (self.runInfos['file'] == fileNum)] += len(unmatched)
    
    def getOfflineInfo(self):
        rawFiles = self.runInfos[['run', 'file']].to_numpy()
        for pair in rawFiles:
            offlineFile = 'MilliQan_Run{0}.{1}_v33_firstPedestals.root'.format(pair[0], pair[1])
            if os.path.exists(self.offlineDir+'/'+offlineFile): 
                self.runInfos['offlineFile'].loc[(self.runInfos['run'] == pair[0]) & (self.runInfos['file'] == pair[1])] = offlineFile
                self.runInfos['offlineDir'].loc[(self.runInfos['run'] == pair[0]) & (self.runInfos['file'] == pair[1])] = self.offlineDir
    
    def getRawInfo(self):
        for directory in rawDirectories:
            for sub in rawSubDirectories:
                fullPath = self.rawDir+directory+'/'+sub
                if not os.path.isdir(fullPath): continue
                for ifile, filename in enumerate(os.listdir(fullPath)):
                    if not filename.endswith('.root'): continue
                    if not filename.startswith('MilliQan'): continue
                    if self.debug and len(self.runInfos) > 100: break

                    thisRun = runInfo()
                    runNum, fileNum = self.getRunFile(filename)
                    thisRun.run = int(runNum)
                    thisRun.file = int(fileNum)
                    thisRun.daqFile = filename
                    thisRun.rawDir = fullPath
                    trigName = "TriggerBoard_Run{0}.{1}.root".format(runNum, fileNum)
                    matchName = "MatchedEvents_Run{0}.{1}_rematch.root".format(runNum, fileNum)
                    if os.path.exists(fullPath+'/'+trigName): thisRun.trigFile = trigName
                    if os.path.exists(fullPath+'/'+matchName): thisRun.matchFile = matchName
                    
                    self.runInfos.loc[len(self.runInfos.index)] = thisRun.__dict__
                            
    def runCheckMatchedFiles(self):
        runs = self.runInfos.run.unique()
        for run in runs:
            runList = self.runInfos[['rawDir', 'matchFile']].loc[self.runInfos['run']==run].apply('/'.join, axis=1).tolist()
            runList = [x+':matchedTrigEvents' for x in runList]
            self.checkMatchedFiles(runList)
        
    def runCheckOfflineFiles(self):
        #now look at offline files post processing info
        runs = self.runInfos.run.unique()
        for run in runs:
            runList = self.runInfos[['offlineDir', 'offlineFile']].loc[self.runInfos['run']==run].apply('/'.join, axis=1).tolist()
            runList = [x+':t' for x in runList]
            self.checkOfflineFiles(runList)


In [5]:
if __name__ == "__main__":
    
    myfileChecker = fileChecker()
    myfileChecker.debug = False
    myfileChecker.getRawInfo()
    myfileChecker.getOfflineInfo()
    myfileChecker.runCheckMatchedFiles()
    myfileChecker.runCheckOfflineFiles()
    myfileChecker.saveJson()


TypeError: sequence item 1: expected str instance, NoneType found

In [None]:
myfileChecker.runInfos
#myfileChecker.runInfos.set_index('run', inplace=True)
#myfileChecker.runInfos.index

In [None]:
def checkOfflineFiles(runInfo):
    filelist = getFiles(runInfo.directory, runInfo.run, runInfo.files, 't', 'combined')
    
    total_unmatched = 0
    for events in uproot.iterate(
    
        #files
        filelist,
        
        #branches
        ['boardsMatched'],
        
        #cut
        #cut="",
        
        how="zip",
        
        step_size=1000,
        
        num_workers=8,
    
    ):

        unmatchedCut = events[:, "boardsMatched"] == 0
        unmatched = events[unmatchedCut, "boardsMatched"]
        h_boardUnmatched.Fill(runInfo.run, len(unmatched))
        #print("Running over run {}".format(runInfo.run))
    
    return runInfo
    

In [None]:
def getRunFile(filename):
    runNum = filename.split('Run')[-1].split('.')[0]
    fileNum = filename.split('.')[1].split('_')[0]
    return runNum, fileNum

In [None]:
def printFileCounts():
    #fout = open('fileCounts.csv', 'w')
    print("{0:<10} {1:>8} {2:>8} {3:>8} {4:>10} {5:>15} {6:>15} {7:>15} {8:>15}".format('Run', 'DAQ Files', 'Trig Files', 'Matched Files', 'Unmatched Files', 'Total Events', 'Unmatched Events', 'Frac Unmatched', 'Start Time'))
    for key, value in daqFiles.items():
        numDAQ = len(daqFiles[key].files)
        numTrig = 0
        numMatched = 0
        boardsUnmatched = 0
        missing = ''
        totalEvents, unmatched, frac = 0, 0, 0
        if key in trigFiles.keys(): numTrig = len(trigFiles[key].files)
        if key in matchedFiles.keys(): 
            numMatched = len(matchedFiles[key].files)
            totalEvents = matchedFiles[key].totalEvents
            unmatched = matchedFiles[key].unmatchedEvents
            startTime = matchedFiles[key].startTime
            if totalEvents != 0: frac = unmatched/totalEvents 
        if key in offlineFiles.keys():
            boardsUnmatched = offlineFiles[key].unmatchedBoards
        if numDAQ != numMatched: missing = 'x'

        print("{0:<10} {1:>8} {2:>8} {3:>8} {4:>10} {5:>15} {6:>15} {7:>15.3f} {8:>15}".format(key, numDAQ, numTrig, numMatched, missing, totalEvents, unmatched, frac, startTime))
        #fout.write('{0},{1},{2},{3},{4},{5},{6},{7},{8}\n'.format(key, numDAQ, numTrig, numMatched, missing, totalEvents, unmatched, startTime, boardsUnmatched))
    #fout.close()

In [None]:
def getFiles(directory, run, files, tree, tag=''):
    if tree == 'matchedTrigEvents':
        filelist = ["{0}/MatchedEvents_Run{1}.{2}_rematch.root:{3}".format(directory, run, x, tree) for x in files]
    elif tree == 't':
        if tag == 'combined':
            filelist = ["{0}/MilliQan_Run{1}_{4}.root:{3}".format(directory, run, x, tree, tag) for x in files]
        else:
            filelist = ["{0}/MilliQan_Run{1}.{2}_{4}.root:{3}".format(directory, run, x, tree, tag) for x in files]
    else:
        print("Name of tree not recognized")
        sys.exit(1)
    return filelist
    

In [None]:
def checkMatchedFiles(runInfo):
    filelist = getFiles(runInfo.directory, runInfo.run, runInfo.files, 'matchedTrigEvents')
    
    total_unmatched = 0
    for events in uproot.iterate(
    
        #files
        filelist,
        
        #branches
        ['trigger', 'startTime'],
        
        #cut
        #cut="",
        
        how="zip",
        
        step_size="10 MB",
        
        num_workers=8,
    
    ):

        unmatchedCut = events[:, "trigger"] == -1
        unmatched = events[unmatchedCut, "trigger"]
        h_unmatched.Fill(runInfo.run, len(unmatched))
        h_total.Fill(runInfo.run, len(events))
        runInfo.totalEvents += len(events)
        runInfo.unmatchedEvents += len(unmatched)
        runInfo.startTime = events[0, 'startTime']
        thisbin = h_startTimes.FindBin(int(runInfo.run))
        h_startTimes.SetBinContent(thisbin, events[0, 'startTime'])
    
    return runInfo
        

In [None]:
if __name__ == "__main__":
    
    myfileChecker = fileChecker()
    
    

In [None]:
'''if __name__ == "__main__":

    path = '/store/user/milliqan/run3/'
    #directories = ['1000', '1100']
    directories = ['1000']
    #subdirectories = ['0000', '0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009']
    subdirectories = ['0002']
    
    offlineDirectory = '/store/user/milliqan/trees/v31/combined/'

    daqFiles = {}
    trigFiles = {}
    matchedFiles = {}
    offlineFiles = {}

    
    min_run = 1022
    max_run = 1123
    bins = max_run - min_run
    h_total = r.TH1F("h_total", "Total Number of Events in Run", bins, min_run, max_run)
    h_unmatched = r.TH1F("h_unmatched", "Number of Unmatched Events in Run", bins, min_run, max_run)
    h_startTimes = r.TH1F("h_startTimes", "Start Times of Runs", bins, min_run, max_run)
    h_boardUnmatched = r.TH1F("h_boardUnmatched", "Number of Boards Unmatched", bins, min_run, max_run)

    c1  = r.TCanvas("c1", "c1", 800,800)

    for directory in directories:
        for sub in subdirectories:
            fullPath = path+directory+'/'+sub
            if not os.path.isdir(fullPath): continue
            for filename in os.listdir(fullPath):
                if not filename.endswith('.root'): continue
                if filename.startswith('MilliQan'):
                    runNum, fileNum = getRunFile(filename)
                    if runNum in daqFiles:
                        daqFiles[runNum].files.append(fileNum)
                    else:
                        daqFiles[runNum] = runInfo(fullPath, runNum, fileNum)
                elif filename.startswith('TriggerBoard'):
                    runNum, fileNum = getRunFile(filename)
                    if runNum in trigFiles:
                        trigFiles[runNum].files.append(fileNum)
                    else:
                        trigFiles[runNum] = runInfo(fullPath, runNum, fileNum)
                elif filename.startswith('MatchedEvents'):
                    if not 'rematch' in filename: continue
                    runNum, fileNum = getRunFile(filename)
                    if runNum in matchedFiles:
                        matchedFiles[runNum].files.append(fileNum)
                    else:
                        matchedFiles[runNum] = runInfo(fullPath, runNum, fileNum)
    
    for key, value in matchedFiles.items():
        #if int(key) > 1005: break
        matchedFiles[key] = checkMatchedFiles(value)
        
        
    #now look at offline files post processing info
    #TODO simplyfy so that there are not so many runInfo objects everywhere
    for filename in os.listdir(offlineDirectory):
        if not filename.endswith('.root'): continue
        #runNum, fileNum = getRunFile(filename)
        runNum = filename.split('Run')[1].split('_')[0]
        fileNum = -1
        if int(runNum) < min_run or int(runNum) > max_run: continue
        if runNum in offlineFiles:
            offlineFiles[runNum].files.append(fileNum)
        else:
            offlineFiles[runNum] = runInfo(offlineDirectory, runNum, fileNum)
        
    for key, value in offlineFiles.items():
        offlineFiles[key] = checkOfflineFiles(value)
                 
    
    printFileCounts()'''


In [None]:
%%timeit

for key, value in offlineFiles.items():
    offlineFiles[key] = checkOfflineFiles(value)

In [None]:
h_ratio = h_unmatched.Clone()
h_ratio.Divide(h_total)
h_ratio.Draw()
c1.SetLogy()
c1.Draw()

In [None]:
h_total.Draw()
h_unmatched.SetLineColor(2)
h_unmatched.Draw("same")
c1.SetLogy(0)
c1.Draw()


In [None]:
matchedFiles['1000'].unmatchedEvents

In [None]:
h_startTimes.Draw()
c1.Draw()

In [None]:
for i in range(0, 170):
    print(h_boardUnmatched.GetBinContent(i))

In [None]:
h_boardUnmatched.Draw()
c1.SetLogy()
c1.Draw()