## Reporting

This module processes model-generated log files and extracts metrics such as ratios, precision, recall, confusion matrix values, and histogram data. The results are aggregated into structured CSV reports for further analysis or record-keeping.

 Main Functions:
   - get_row_from_log: Parses individual log files to extract metrics.
   - generates_csv_file: Writes global metric data to CSV.
   - generates_csv_file_hist: Writes histogram data to CSV.
   - generates_csv_files: Entry point for report generation.

In [1]:
########################################################################
# Import Section
#
# This section loads required Python libraries and modules used across 
# the reporting script.
########################################################################

from activity_constants import *  # Imports all constants defined in the local 'activity_constants' module

import gc       # Provides access to garbage collection functionality (not used explicitly in this script)
import os       # Enables interaction with the operating system, e.g., file path manipulation and directory listing

import fnmatch  # Used to match filenames using Unix shell-style wildcards (e.g., for selecting log files)
import csv      # Enables reading from and writing to CSV files

In [2]:
########################################################################
# get_row_from_log: Extracts metrics and summary statistics from a model
# log file by parsing each line according to its expected position and
# content, and returns a structured list of values.
#
# Inputs:
#   - model_folder: Path to the directory containing the log file.
#   - file_name: Name of the log file to parse.
#
# Returns:
#   - str_return: List containing filename, extracted metrics, and summary.
########################################################################
def get_row_from_log (model_folder, file_name):
    """Parse one global metrics log and return its values as a flat row.

    The log is read positionally (0-based line index):
      - lines 0-2: text such as "Gobal ratio: 0.95"; the third
        space-separated token is taken as the value.
      - line 3: a bracketed, comma-separated list; the elements at
        indices 1, 2 and 3 are kept.
      - lines 4-5: bracketed, space-separated integers (the two rows of a
        matrix); every entry is kept and added to a running total.
    Any further lines are echoed to stdout but otherwise ignored.

    Args:
        model_folder: Directory containing the log file.
        file_name: Name of the log file inside ``model_folder``.

    Returns:
        list: ``[file_name, <values as strings>..., total]`` where ``total``
        is the integer sum of the entries found on lines 4-5. A value that
        is missing from lines 0-3 is emitted as ``''`` so that the columns
        of the resulting row stay aligned.

    Raises:
        OSError: If the log file cannot be opened.
        ValueError: If an entry on lines 4-5 is not an integer.
    """
    str_return = [file_name]
    total_elements = 0

    # The context manager closes the handle even if parsing raises below.
    with open(model_folder + '/' + file_name, 'r') as log_file:
        lines = log_file.readlines()

    for count, line in enumerate(lines):
        print("Line{}: {}".format(count, line))

        if count <= 2:
            tokens = line.split(' ')
            # A truncated line (label without a value) yields an empty cell
            # instead of aborting the whole report with an IndexError.
            str_return.append(tokens[2].replace('\n', '') if len(tokens) > 2 else '')

        elif count == 3:
            number_list = [s.strip() for s in line.replace('[', '').replace(']', '').split(',')]
            print("numnber_list: {}".format(number_list))

            # NOTE(review): the CSV header written by generates_csv_file labels
            # these three cells Precision/Recall/F1, yet in the sample output
            # element 4 equals the F1 of elements 2 and 3 - the intended
            # indices may be 2..4 rather than 1..3. Confirm before changing.
            for i in range(1, 4):
                str_return.append(number_list[i] if i < len(number_list) else '')

        elif count <= 5:
            number_list = line.replace('[', '').replace(']', '').replace('\n', '').split(' ')
            print("numnber_list: {}".format(number_list))

            # Splitting on single spaces leaves empty tokens between padded
            # columns; only the non-empty ones are real entries.
            for number in number_list:
                if number:
                    str_return.append(number)
                    print("numnber: {}".format(number))
                    total_elements += int(number)

    str_return.append(total_elements)

    print("output: {}".format(str_return))
    return str_return

In [3]:
########################################################################
# generates_csv_file: Aggregates results from multiple global log files
# into a single CSV report.
#
# Inputs:
#   - p_model_folder: Directory path containing the log files.
#   - file_filter: Pattern to match target log files.
#   - file_output: Output CSV filename.
#
# Returns:
#   - None (writes data directly to CSV file).
########################################################################
def generates_csv_file (p_model_folder, file_filter, file_output):
    """Write one CSV row per matching global log file, preceded by a header.

    Args:
        p_model_folder: Directory that is listed for log files and that
            also receives the report.
        file_filter: Shell-style wildcard pattern selecting the log files.
        file_output: Name of the CSV file created inside ``p_model_folder``.

    Returns:
        None. The report is written to disk; progress is printed to stdout.
    """
    report_path = p_model_folder + '/' + file_output
    column_titles = [
        'fila_name', 'Gobal ratio', 'Negative ratio', 'Positive ratio',
        'Precision', 'Recall', 'F1',
        'True Negatives', 'False Positives', 'False Negatives', 'True Positives',
        'total elements',
    ]

    print("Analyzing folder: {}".format(p_model_folder))
    print("Writting report: {}".format(report_path))

    with open(report_path, mode='w', newline='') as report:
        report_writer = csv.writer(report, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        report_writer.writerow(column_titles)

        # The folder is listed only after the report file has been created
        # and the header written; each log is parsed as its row is emitted.
        matching_logs = fnmatch.filter(os.listdir(p_model_folder), file_filter)
        report_writer.writerows(
            get_row_from_log(p_model_folder, log_name) for log_name in matching_logs
        )
            



In [4]:
########################################################################
# generates_csv_file_hist: Creates a CSV report summarizing histogram
# data from model histogram log files.
#
# Inputs:
#   - p_model_folder: Directory containing histogram log files.
#   - file_filter: Pattern to identify histogram log files.
#   - file_output: Name of the CSV file to be created.
#
# Returns:
#   - None (writes histogram content to CSV).
########################################################################
def _read_hist_values (log_path):
    """Return the comma-separated fields of the first line of ``log_path``.

    Only the first line is read. An empty file (or an empty first line)
    yields an empty list.
    """
    with open(log_path, 'r') as log_file:
        first_line = log_file.readline().rstrip('\n')
    return first_line.split(',') if first_line else []


def generates_csv_file_hist (p_model_folder, file_filter, file_output):
    """Write a CSV report with the first line of every matching histogram log.

    The header is ``fila_name, hist_type, 1..N`` where N is the number of
    comma-separated fields on the first line of the first matching file.
    Each data row holds the log file name, the histogram type taken from
    the file name, and the fields of that log's first line. When no file
    matches, the report is created but left empty (no header).

    Args:
        p_model_folder: Directory that is listed for log files and that
            also receives the report.
        file_filter: Shell-style wildcard pattern selecting the log files.
        file_output: Name of the CSV file created inside ``p_model_folder``.

    Returns:
        None. The report is written to disk; progress is printed to stdout.

    Raises:
        OSError: If the report or a log file cannot be opened.
        IndexError: If a matching file name does not have a third
            "##"-separated part of the form ``<prefix>_<type>.<ext>``.
    """
    print("Analyzing folder: {}".format(p_model_folder))
    print("Writting report: {}".format(p_model_folder + '/' + file_output))

    with open(p_model_folder + '/' + file_output, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        file_list = fnmatch.filter(os.listdir(p_model_folder), file_filter)

        if not file_list:
            return

        # The first matching log decides how many numbered columns the header gets.
        num_elements = len(_read_hist_values(p_model_folder + '/' + file_list[0]))
        print (str(num_elements))

        writer.writerow(['fila_name', 'hist_type'] + [str(i + 1) for i in range(num_elements)])

        for log_name in file_list:
            # e.g. "...##...##hist_pos.log" -> "hist_pos.log" -> "hist_pos" -> "pos"
            hist_type = log_name.split("##")[2].split(".")[0].split("_")[1]
            # Rows go through the csv writer so they share the header's line
            # terminator and quoting, and always end with a newline even when
            # the log's first line does not.
            writer.writerow([log_name, hist_type] + _read_hist_values(p_model_folder + '/' + log_name))

In [5]:
########################################################################
# generates_csv_files: Orchestrates generation of both global and
# histogram CSV reports for a given study and assay.
#
# Inputs:
#   - p_model_folder: Root folder where the study folder resides.
#   - p_study_name: Name of the study folder.
#   - p_assay: Assay identifier used for file filtering.
#
# Returns:
#   - None (invokes other CSV generation routines).
########################################################################
def generates_csv_files (p_model_folder, p_study_name, p_assay):
    """Generate the histogram and the global CSV reports for one study.

    Both reports are written into ``p_model_folder/p_study_name``:
    ``report_hist.csv`` from files matching ``model_<assay>*##hist_*.log``
    and ``report.csv`` from files matching ``model_<assay>*##global.log``.

    Args:
        p_model_folder: Root folder that contains the study folder.
        p_study_name: Name of the study folder.
        p_assay: Assay identifier used to build the log file patterns.

    Returns:
        None.
    """
    # Use the parameter rather than a module-level ``model_folder`` so that
    # the folder passed by the caller is the one actually processed.
    study_folder = p_model_folder + '/' + p_study_name
    log_prefix = 'model_' + p_assay

    generates_csv_file_hist (study_folder, log_prefix + '*##hist_*.log', 'report_hist.csv')
    generates_csv_file (study_folder, log_prefix + '*##global.log', 'report.csv')

In [6]:
# Build report_hist.csv and report.csv for the 'study_layer' study, assay '1806'.
# model_folder is not defined in this notebook; presumably it comes from the
# star import of activity_constants - confirm.
generates_csv_files(
    p_model_folder=model_folder,
    p_study_name='study_layer',
    p_assay='1806',
)

Analyzing folder: Model/study_layer
Writting report: Model/study_layer/report_hist.csv
5
Analyzing folder: Model/study_layer
Writting report: Model/study_layer/report.csv
Line0: Gobal ratio: 0.9505208134651184

Line1: Negative ratio: 0.9427083333333334

Line2: Positive ratio: nan

Line3: [0.15826548635959625, 0.9505208134651184, 0.0833333358168602, 0.011363636702299118, 0.019999997690320015]

numnber_list: ['0.15826548635959625', '0.9505208134651184', '0.0833333358168602', '0.011363636702299118', '0.019999997690320015']
Line4: [[362   0]

numnber_list: ['362', '', '', '0']
numnber: 362
numnber: 0
Line5:  [ 22   0]]

numnber_list: ['', '', '22', '', '', '0']
numnber: 22
numnber: 0
output: ['model_1806#16_13#32_13#64_13##128_0##global.log', '0.9505208134651184', '0.9427083333333334', 'nan', '0.9505208134651184', '0.0833333358168602', '0.011363636702299118', '362', '0', '22', '0', 384]
Line0: Gobal ratio: 0.9427083134651184

Line1: Negative ratio: 0.9427083333333334

Line2: Positive ratio

In [7]:
# Build report_hist.csv and report.csv for the 'study_dense' study, assay '1806'.
# model_folder is not defined in this notebook; presumably it comes from the
# star import of activity_constants - confirm.
generates_csv_files(
    p_model_folder=model_folder,
    p_study_name='study_dense',
    p_assay='1806',
)

Analyzing folder: Model/study_dense
Writting report: Model/study_dense/report_hist.csv
5
Analyzing folder: Model/study_dense
Writting report: Model/study_dense/report.csv
Line0: Gobal ratio: 0.953125

Line1: Negative ratio: 0.955026455026455

Line2: Positive ratio: 0.8333333333333334

Line3: [0.2577473223209381, 0.953125, 0.0833333358168602, 0.018939394503831863, 0.03086419403553009]

numnber_list: ['0.2577473223209381', '0.953125', '0.0833333358168602', '0.018939394503831863', '0.03086419403553009']
Line4: [[361   1]

numnber_list: ['361', '', '', '1']
numnber: 361
numnber: 1
Line5:  [ 17   5]]

numnber_list: ['', '', '17', '', '', '5']
numnber: 17
numnber: 5
output: ['model_1806#16_7#32_7##64_0#64_0##global.log', '0.953125', '0.955026455026455', '0.8333333333333334', '0.953125', '0.0833333358168602', '0.018939394503831863', '361', '1', '17', '5', 384]
Line0: Gobal ratio: 0.9609375

Line1: Negative ratio: 0.967741935483871

Line2: Positive ratio: 0.8333333333333334

Line3: [0.23572520