# Help for the homic package

In [2]:
# Setup cell: general-purpose imports, then the homic submodules whose help pages are shown below.
import sys
import pandas as pd
import numpy as np
# NOTE(review): hard-coded, user-specific absolute path. Presumably this is what makes
# `homic` importable in the next line — TODO confirm, and consider installing the package
# or reading the location from a configurable variable so the notebook runs elsewhere.
sys.path.append('/gpfs/commons/home/mgarbulowski/homic_package/src')
# Submodules of homic; each has (or will have) its own "Module:" section in this notebook.
from homic import file_readers, simulate_16S, kraken2, dl_model, dl_evaluation, process_data, make_plots

homic package imported


2025-09-25 11:02:26.002812: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758812546.544112 3893726 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758812546.722557 3893726 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-25 11:02:27.716152: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Module: file_readers

### Function: fasta

In [3]:
help(file_readers.fasta)

Help on function fasta in module homic.file_readers:

fasta(path)
    Reads fasta file and prints the number of organisms in the file.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    path : string,
        path to the .fasta file.
    
    Returns
    -------
    fasta_dict
        a dictionary with organisms included in the .fasta file



### Function: make_benchmark_table

In [2]:
help(file_readers.make_benchmark_table)

Help on function make_benchmark_table in module homic.file_readers:

make_benchmark_table(path, reads, krk_preds, bcodes)
    Creates a table for benchmarking spots.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    path : string,
        path to the gold truth species / genus list.
    reads : list,
        "reads" list read with the file_readers.fastq function
    krk_preds : pandas Series,
        taken as a column "taxa" from output of file_readers.load_kraken2_output(path)
    bcodes : pandas DataFrame,
        the output from file_readers.load_barcodes(path)
        
    
    Returns
    -------
    fastq_spot_d
        a dict indicating which coordinates (spots) belong to what read. Keys are spot IDs, values are read IDs.
    info
        pandas DataFrame with following columns:
        'fastq' - fastq full header
        'tile' - tile id (from header)
        'x' - position x  (from header)
        'y' - position y (from header)
   

## Module: simulate_16S

## Module: kraken2

### Function: prepare_db

In [3]:
help(kraken2.prepare_db)

Help on function prepare_db in module homic.kraken2:

prepare_db(db_path, ref_path)
    Builds db for kraken2.
    
    Parameters
    ----------
    db_path : string,
        a path to the folder where kraken db will be created
    ref_path : string,
        a path to the input .fasta file with reference sequences
        
    Returns
    -------
    no output
        files are saved to the folder under "db_path"



### Function: classify

In [4]:
help(kraken2.classify)

Help on function classify in module homic.kraken2:

classify(db_path, input_file, confidence=0.01, threads=8, min_hit_gr=2)
    Classifies reads to genus / species according to db.
    
    Parameters
    ----------
    db_path : string,
        a path to kraken db
    input_file : string,
        a path to input .fastq file
    confidence : float,
        kraken2 parameter - confidence (-T)
    threads : integer,
        kraken2 parameter - number of threads (-p)
    min_hit_gr : integer,
        kraken2 parameter - minimum hitting group (-g)
    
    Returns
    -------
    output
        a data frame with following columns 'outcome', 'seqid', 'taxid', 'seqlen' and 'kmers'



### Function: decontaminate_single

In [5]:
help(kraken2.decontaminate_single)

Help on function decontaminate_single in module homic.kraken2:

decontaminate_single(db_path, input_file, output, confidence=0.5, threads=8, min_base_qual=22)
    Decontamination with kraken2 for single .fastq (unpaired).
    
    Parameters
    ----------
    db_path : string,
        a path to kraken db
    input_file : string,
        a path to the .fastq file
    output : string,
        a path to the output .fastq file where host reads are removed
    confidence : float,
        kraken2 parameter - confidence (--confidence)
    threads : integer,
        kraken2 parameter - number of threads (--threads)
    min_base_qual : integer,
        kraken2 parameter - minimum base quality (--minimum-base-quality)
        
    Returns
    -------
    no output, files are saved under "output"



### Function: decontaminate_paired

In [6]:
help(kraken2.decontaminate_paired)

Help on function decontaminate_paired in module homic.kraken2:

decontaminate_paired(db_path, input_file1, input_file2, output, confidence=0.5, threads=12, min_base_qual=22)
    Decontamination with kraken2 for paired .fastq files.
    
    Parameters
    ----------
    db_path : string,
        a path to kraken db
    input_file1 : string,
        a path to the first .fastq file
    input_file2 : string,
        a path to the second .fastq file
    output : string,
        a path to the output .fastq file where host reads are removed
    confidence : float,
        kraken2 parameter - confidence (--confidence)
    threads : integer,
        kraken2 parameter - number of threads (--threads)
    min_base_qual : integer,
        kraken2 parameter - minimum base quality (--minimum-base-quality)
        
    Returns
    -------
    no output, files are saved under "output"



### Function: evaluate_kraken

In [7]:
help(kraken2.evaluate_kraken)

Help on function evaluate_kraken in module homic.kraken2:

evaluate_kraken(krk_path, gs_path)
    Evaluates kraken2 prediction with gold standard.
    
    Parameters
    ----------
    krk_path : string,
        a path to kraken2 result (.csv)
    gs_path : string,
        a path to the gold standard file (.txt)
    
    Returns
    -------
        a value of accuracy



## Module: dl_model

## Module: dl_evaluation

### Function: reassign_classes_per_spot

In [8]:
help(dl_evaluation.reassign_classes_per_spot)

Help on function reassign_classes_per_spot in module homic.dl_evaluation:

reassign_classes_per_spot(info, model, encoder)
    Prints statistics for spots.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    info : pandas DataFrame with following columns,
        'fastq' - fastq full header
        'tile' - tile id (from header)
        'x' - position x  (from header)
        'y' - position y (from header)
        'taxa1' - species part I, truth
        'taxa2' - species part II, truth
        'read' - read sequence 
        'taxa_predictions' - taxid of predictions from Kraken2
        'taxa' - truth species, truth
        'taxa_order' - truth taxa information, ordered 
        'superkingdom' - taxid predictions from Kraken2 translated to taxa info via ete3
        'phylum' - taxid predictions from Kraken2 translated to taxa info via ete3
        'class' - taxid predictions from Kraken2 translated to taxa info via ete3
        'order' - taxid 

### Function: merge_prediction_results

In [9]:
help(dl_evaluation.merge_prediction_results)

Help on function merge_prediction_results in module homic.dl_evaluation:

merge_prediction_results(info, cluster_l, fastq_spot_d, taxa_orders, reassign_d)
    Merges prediction results for spots.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    info : pandas DataFrame with following columns,
        'fastq' - fastq full header
        'tile' - tile id (from header)
        'x' - position x  (from header)
        'y' - position y (from header)
        'taxa1' - species part I, truth
        'taxa2' - species part II, truth
        'read' - read sequence 
        'taxa_predictions' - taxid of predictions from Kraken2
        'taxa' - truth species, truth
        'taxa_order' - truth taxa information, ordered 
        'superkingdom' - taxid predictions from Kraken2 translated to taxa info via ete3
        'phylum' - taxid predictions from Kraken2 translated to taxa info via ete3
        'class' - taxid predictions from Kraken2 translated to tax

### Function: per_spot_stats

In [10]:
help(dl_evaluation.per_spot_stats)

Help on function per_spot_stats in module homic.dl_evaluation:

per_spot_stats(info, reassign_d, fastq_spot_d, taxa_orders)
    Prints statistics for spots.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    info : pandas DataFrame with following columns,
        'fastq' - fastq full header
        'tile' - tile id (from header)
        'x' - position x  (from header)
        'y' - position y (from header)
        'taxa1' - species part I, truth
        'taxa2' - species part II, truth
        'read' - read sequence 
        'taxa_predictions' - taxid of predictions from Kraken2
        'taxa' - truth species, truth
        'taxa_order' - truth taxa information, ordered 
        'superkingdom' - taxid predictions from Kraken2 translated to taxa info via ete3
        'phylum' - taxid predictions from Kraken2 translated to taxa info via ete3
        'class' - taxid predictions from Kraken2 translated to taxa info via ete3
        'order' - taxid

## Module: process_data

## Module: make_plots