# Help for the homic package

In [4]:
import os
import sys

import numpy as np
import pandas as pd

# Location of the homic package sources. Defaults to the original cluster
# checkout; set the HOMIC_SRC environment variable to use a different copy
# without editing the notebook.
HOMIC_SRC = os.environ.get('HOMIC_SRC', '/gpfs/commons/home/mgarbulowski/homic_package/src')

# Guarded so that re-running this cell does not add duplicate sys.path entries.
if HOMIC_SRC not in sys.path:
    sys.path.append(HOMIC_SRC)

from homic import file_readers, simulate_16S, kraken2, dl_model, dl_evaluation, process_data, make_plots

## Module: file_readers

In [10]:
# Print the documentation of each public helper in homic.file_readers,
# one after another, in the order listed.
for fn_name in (
    "fasta",
    "fastq",
    "save_fastq_as_rev_comp",
    "save_fasta_as_rev_comp",
    "load_barcodes",
    "make_benchmark_table",
    "load_kraken2_output",
):
    help(getattr(file_readers, fn_name))

Help on function fasta in module homic.file_readers:

fasta(path)
    Reads fasta file and prints the number of organisms in the file.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    path : string,
        path to the .fasta file.
    
    Returns
    -------
    fasta_dict
        a dictionary with organisms included in the .fasta file

Help on function fastq in module homic.file_readers:

fastq(path)
    Reads fastq file.
    
    No default parameters. All must be specified.
    
    Parameters
    ----------
    path : string,
        path to the .fastq file.
    
    Returns
    -------
    r2_header_lines
        a list of all fastq headers
    r2_read_lines
        a list of reads
    r2_qual_lines
        a list of all quality lines

Help on function save_fastq_as_rev_comp in module homic.file_readers:

save_fastq_as_rev_comp(path)
    Reads fastq file and creates new fastq with reverse complementary reads.
    
    No default param

## Module: simulate_16S

In [11]:
# Print the documentation of each simulation routine in homic.simulate_16S,
# in the order listed.
for fn_name in (
    "training_data",
    "training_data_fast",
    "validation_data",
    "simulate_barcodes",
):
    help(getattr(simulate_16S, fn_name))

Help on function training_data in module homic.simulate_16S:

training_data(n_reads, output_path, score_thr, mic_refs, r2_header_lines, r2_read_lines, r2_qual_lines, impute_errors=True, trunc_range=[0, 0], print_stats=True)
    Creates data for training DL model with feature selection based on alignment scores.
    
    Parameters
    ----------
    n_reads : integer,
       number of reads to generate. In case this number exceeds the number of reads in real data, randomly (with replacement) generated reads are created.
    output_path : string,
       path to the folder where simulated data are created
    score_thr : float,
       threshold for feature selection (alignment score threshold)
    mic_refs : dict,
       a dict of microbiome references
    r2_header_lines : list,
       a list of headers from the real data
    r2_read_lines : list,
       a list of reads (sequences) from the real data
    r2_qual_lines : list,
       a list of quality lines from the real data
    impute_

## Module: kraken2

In [12]:
# Print the documentation of each Kraken2 wrapper in homic.kraken2,
# in the order listed.
for fn_name in (
    "prepare_db",
    "classify",
    "decontaminate_single",
    "decontaminate_paired",
    "evaluate_kraken",
):
    help(getattr(kraken2, fn_name))

Help on function prepare_db in module homic.kraken2:

prepare_db(db_path, ref_path)
    Builds db for kraken2.
    
    Parameters
    ----------
    db_path : string,
        a path to the folder where kraken db will be created
    ref_path : string,
        a path to the input .fasta file with reference sequences
        
    Returns
    -------
    no output
        files are saved to the folder under "db_path"

Help on function classify in module homic.kraken2:

classify(db_path, input_file, confidence=0.01, threads=8, min_hit_gr=2)
    Classifies reads to genus / species according to db.
    
    Parameters
    ----------
    db_path : string,
        a path to kraken db
    input_file : string,
        a path to input .fastq file
    confidence : float,
        kraken2 parameter - confidence (-T)
    threads : integer,
        kraken2 parameter - number of threads (-p)
    min_hit_gr : integer,
        kraken2 parameter - minimum hitting group (-g)
    
    Returns
    -------
  

## Module: dl_model

In [13]:
# Print the documentation of each deep-learning helper in homic.dl_model,
# in the order listed.
for fn_name in (
    "prepare_data",
    "one_hot_encoder",
    "one_hot_model",
    "predict_class_for_reads",
):
    help(getattr(dl_model, fn_name))

Help on function prepare_data in module homic.dl_model:

prepare_data(input_fq, ref_d, taxa_skip=False, asf=False)
    Prepares the data for encoding and learning.
    
    Parameters
    ----------
    input_fq : string,
        a path to .fastq file
    ref_d : string,
        an output from "file_readers.species_outcome()"
    taxa_skip : boolean,
        if true, skips taxa info and uses labels. Otherwise, runs taxa assignment via ete3.
    asf : boolean,
        use ASF species names instead of ASF IDs.
        
    Returns
    -------
    df_merge
        a data frame with reads and taxonomic assignments



## Module: dl_evaluation

In [None]:
# Print the documentation of each evaluation helper in homic.dl_evaluation,
# in the order listed.
for fn_name in (
    "reassign_classes_per_spot",
    "merge_prediction_results",
    "per_spot_stats",
):
    help(getattr(dl_evaluation, fn_name))

## Module: process_data

## Module: make_plots