## Load python libraries

In [29]:
import configparser
import gzip
import os
import glob
import subprocess
import multiprocessing
import shutil
import pandas as pd

## Import information from configuration file

In [30]:
config = configparser.ConfigParser()
config.read('config.ini')

path_data = config.get('paths', 'path_data')

print(path_data)

../data


## Define functions

In [51]:
'''
Function to obtain paths to the directories where the fastq files are located.
Currently requires demultiplexed data with gzipped files.
'''
def fastq_directory_paths():
    fastq_dir = set()
    file_paths = glob.glob(path_data + '/**/*.fastq.gz', recursive=True)
    directory_paths = map(os.path.dirname, file_paths)
    fastq_dir.update(directory_paths)
    return sorted(fastq_dir)

'''
Function to obtain sample names that are used as base to track the samples through the analysis
'''
basename = lambda paths: [path.split('/')[-2] if path.endswith('/') else path.split('/')[-1] for path in paths]

'''
Functions to count the number of raw sequences per sample
'''
def count_sequences(file_path):
    count = 0
    with gzip.open(file_path, 'rt') as gz_file:
        for line in gz_file:
            if line.startswith('@'):
                count += 1
    return count

def accumulate_counts(folder_path):
    counts = {}
    for folder in folder_path:
        counts[folder] = 0  # Initialize the folder key with count 0
        for file_name in os.listdir(folder):
            file_path = os.path.join(folder, file_name)
            if file_name.endswith('.fastq.gz'):
                counts[folder] += count_sequences(file_path)
    return counts

'''
Wrapper function for NanoFilt
'''
def filtering(folder_list, minimum_length, maximum_length, qscore, base_list):
    for folder, base in zip(folder_list, base_list):
        if os.path.exists(os.path.join('results/qc', base)) and os.path.isdir(os.path.join('results/qc', base)):
            pass
        else:
            os.makedirs(os.path.join('results/qc', base))
        for file_name in os.listdir(folder):
            if file_name.endswith('.fastq.gz'):
                file_path = os.path.join(folder, file_name)
                file_name_unzipped = file_name[:-3]
                command = f'gunzip -c {file_path} | NanoFilt \
                          --length {minimum_length} \
                          --maxlength {maximum_length} \
                          -q {qscore} | gzip > ./results/qc/{base}/{file_name}'
                subprocess.run(command, shell=True)
                
'''
Function to concatenate filtered sequences to one file per sample
'''
def concatenate(bases):
    for base in bases:
        command = f'cat ./results/qc/{base}/*.fastq.gz > ./results/qc/{base}/{base}_concatenated.fastq.gz'
        subprocess.run(command, shell=True)
        
        command = f'gunzip ./results/qc/{base}/{base}_concatenated.fastq.gz'
        subprocess.run(command, shell=True)
        
        command = f'sed -n "1~4s/^@/>/p;2~4p" ./results/qc/{base}/{base}_concatenated.fastq > ./results/qc/{base}/{base}_concatenated.fasta'
        subprocess.run(command, shell=True)

        #command = f'gzip ./results/qc/{base}/{base}_concatenated.fasta'
        #subprocess.run(command, shell=True)
        
'''
Function to count the number of sequences per sample after filtering
'''
def count_sequences_concat(base_name):
    counts = {}
    for base in base_name:
        counts[base] = 0  # Initialize the count for each base
        file_path = './results/qc/' + base + '/' + base + '_concatenated.fasta'
        with open(file_path, 'r') as file:
            for line in file:
                if line.startswith('>'):
                    counts[base] += 1
    return counts

'''
Function to convert sequence files in fasta format to csv
'''
def fasta2csv(base_name):
    for base in base_name:
        fasta = './results/qc/' + base + '/' + base + '_concatenated.fasta'
        output = './results/qc/' + base + '/' + base + '_concatenated.csv'

        out_lines = []
        temp_line = ''
        with open(fasta, 'r') as fp:
            for line in fp:
                if line.startswith('>'):
                    out_lines.append(temp_line)
                    temp_line = line.strip() + ','
                else:
                    temp_line += line.strip()
        out_lines.append(temp_line)

        with open(output, 'w') as fp_out:
            fp_out.write('id,sequence' + '\n'.join(out_lines))
            
'''
Wrapper function to run the ashure clustering algorithm
'''
def cluster(base):
    os.chdir(wdir)
    shutil.copy('./ashure.py', './results/qc/' + base)
    shutil.copy('./bilge_pype.py', './results/qc/' + base)
    os.chdir('./results/qc/' + base)
    script_path = "./ashure.py"
    input_file = base + "_concatenated.csv"
    output_file = base + "_clusters.csv"    
    
    command = [
        script_path,
        "clst",
        "-i", input_file,
        "-o", output_file,
        "-iter", config.get('ashure', 'niter'),
        "-r"
    ]   
    
    subprocess.run(command)
    os.chdir(wdir)

'''
Function to convert sequence files in csv format to fasta
'''
def csv2fasta(base_name):
    for base in base_name:
        csv_path = './results/qc/' + base + '/' + base + '_clusters.csv'
        output_path = './results/qc/' + base + '/' + base + '_clusters.fasta'

        if os.path.exists(csv_path):
            out_lines = []
            temp_line = ''
            with open(csv_path, 'r') as csv_file:
                for line in csv_file:
                    cols = line.split(",")
                    out_lines.append(temp_line)
                    temp_line = ">" + cols[0] + "\n" + cols[1] + "\n"

            out_lines.append(temp_line)

            with open(output_path, 'w') as csv_out:
                csv_out.write(''.join(out_lines)[13:])
                
'''
Wrapper function to run cutadapt
'''
def remove_primers(base_name):
    for base in base_name:
        file_path = './results/qc/' + base + '/' + base + '_clusters.fasta'
        out_path = './results/qc/' + base + '/' + base + '_clusters_cut.fasta'
        if os.path.exists(file_path):
            command = [
                'cutadapt',
                '-a', 'CAGCAGCCGCGGTAATTCC;max_error_rate=0.20',
                '-g', 'CCCGTGTTGAGTCAAATTAAGC;max_error_rate=0.20',
                '--revcomp',
                '-o', out_path,
                file_path
            ]
            subprocess.run(command)

'''
Wrapper function to run blastn
'''                        
def blast(base_name):
    for base in base_name:
        file_path = './results/qc/' + base + '/' + base + '_clusters_cut.fasta'
        db = config.get('BLAST', 'db')
        if os.path.exists(file_path):
            print("Running blastn on", base)
            output_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn.csv'

            command = [
                "blastn",
                "-db", config.get('paths', 'path_to_blastdb'),
                "-query", file_path,
                "-task", "blastn",
                "-dust", "no",
                "-num_threads", str(config.get('BLAST', 'numthreads')),
                "-outfmt", "7 delim=, sseqid stitle qacc sacc evalue bitscore length pident",
                "-max_target_seqs", str(config.get('BLAST', 'mts')),
                "-perc_identity", str(config.get('BLAST', 'pct_ident')),
                "-out", output_csv
            ]

            subprocess.run(command)

'''
Function to handle the blastn output files and generate a concatenated table with the taxonomic annotations
'''
def make_output_file(base_name):
    db = config.get('BLAST', 'db')
    for base in base_name:
        input_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn.csv'
        output_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn2.csv'
        if os.path.exists(input_csv):
            with open(input_csv, 'r') as infile, open(output_csv, 'w') as outfile:
                for line in infile:
                    if not line.startswith('#'):
                        outfile.write(line)
                    
    for base in base_name:
        input_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn2.csv'
        print(input_csv)
        output_csv = './results/qc/' + base + '/' + base + '_' + db + '_ASV.csv'
        if os.path.exists(input_csv) and os.path.getsize(input_csv) > 0:
            # load file
            df = pd.read_csv(input_csv, sep=',')

            # add column names
            df.columns=['accession', 'taxonomic_annotation', 'cluster', 'accession', 'evalue', 'bitscore', 'alignment_length', 'percentage_identity']

            # select only rows with alignment length >= 500 bp
            df2 = df[df['alignment_length'] >= 500]

            # arrange rows by match percentage
            df3 = df2.sort_values(by=['percentage_identity'], ascending=False)

            # keep only first row of each ASV
            df4 = df3.drop_duplicates(subset=['cluster'], keep='first', inplace=False, ignore_index=False)

            # add sample name information
            df4['#sample_name'] = base

            df4['taxonomy'] = df4['taxonomic_annotation'].replace('"', '')

            df5 = df4[['#sample_name', 'cluster', 'accession', 'evalue', 'bitscore', 'alignment_length', 'percentage_identity', 'taxonomic_annotation']]

            df5.to_csv(output_csv, sep=';', index=False, header=False)

    intermediate = '_' + db + '_eDNA.csv'
    final = db + '_eDNA.csv'
    if os.path.exists(intermediate):
        os.remove(intermediate)

    for base in base_name:
        file_path = './results/qc/' + base + '/' + base + '_' + db + '_ASV.csv'
        if os.path.exists(file_path):
            with open(file_path, "r") as input_file, open(intermediate, "a") as output_file:
                output_file.write(input_file.read())

    with open(intermediate, "r") as input_file, open(final, "w") as output_file:
        output_file.write("counts,cluster,accession,accession,evalue,bitscore,alignment_length,percentage_identity,taxonomic_annotation\n")
        for line in input_file:
            if not line.startswith("#"):
                output_file.write(line.replace(";", ",").replace("|", ","))

## Actual execution of the workflow

In [52]:

# Load paths to directories where fastq files are located
fastq_dir = fastq_directory_paths()
print(fastq_dir)

# Extract sample names from data
base_name = basename(fastq_dir)
print(base_name)

# Count number of raw sequences per sample
print(accumulate_counts(fastq_dir))

# Filter sequences using NanoFilt
filtering(fastq_dir, config.get('NanoFilt', 'minlength'), config.get('NanoFilt', 'maxlength'), config.get('NanoFilt', 'qscore'), base_name)

# Concatenate filtered sequences to one file per sample
concatenate(base_name)
        
# Count number of sequences per sample after filtering
print(count_sequences_concat(base_name))

# Convert fasta files to csv
fasta2csv(base_name)

# Make sure you save the original working directory before using moving around directories
wdir = os.getcwd()
print(wdir)

# Run the actual clustering
if __name__ == '__main__':
    num_processes = 8  # Number of available CPU cores 
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(cluster, base_name)

# Move back to original working directory after clustering
os.chdir(wdir)

# Convert csv files to fasta
csv2fasta(base_name)

# Remove primers using a wrapper function for cutadapt
remove_primers(base_name)

# Taxonomic annotation using blastn
blast(base_name)

# Handle the blastn output files and generate a concatenated table with the taxonomic annotations
make_output_file(base_name)

['../data/barcode01', '../data/barcode02', '../data/barcode03']
['barcode01', 'barcode02', 'barcode03']
{'../data/barcode01': 28513, '../data/barcode02': 28554, '../data/barcode03': 28469}
{'barcode01': 25040, 'barcode02': 26721, 'barcode03': 20929}
/home/pascal/Documents/git_projects/grifo/src_0.4
pid[178468] 2023-06-24 23:44:04.481 INFO: check_toolchain: /home/pascal/anaconda3/envs/GEANS/bin/minimap2 found
pid[178469] 2023-06-24 23:44:04.481 INFO: check_toolchain: /home/pascal/anaconda3/envs/GEANS/bin/minimap2 found
pid[178468] 2023-06-24 23:44:04.481 INFO: check_toolchain: /usr/bin/bwa found
pid[178469] 2023-06-24 23:44:04.481 INFO: check_toolchain: /usr/bin/bwa found
pid[178468] 2023-06-24 23:44:04.481 INFO: check_toolchain: /usr/bin/bowtie2 found
pid[178469] 2023-06-24 23:44:04.481 INFO: check_toolchain: /usr/bin/bowtie2 found
pid[178468] 2023-06-24 23:44:04.481 INFO: check_toolchain: /home/pascal/anaconda3/envs/GEANS/bin/spoa found
pid[178469] 2023-06-24 23:44:04.481 INFO: check_

pid[178467] 2023-06-24 23:44:04.731 INFO: cluster_sample: qlen=26721 progress=0/5
pid[178467] 2023-06-24 23:44:04.734 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.000*9.63] collected minimizers
[M::mm_idx_gen::0.001*4.92] sorted minimizers
[M::main::0.001*4.86] loaded/built the index for 0 target sequence(s)
[M::mm_mapopt_update::0.001*4.74] mid_occ = 0
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 0
[M::mm_idx_stat::0.001*4.65] distinct minimizers: 0 (-nan% are singletons); average occurrences: -nan; average spacing: -nan
[M::worker_pipeline::0.085*2.43] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.086 sec; CPU: 0.207 sec; Peak RSS: 0.116 GB
[M::mm_idx_gen::0.000*11.37] collected minimizers
[M::mm_idx_gen::0.001*5.56] sorted minimizers
[M::main::0.001*5.51] loaded/built the index for 0 target sequence(s)
[M::mm_mapopt_update::0.001*5.38] mid_occ = 0
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 0
[M::mm_idx_stat::0.001*5.29] distinct minimizers: 0 (-nan% are

pid[178468] 2023-06-24 23:44:05.652 INFO: cluster_sample: qlen=20929 progress=1/5
pid[178469] 2023-06-24 23:44:05.825 INFO: cluster_sample: qlen=25040 progress=1/5


[M::worker_pipeline::0.108*2.35] mapped 25040 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.108 sec; CPU: 0.254 sec; Peak RSS: 0.120 GB
[M::mm_idx_gen::0.001*9.81] collected minimizers
[M::mm_idx_gen::0.001*5.71] sorted minimizers
[M::main::0.001*5.70] loaded/built the index for 0 target sequence(s)
[M::mm_mapopt_update::0.001*5.53] mid_occ = 1725152489
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 0
[M::mm_idx_stat::0.001*5.42] distinct minimizers: 0 (-nan% are singletons); average occurrences: -nan; average spacing: -nan
[M::worker_pipeline::0.119*2.49] mapped 26721 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.119 sec; CPU: 0.297 sec; Peak RSS: 0.122 GB


pid[178467] 2023-06-24 23:44:06.216 INFO: cluster_sample: qlen=26721 progress=1/5


[M::mm_idx_gen::0.000*7.05] collected minimizers
[M::mm_idx_gen::0.001*4.48] sorted minimizers
[M::main::0.001*4.44] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*4.32] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.001*4.25] distinct minimizers: 122 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.262
[M::mm_idx_gen::0.000*7.94] collected minimizers
[M::mm_idx_gen::0.001*4.64] sorted minimizers
[M::main::0.001*4.63] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*4.43] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.001*4.28] distinct minimizers: 125 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.680
[M::mm_idx_gen::0.000*10.49] collected minimizers
[M::mm_idx_gen::0.001*5.52] sorted minimizers
[M::main::0.001*5.50] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*

pid[178468] 2023-06-24 23:44:08.934 INFO: cluster_sample: qlen=5299 progress=2/5


[M::mm_idx_gen::0.001*4.07] collected minimizers
[M::mm_idx_gen::0.002*3.55] sorted minimizers
[M::main::0.002*3.54] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*3.47] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*3.43] distinct minimizers: 138 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.217
[M::worker_pipeline::0.231*2.88] mapped 5299 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.232 sec; CPU: 0.667 sec; Peak RSS: 0.151 GB


pid[178468] 2023-06-24 23:44:09.685 INFO: cluster_sample: qlen=4760 progress=3/5
pid[178469] 2023-06-24 23:44:09.776 INFO: cluster_sample: qlen=9101 progress=2/5
pid[178467] 2023-06-24 23:44:09.838 INFO: cluster_sample: qlen=18970 progress=2/5


[M::mm_idx_gen::0.001*5.69] collected minimizers
[M::mm_idx_gen::0.001*4.19] sorted minimizers
[M::main::0.001*4.16] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*4.08] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.001*4.02] distinct minimizers: 120 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.450
[M::worker_pipeline::0.099*2.78] mapped 4760 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.099 sec; CPU: 0.275 sec; Peak RSS: 0.150 GB


pid[178468] 2023-06-24 23:44:10.098 INFO: cluster_sample: qlen=4680 progress=4/5


[M::mm_idx_gen::0.001*3.84] collected minimizers
[M::mm_idx_gen::0.002*3.46] sorted minimizers
[M::main::0.002*3.44] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*3.39] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*3.36] distinct minimizers: 113 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.460
[M::mm_idx_gen::0.000*6.87] collected minimizers
[M::mm_idx_gen::0.001*4.46] sorted minimizers
[M::main::0.001*4.45] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*4.36] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.001*4.29] distinct minimizers: 111 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.730
[M::worker_pipeline::0.086*2.81] mapped 4680 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/rea

pid[178468] 2023-06-24 23:44:10.481 INFO: perform_cluster: iter = 0/5


[M::mm_idx_gen::0.001*4.93] collected minimizers
[M::mm_idx_gen::0.002*4.02] sorted minimizers
[M::main::0.002*4.02] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*3.96] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*3.92] distinct minimizers: 112 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.545


pid[178469] 2023-06-24 23:44:11.035 INFO: cluster_sample: qlen=7465 progress=3/5


[M::worker_pipeline::0.376*2.83] mapped 18970 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.376 sec; CPU: 1.064 sec; Peak RSS: 0.149 GB
[M::mm_idx_gen::0.001*6.66] collected minimizers
[M::mm_idx_gen::0.001*4.69] sorted minimizers
[M::main::0.001*4.68] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*4.59] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*4.53] distinct minimizers: 130 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.362
[M::mm_idx_gen::0.001*5.59] collected minimizers
[M::mm_idx_gen::0.001*4.27] sorted minimizers
[M::main::0.001*4.26] loaded/built the index for 5 target sequence(s)
[M::mm_mapopt_update::0.001*4.11] mid_occ = 5
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 5
[M::mm_idx_stat::0.001*4.02] distinct minimizers: 542 (91

pid[178467] 2023-06-24 23:44:11.416 INFO: cluster_sample: qlen=18205 progress=3/5


[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.154 sec; CPU: 0.370 sec; Peak RSS: 0.129 GB
[M::worker_pipeline::0.441*2.91] mapped 7465 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.441 sec; CPU: 1.284 sec; Peak RSS: 0.162 GB


pid[178469] 2023-06-24 23:44:12.192 INFO: cluster_sample: qlen=7082 progress=4/5


[M::mm_idx_gen::0.001*6.54] collected minimizers
[M::mm_idx_gen::0.001*4.43] sorted minimizers
[M::main::0.001*4.42] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.001*4.30] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.001*4.22] distinct minimizers: 114 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.596
[M::mm_idx_gen::0.001*7.35] collected minimizers
[M::mm_idx_gen::0.001*4.94] sorted minimizers
[M::main::0.001*4.93] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*4.82] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*4.75] distinct minimizers: 115 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.409
[M::worker_pipeline::0.328*2.87] mapped 7082 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/rea

pid[178469] 2023-06-24 23:44:13.231 INFO: perform_cluster: iter = 0/5
pid[178467] 2023-06-24 23:44:14.029 INFO: cluster_sample: qlen=16150 progress=4/5


[M::mm_idx_gen::0.001*5.06] collected minimizers
[M::mm_idx_gen::0.002*3.95] sorted minimizers
[M::main::0.002*3.93] loaded/built the index for 5 target sequence(s)
[M::mm_mapopt_update::0.002*3.84] mid_occ = 6
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 5
[M::mm_idx_stat::0.002*3.78] distinct minimizers: 521 (90.02% are singletons); average occurrences: 1.142; average spacing: 5.509
[M::worker_pipeline::0.167*2.47] mapped 25040 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.167 sec; CPU: 0.412 sec; Peak RSS: 0.136 GB


pid[178468] 2023-06-24 23:44:14.701 INFO: cluster_eval: number of clusters = 5


[M::mm_idx_gen::0.001*5.69] collected minimizers
[M::mm_idx_gen::0.002*4.32] sorted minimizers
[M::main::0.002*4.30] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.002*4.23] mid_occ = 2
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.002*4.13] distinct minimizers: 122 (100.00% are singletons); average occurrences: 1.000; average spacing: 5.303
[M::worker_pipeline::0.655*2.89] mapped 16150 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.655 sec; CPU: 1.891 sec; Peak RSS: 0.149 GB


pid[178468] 2023-06-24 23:44:15.569 INFO: cluster_split: splitting on cid=cluster0 0/5
pid[178468] 2023-06-24 23:44:15.584 INFO: cluster_compute: computing pairwise distance matrix


[M::mm_idx_gen::0.024*1.18] collected minimizers
[M::mm_idx_gen::0.030*1.53] sorted minimizers
[M::main::0.030*1.53] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.031*1.51] mid_occ = 964
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.032*1.50] distinct minimizers: 69324 (80.29% are singletons); average occurrences: 3.485; average spacing: 5.400


pid[178467] 2023-06-24 23:44:16.124 INFO: perform_cluster: iter = 0/5


[M::worker_pipeline::1.423*2.93] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.427 sec; CPU: 4.177 sec; Peak RSS: 0.150 GB
[M::mm_idx_gen::0.001*5.97] collected minimizers
[M::mm_idx_gen::0.002*4.51] sorted minimizers
[M::main::0.002*4.49] loaded/built the index for 5 target sequence(s)
[M::mm_mapopt_update::0.002*4.33] mid_occ = 5
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 5
[M::mm_idx_stat::0.002*4.25] distinct minimizers: 536 (94.78% are singletons); average occurrences: 1.067; average spacing: 5.493
[M::worker_pipeline::0.150*2.30] mapped 26721 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.150 sec; CPU: 0.345 sec; Peak RSS: 0.134 GB


pid[178469] 2023-06-24 23:44:17.761 INFO: cluster_eval: number of clusters = 5
pid[178468] 2023-06-24 23:44:18.266 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:44:18.296 INFO: cluster_split: splitting on cid=cluster0 0/5
pid[178469] 2023-06-24 23:44:18.309 INFO: cluster_compute: computing pairwise distance matrix


[M::mm_idx_gen::0.025*1.18] collected minimizers
[M::mm_idx_gen::0.032*1.54] sorted minimizers
[M::main::0.032*1.54] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.034*1.51] mid_occ = 993
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.035*1.50] distinct minimizers: 62513 (80.39% are singletons); average occurrences: 3.738; average spacing: 5.518


pid[178468] 2023-06-24 23:44:18.655 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:44:18.656 INFO: Running OPTICS
pid[178468] 2023-06-24 23:44:18.682 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:44:18.682 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178468] 2023-06-24 23:44:18.810 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178468] 2023-06-24 23:44:18.811 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178468] 2023-06-24 23:44:18.911 INFO: clust_OPTICS: clusters=1 outliers=1887 delta=500
pid[178468] 2023-06-24 23:44:18.911 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178468] 2023-06-24 23:44:18.973 INFO: clust_OPTICS: clusters=1 outliers=1866 delta=250
pid[178468] 2023-06-24 23:44:18.973 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178468] 2023-06-24 23:44:19.044 INFO: clust_OPTICS: clusters=1 outliers=1594 delta=125
pid[178468] 2023-06-24 23:44:19.044 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178468] 2023-06-24

[M::worker_pipeline::1.509*2.93] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.515 sec; CPU: 4.427 sec; Peak RSS: 0.165 GB


pid[178468] 2023-06-24 23:44:20.289 INFO: clust_OPTICS: clusters=2 outliers=292 delta=8
pid[178468] 2023-06-24 23:44:20.289 INFO: clust_OPTICS: iter=11 using min_samples=21
pid[178468] 2023-06-24 23:44:20.528 INFO: clust_OPTICS: clusters=1 outliers=204 delta=4
pid[178468] 2023-06-24 23:44:20.528 INFO: clust_OPTICS: iter=12 using min_samples=27
pid[178468] 2023-06-24 23:44:20.732 INFO: clust_OPTICS: clusters=2 outliers=308 delta=2
pid[178468] 2023-06-24 23:44:20.732 INFO: clust_OPTICS: iter=13 using min_samples=30
pid[178468] 2023-06-24 23:44:20.896 INFO: clust_OPTICS: clusters=2 outliers=328 delta=-3
pid[178468] 2023-06-24 23:44:20.896 INFO: clust_OPTICS: iter=14 using min_samples=30
pid[178467] 2023-06-24 23:44:21.088 INFO: cluster_eval: number of clusters = 5
pid[178469] 2023-06-24 23:44:21.123 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:44:21.125 INFO: clust_OPTICS: clusters=2 outliers=328 delta=-3
pid[178468] 2023-06-24 23:44:21.126 INFO: clust_OPTICS: iter=15 using 

[M::mm_idx_gen::0.025*1.20] collected minimizers
[M::mm_idx_gen::0.032*1.56] sorted minimizers
[M::main::0.032*1.56] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.53] mid_occ = 574
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.52] distinct minimizers: 87698 (81.99% are singletons); average occurrences: 2.774; average spacing: 5.437


pid[178469] 2023-06-24 23:44:21.958 INFO: clust_OPTICS: clusters=1 outliers=237 delta=62
pid[178469] 2023-06-24 23:44:21.958 INFO: clust_OPTICS: iter=5 using min_samples=156
pid[178469] 2023-06-24 23:44:22.025 INFO: clust_OPTICS: clusters=4 outliers=1239 delta=31
pid[178469] 2023-06-24 23:44:22.025 INFO: clust_OPTICS: iter=6 using min_samples=202
pid[178469] 2023-06-24 23:44:22.085 INFO: clust_OPTICS: clusters=1 outliers=1569 delta=-46
pid[178469] 2023-06-24 23:44:22.085 INFO: clust_OPTICS: iter=7 using min_samples=133
pid[178469] 2023-06-24 23:44:22.151 INFO: clust_OPTICS: clusters=4 outliers=1012 delta=-23
pid[178469] 2023-06-24 23:44:22.151 INFO: clust_OPTICS: iter=8 using min_samples=99
pid[178467] 2023-06-24 23:44:22.216 INFO: cluster_split: splitting on cid=cluster0 0/5
pid[178467] 2023-06-24 23:44:22.235 INFO: cluster_compute: computing pairwise distance matrix
pid[178469] 2023-06-24 23:44:22.238 INFO: clust_OPTICS: clusters=4 outliers=633 delta=34
pid[178469] 2023-06-24 23:44:2

[M::mm_idx_gen::0.024*1.20] collected minimizers
[M::mm_idx_gen::0.031*1.57] sorted minimizers
[M::main::0.031*1.57] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.54] mid_occ = 896
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.53] distinct minimizers: 64089 (79.97% are singletons); average occurrences: 3.690; average spacing: 5.422
[M::worker_pipeline::0.877*2.86] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.882 sec; CPU: 2.516 sec; Peak RSS: 0.140 GB


pid[178469] 2023-06-24 23:44:22.475 INFO: clust_OPTICS: clusters=4 outliers=921 delta=-12
pid[178469] 2023-06-24 23:44:22.475 INFO: clust_OPTICS: iter=12 using min_samples=119
pid[178469] 2023-06-24 23:44:22.556 INFO: clust_OPTICS: clusters=4 outliers=921 delta=-12
pid[178469] 2023-06-24 23:44:22.556 INFO: clust_OPTICS: iter=13 using min_samples=125
pid[178469] 2023-06-24 23:44:22.631 INFO: clust_OPTICS: clusters=4 outliers=1012 delta=-6
pid[178469] 2023-06-24 23:44:22.631 INFO: clust_OPTICS: iter=14 using min_samples=125
pid[178469] 2023-06-24 23:44:22.710 INFO: clust_OPTICS: clusters=4 outliers=1012 delta=-6
pid[178469] 2023-06-24 23:44:22.710 INFO: clust_OPTICS: iter=15 using min_samples=128
pid[178469] 2023-06-24 23:44:22.782 INFO: clust_OPTICS: clusters=4 outliers=1012 delta=-3
pid[178469] 2023-06-24 23:44:22.782 INFO: clust_OPTICS: iter=16 using min_samples=129
pid[178469] 2023-06-24 23:44:22.851 INFO: clust_OPTICS: clusters=4 outliers=1012 delta=-1
pid[178469] 2023-06-24 23:44:2

[M::mm_idx_gen::0.025*1.19] collected minimizers
[M::mm_idx_gen::0.034*1.61] sorted minimizers
[M::main::0.034*1.61] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.035*1.58] mid_occ = 942
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.036*1.57] distinct minimizers: 67512 (80.29% are singletons); average occurrences: 3.444; average spacing: 5.514


pid[178468] 2023-06-24 23:44:23.659 INFO: preparing precomputed data


[M::worker_pipeline::1.452*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.458 sec; CPU: 4.271 sec; Peak RSS: 0.144 GB


pid[178468] 2023-06-24 23:44:24.079 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:44:24.079 INFO: Running OPTICS
pid[178468] 2023-06-24 23:44:24.097 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:44:24.098 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178468] 2023-06-24 23:44:24.228 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178468] 2023-06-24 23:44:24.228 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178468] 2023-06-24 23:44:24.308 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178468] 2023-06-24 23:44:24.308 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178468] 2023-06-24 23:44:24.368 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=250
pid[178468] 2023-06-24 23:44:24.368 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178468] 2023-06-24 23:44:24.422 INFO: clust_OPTICS: clusters=1 outliers=1998 delta=125
pid[178468] 2023-06-24 23:44:24.422 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178468] 2023-06-24

[M::worker_pipeline::1.276*2.92] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.280 sec; CPU: 3.734 sec; Peak RSS: 0.150 GB


pid[178468] 2023-06-24 23:44:24.688 INFO: clust_OPTICS: clusters=3 outliers=568 delta=31
pid[178468] 2023-06-24 23:44:24.688 INFO: clust_OPTICS: iter=6 using min_samples=78
pid[178468] 2023-06-24 23:44:24.747 INFO: clust_OPTICS: clusters=3 outliers=1911 delta=15
pid[178468] 2023-06-24 23:44:24.747 INFO: clust_OPTICS: iter=7 using min_samples=78
pid[178468] 2023-06-24 23:44:24.806 INFO: clust_OPTICS: clusters=3 outliers=1911 delta=15
pid[178468] 2023-06-24 23:44:24.806 INFO: clust_OPTICS: iter=8 using min_samples=101
pid[178468] 2023-06-24 23:44:24.865 INFO: clust_OPTICS: clusters=3 outliers=1911 delta=-23
pid[178468] 2023-06-24 23:44:24.866 INFO: clust_OPTICS: iter=9 using min_samples=112
pid[178468] 2023-06-24 23:44:24.920 INFO: clust_OPTICS: clusters=2 outliers=1993 delta=-11
pid[178468] 2023-06-24 23:44:24.920 INFO: clust_OPTICS: iter=10 using min_samples=95
pid[178468] 2023-06-24 23:44:24.979 INFO: clust_OPTICS: clusters=3 outliers=1911 delta=-6
pid[178468] 2023-06-24 23:44:24.979 

[M::mm_idx_gen::0.003*2.32] collected minimizers
[M::mm_idx_gen::0.004*2.46] sorted minimizers
[M::main::0.004*2.45] loaded/built the index for 174 target sequence(s)
[M::mm_mapopt_update::0.005*2.32] mid_occ = 76
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 174
[M::mm_idx_stat::0.005*2.24] distinct minimizers: 12570 (84.61% are singletons); average occurrences: 1.672; average spacing: 5.458
[M::worker_pipeline::0.041*2.79] mapped 174 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.042 sec; CPU: 0.116 sec; Peak RSS: 0.141 GB


pid[178468] 2023-06-24 23:44:25.486 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:44:25.519 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:44:25.519 INFO: Running OPTICS
pid[178468] 2023-06-24 23:44:25.520 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:44:25.520 INFO: clust_OPTICS: iter=0 using min_samples=87
pid[178468] 2023-06-24 23:44:25.522 INFO: clust_OPTICS: clusters=0 outliers=174 delta=43.5
pid[178468] 2023-06-24 23:44:25.522 INFO: clust_OPTICS: iter=1 using min_samples=44
pid[178468] 2023-06-24 23:44:25.525 INFO: clust_OPTICS: clusters=1 outliers=132 delta=43
pid[178468] 2023-06-24 23:44:25.525 INFO: clust_OPTICS: iter=2 using min_samples=23
pid[178468] 2023-06-24 23:44:25.527 INFO: clust_OPTICS: clusters=1 outliers=101 delta=21
pid[178468] 2023-06-24 23:44:25.527 INFO: clust_OPTICS: iter=3 using min_samples=13
pid[178468] 2023-06-24 23:44:25.532 INFO: clust_OPTICS: clusters=2 outliers=48 delta=10
pid[178468] 2023-06-24 23:44:25.532 INFO: clust_OPT

[M::mm_idx_gen::0.025*1.18] collected minimizers
[M::mm_idx_gen::0.032*1.54] sorted minimizers
[M::main::0.032*1.54] loaded/built the index for 1922 target sequence(s)
[M::mm_mapopt_update::0.033*1.52] mid_occ = 973
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1922
[M::mm_idx_stat::0.034*1.50] distinct minimizers: 69795 (81.21% are singletons); average occurrences: 3.338; average spacing: 5.396


pid[178467] 2023-06-24 23:44:25.917 INFO: clust_OPTICS: clusters=2 outliers=843 delta=125
pid[178467] 2023-06-24 23:44:25.917 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178469] 2023-06-24 23:44:26.002 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:44:26.002 INFO: Running OPTICS
pid[178469] 2023-06-24 23:44:26.024 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:44:26.024 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:44:26.043 INFO: clust_OPTICS: clusters=2 outliers=450 delta=62
pid[178467] 2023-06-24 23:44:26.043 INFO: clust_OPTICS: iter=5 using min_samples=32
pid[178469] 2023-06-24 23:44:26.186 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178469] 2023-06-24 23:44:26.186 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178469] 2023-06-24 23:44:26.286 INFO: clust_OPTICS: clusters=1 outliers=1368 delta=500
pid[178469] 2023-06-24 23:44:26.286 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178467] 2023-06-24 23:

[M::worker_pipeline::1.384*2.93] mapped 1922 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.387 sec; CPU: 4.055 sec; Peak RSS: 0.153 GB


pid[178469] 2023-06-24 23:44:27.448 INFO: clust_OPTICS: clusters=2 outliers=479 delta=-11
pid[178469] 2023-06-24 23:44:27.448 INFO: clust_OPTICS: iter=9 using min_samples=33
pid[178469] 2023-06-24 23:44:27.629 INFO: clust_OPTICS: clusters=2 outliers=315 delta=-6
pid[178469] 2023-06-24 23:44:27.629 INFO: clust_OPTICS: iter=10 using min_samples=25
pid[178467] 2023-06-24 23:44:27.630 INFO: clust_OPTICS: clusters=2 outliers=30 delta=7
pid[178467] 2023-06-24 23:44:27.630 INFO: clust_OPTICS: iter=8 using min_samples=20
pid[178469] 2023-06-24 23:44:27.871 INFO: clust_OPTICS: clusters=3 outliers=254 delta=8
pid[178469] 2023-06-24 23:44:27.871 INFO: clust_OPTICS: iter=11 using min_samples=21
pid[178467] 2023-06-24 23:44:28.002 INFO: clust_OPTICS: clusters=2 outliers=127 delta=3
pid[178467] 2023-06-24 23:44:28.002 INFO: clust_OPTICS: iter=9 using min_samples=20
pid[178469] 2023-06-24 23:44:28.198 INFO: clust_OPTICS: clusters=3 outliers=182 delta=4
pid[178469] 2023-06-24 23:44:28.198 INFO: clust_

[M::mm_idx_gen::0.024*1.30] collected minimizers
[M::mm_idx_gen::0.030*1.62] sorted minimizers
[M::main::0.030*1.62] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.032*1.59] mid_occ = 907
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.033*1.57] distinct minimizers: 69332 (80.63% are singletons); average occurrences: 3.420; average spacing: 5.477


pid[178468] 2023-06-24 23:44:29.334 INFO: clust_OPTICS: clusters=1 outliers=250 delta=60
pid[178468] 2023-06-24 23:44:29.335 INFO: clust_OPTICS: iter=5 using min_samples=31
pid[178468] 2023-06-24 23:44:29.536 INFO: clust_OPTICS: clusters=1 outliers=139 delta=30
pid[178468] 2023-06-24 23:44:29.536 INFO: clust_OPTICS: iter=6 using min_samples=16
pid[178467] 2023-06-24 23:44:29.557 INFO: clust_OPTICS: clusters=2 outliers=189 delta=-2
pid[178467] 2023-06-24 23:44:29.557 INFO: clust_OPTICS: iter=14 using min_samples=28
pid[178467] 2023-06-24 23:44:29.841 INFO: clust_OPTICS: clusters=2 outliers=193 delta=-1
pid[178467] 2023-06-24 23:44:29.841 INFO: clust_OPTICS: iter=15 using min_samples=28
pid[178468] 2023-06-24 23:44:29.986 INFO: clust_OPTICS: clusters=1 outliers=85 delta=15
pid[178468] 2023-06-24 23:44:29.986 INFO: clust_OPTICS: iter=7 using min_samples=9
pid[178467] 2023-06-24 23:44:30.127 INFO: clust_OPTICS: clusters=2 outliers=193 delta=-1
pid[178467] 2023-06-24 23:44:30.127 INFO: n_cl

[M::mm_idx_gen::0.008*1.65] collected minimizers
[M::mm_idx_gen::0.011*1.91] sorted minimizers
[M::main::0.011*1.91] loaded/built the index for 459 target sequence(s)
[M::mm_mapopt_update::0.011*1.87] mid_occ = 213
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 459
[M::mm_idx_stat::0.012*1.84] distinct minimizers: 21900 (82.75% are singletons); average occurrences: 2.486; average spacing: 5.455
[M::worker_pipeline::0.156*2.84] mapped 459 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.158 sec; CPU: 0.445 sec; Peak RSS: 0.147 GB
[M::worker_pipeline::1.363*2.92] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.374 sec; CPU: 3.995 sec; Peak RSS: 0.151 GB


pid[178467] 2023-06-24 23:44:30.700 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:44:30.783 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:44:30.784 INFO: Running OPTICS
pid[178467] 2023-06-24 23:44:30.785 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:44:30.785 INFO: clust_OPTICS: iter=0 using min_samples=230
pid[178467] 2023-06-24 23:44:30.793 INFO: clust_OPTICS: clusters=0 outliers=459 delta=115.0
pid[178467] 2023-06-24 23:44:30.794 INFO: clust_OPTICS: iter=1 using min_samples=115
pid[178467] 2023-06-24 23:44:30.801 INFO: clust_OPTICS: clusters=1 outliers=435 delta=115
pid[178467] 2023-06-24 23:44:30.801 INFO: clust_OPTICS: iter=2 using min_samples=58
pid[178467] 2023-06-24 23:44:30.807 INFO: clust_OPTICS: clusters=1 outliers=377 delta=57
pid[178467] 2023-06-24 23:44:30.807 INFO: clust_OPTICS: iter=3 using min_samples=30
pid[178468] 2023-06-24 23:44:30.811 INFO: clust_OPTICS: clusters=1 outliers=22 delta=7
pid[178468] 2023-06-24 23:44:30.811 INFO: clust_

[M::mm_idx_gen::0.023*1.22] collected minimizers
[M::mm_idx_gen::0.030*1.57] sorted minimizers
[M::main::0.030*1.57] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.031*1.54] mid_occ = 1024
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.032*1.53] distinct minimizers: 62806 (80.09% are singletons); average occurrences: 3.720; average spacing: 5.475


pid[178469] 2023-06-24 23:44:31.634 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:44:32.004 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:44:32.004 INFO: Running OPTICS
pid[178469] 2023-06-24 23:44:32.026 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:44:32.026 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178469] 2023-06-24 23:44:32.157 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178469] 2023-06-24 23:44:32.157 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178469] 2023-06-24 23:44:32.245 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178469] 2023-06-24 23:44:32.245 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178469] 2023-06-24 23:44:32.318 INFO: clust_OPTICS: clusters=1 outliers=1688 delta=250
pid[178469] 2023-06-24 23:44:32.318 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178469] 2023-06-24 23:44:32.404 INFO: clust_OPTICS: clusters=1 outliers=1069 delta=125
pid[178469] 2023-06-24 23:44:32.404 I

[M::worker_pipeline::1.478*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.483 sec; CPU: 4.346 sec; Peak RSS: 0.156 GB


pid[178469] 2023-06-24 23:44:32.748 INFO: clust_OPTICS: clusters=1 outliers=255 delta=31
pid[178469] 2023-06-24 23:44:32.748 INFO: clust_OPTICS: iter=6 using min_samples=17
pid[178469] 2023-06-24 23:44:33.190 INFO: clust_OPTICS: clusters=3 outliers=133 delta=15
pid[178469] 2023-06-24 23:44:33.190 INFO: clust_OPTICS: iter=7 using min_samples=10
pid[178468] 2023-06-24 23:44:33.708 INFO: clust_OPTICS: clusters=2 outliers=9 delta=3
pid[178468] 2023-06-24 23:44:33.708 INFO: clust_OPTICS: iter=9 using min_samples=5
pid[178467] 2023-06-24 23:44:33.960 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:44:34.071 INFO: clust_OPTICS: clusters=2 outliers=80 delta=7
pid[178469] 2023-06-24 23:44:34.072 INFO: clust_OPTICS: iter=8 using min_samples=20
pid[178467] 2023-06-24 23:44:34.343 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:44:34.343 INFO: Running OPTICS
pid[178467] 2023-06-24 23:44:34.374 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:44:34.374 INFO: clust_OPTICS: it

[M::mm_idx_gen::0.024*1.25] collected minimizers
[M::mm_idx_gen::0.031*1.58] sorted minimizers
[M::main::0.031*1.58] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.032*1.55] mid_occ = 1012
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.033*1.54] distinct minimizers: 67212 (80.40% are singletons); average occurrences: 3.622; average spacing: 5.373


pid[178469] 2023-06-24 23:44:37.996 INFO: clust_OPTICS: clusters=3 outliers=109 delta=1
pid[178469] 2023-06-24 23:44:37.996 INFO: clust_OPTICS: iter=15 using min_samples=15
pid[178467] 2023-06-24 23:44:38.118 INFO: clust_OPTICS: clusters=5 outliers=117 delta=-3
pid[178467] 2023-06-24 23:44:38.118 INFO: clust_OPTICS: iter=11 using min_samples=13
pid[178469] 2023-06-24 23:44:38.586 INFO: clust_OPTICS: clusters=3 outliers=110 delta=-1
pid[178469] 2023-06-24 23:44:38.587 INFO: clust_OPTICS: iter=16 using min_samples=15
pid[178467] 2023-06-24 23:44:38.766 INFO: clust_OPTICS: clusters=2 outliers=83 delta=4
pid[178467] 2023-06-24 23:44:38.766 INFO: clust_OPTICS: iter=12 using min_samples=19
pid[178469] 2023-06-24 23:44:39.163 INFO: clust_OPTICS: clusters=3 outliers=110 delta=-1
pid[178469] 2023-06-24 23:44:39.164 INFO: n_clusters=3 n_unclustered=103 N=2000
pid[178469] 2023-06-24 23:44:39.174 INFO: Making directory ./clusters/
pid[178467] 2023-06-24 23:44:39.208 INFO: clust_OPTICS: clusters=5 

[M::worker_pipeline::1.716*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.720 sec; CPU: 5.047 sec; Peak RSS: 0.189 GB
[M::mm_idx_gen::0.023*1.25] collected minimizers
[M::mm_idx_gen::0.029*1.58] sorted minimizers
[M::main::0.029*1.58] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.031*1.55] mid_occ = 924
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.032*1.54] distinct minimizers: 67565 (80.51% are singletons); average occurrences: 3.455; average spacing: 5.480


pid[178467] 2023-06-24 23:44:39.520 INFO: clust_OPTICS: clusters=4 outliers=162 delta=-3
pid[178467] 2023-06-24 23:44:39.520 INFO: clust_OPTICS: iter=14 using min_samples=17
pid[178467] 2023-06-24 23:44:39.916 INFO: clust_OPTICS: clusters=5 outliers=117 delta=-2
pid[178467] 2023-06-24 23:44:39.917 INFO: clust_OPTICS: iter=15 using min_samples=15
pid[178467] 2023-06-24 23:44:40.406 INFO: clust_OPTICS: clusters=2 outliers=91 delta=2
pid[178467] 2023-06-24 23:44:40.407 INFO: clust_OPTICS: iter=16 using min_samples=18
pid[178468] 2023-06-24 23:44:40.676 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:44:40.795 INFO: clust_OPTICS: clusters=5 outliers=127 delta=1
pid[178467] 2023-06-24 23:44:40.795 INFO: clust_OPTICS: iter=17 using min_samples=19


[M::worker_pipeline::1.322*2.92] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.327 sec; CPU: 3.870 sec; Peak RSS: 0.147 GB


pid[178468] 2023-06-24 23:44:41.069 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:44:41.069 INFO: Running OPTICS
pid[178468] 2023-06-24 23:44:41.087 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:44:41.087 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:44:41.158 INFO: clust_OPTICS: clusters=5 outliers=129 delta=-1
pid[178467] 2023-06-24 23:44:41.158 INFO: clust_OPTICS: iter=18 using min_samples=19
pid[178468] 2023-06-24 23:44:41.194 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178468] 2023-06-24 23:44:41.194 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178468] 2023-06-24 23:44:41.273 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178468] 2023-06-24 23:44:41.274 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178468] 2023-06-24 23:44:41.350 INFO: clust_OPTICS: clusters=1 outliers=1103 delta=250
pid[178468] 2023-06-24 23:44:41.350 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178468] 2023-06-24 

[M::mm_idx_gen::0.025*1.21] collected minimizers
[M::mm_idx_gen::0.032*1.54] sorted minimizers
[M::main::0.032*1.54] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.52] mid_occ = 1052
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.50] distinct minimizers: 60106 (79.59% are singletons); average occurrences: 3.903; average spacing: 5.531


pid[178469] 2023-06-24 23:44:42.231 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:44:42.231 INFO: Running OPTICS
pid[178469] 2023-06-24 23:44:42.250 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:44:42.251 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178469] 2023-06-24 23:44:42.382 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178469] 2023-06-24 23:44:42.383 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178469] 2023-06-24 23:44:42.472 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178469] 2023-06-24 23:44:42.472 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178469] 2023-06-24 23:44:42.543 INFO: clust_OPTICS: clusters=1 outliers=1815 delta=250
pid[178469] 2023-06-24 23:44:42.543 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178469] 2023-06-24 23:44:42.636 INFO: clust_OPTICS: clusters=1 outliers=794 delta=125
pid[178469] 2023-06-24 23:44:42.636 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178469] 2023-06-24 

[M::worker_pipeline::1.898*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.903 sec; CPU: 5.593 sec; Peak RSS: 0.148 GB
[M::mm_idx_gen::0.001*6.31] collected minimizers
[M::mm_idx_gen::0.002*4.78] sorted minimizers
[M::main::0.002*4.77] loaded/built the index for 13 target sequence(s)
[M::mm_mapopt_update::0.002*4.56] mid_occ = 13
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 13
[M::mm_idx_stat::0.002*4.39] distinct minimizers: 979 (69.97% are singletons); average occurrences: 1.754; average spacing: 5.411


pid[178469] 2023-06-24 23:44:44.089 INFO: clust_OPTICS: clusters=4 outliers=62 delta=7
pid[178469] 2023-06-24 23:44:44.089 INFO: clust_OPTICS: iter=8 using min_samples=7


[M::worker_pipeline::0.259*2.67] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.260 sec; CPU: 0.693 sec; Peak RSS: 0.133 GB


pid[178467] 2023-06-24 23:44:45.084 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:44:45.487 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:44:45.487 INFO: Running OPTICS
pid[178467] 2023-06-24 23:44:45.507 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:44:45.507 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:44:45.619 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178467] 2023-06-24 23:44:45.620 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178467] 2023-06-24 23:44:45.706 INFO: clust_OPTICS: clusters=1 outliers=1859 delta=500
pid[178467] 2023-06-24 23:44:45.706 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178467] 2023-06-24 23:44:45.773 INFO: clust_OPTICS: clusters=1 outliers=1859 delta=250
pid[178467] 2023-06-24 23:44:45.773 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178467] 2023-06-24 23:44:45.851 INFO: clust_OPTICS: clusters=1 outliers=739 delta=125
pid[178467] 2023-06-24 23:44:45.852 IN

[M::mm_idx_gen::0.024*1.16] collected minimizers
[M::mm_idx_gen::0.030*1.50] sorted minimizers
[M::main::0.030*1.50] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.031*1.48] mid_occ = 1015
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.032*1.47] distinct minimizers: 61857 (79.66% are singletons); average occurrences: 3.772; average spacing: 5.481
[M::mm_idx_gen::0.025*1.19] collected minimizers
[M::mm_idx_gen::0.031*1.52] sorted minimizers
[M::main::0.031*1.52] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.50] mid_occ = 1136
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.033*1.49] distinct minimizers: 60299 (79.63% are singletons); average occurrences: 3.858; average spacing: 5.594


pid[178468] 2023-06-24 23:44:49.513 INFO: Running kmeans with n_clusters = 4
pid[178468] 2023-06-24 23:44:49.741 INFO: Getting results
pid[178468] 2023-06-24 23:44:49.825 INFO: cluster_sweep: uncovered 4117/20929
pid[178468] 2023-06-24 23:44:49.827 INFO: cluster_compute: computing pairwise distance matrix


  df_c.append(df_q[df_q['id'].isin(qry[ridx[:rsize]])][['id','sequence']])
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

[M::worker_pipeline::1.003*2.90] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.008 sec; CPU: 2.912 sec; Peak RSS: 0.155 GB
[M::worker_pipeline::1.876*2.90] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.880 sec; CPU: 5.449 sec; Peak RSS: 0.160 GB
[M::worker_pipeline::2.054*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 2.058 sec; CPU: 6.046 sec; Peak RSS: 0.166 GB


pid[178468] 2023-06-24 23:44:52.081 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:44:52.238 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:44:52.471 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:44:52.471 INFO: Running OPTICS
pid[178468] 2023-06-24 23:44:52.488 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:44:52.488 INFO: clust_OPTICS: iter=0 using min_samples=998
pid[178468] 2023-06-24 23:44:52.591 INFO: clust_OPTICS: clusters=0 outliers=1996 delta=499.0
pid[178468] 2023-06-24 23:44:52.592 INFO: clust_OPTICS: iter=1 using min_samples=499
pid[178469] 2023-06-24 23:44:52.610 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:44:52.610 INFO: Running OPTICS
pid[178469] 2023-06-24 23:44:52.635 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:44:52.635 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:44:52.649 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:44:52.694 INFO: clust_OPTICS: clusters=0 out

pid[178468] 2023-06-24 23:44:58.275 INFO: clust_OPTICS: clusters=8 outliers=697 delta=2
pid[178468] 2023-06-24 23:44:58.275 INFO: clust_OPTICS: iter=19 using min_samples=24
pid[178469] 2023-06-24 23:44:58.312 INFO: clust_OPTICS: clusters=1 outliers=11 delta=3
pid[178469] 2023-06-24 23:44:58.312 INFO: clust_OPTICS: iter=9 using min_samples=11
pid[178468] 2023-06-24 23:44:58.452 INFO: clust_OPTICS: clusters=12 outliers=868 delta=1
pid[178468] 2023-06-24 23:44:58.452 INFO: clust_OPTICS: iter=20 using min_samples=25


[M::mm_idx_gen::0.001*7.74] collected minimizers
[M::mm_idx_gen::0.001*5.59] sorted minimizers
[M::main::0.002*5.58] loaded/built the index for 17 target sequence(s)
[M::mm_mapopt_update::0.002*5.31] mid_occ = 16
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 17
[M::mm_idx_stat::0.002*5.10] distinct minimizers: 897 (60.76% are singletons); average occurrences: 2.459; average spacing: 5.446


pid[178468] 2023-06-24 23:44:58.637 INFO: clust_OPTICS: clusters=12 outliers=899 delta=-1
pid[178468] 2023-06-24 23:44:58.637 INFO: clust_OPTICS: iter=21 using min_samples=25
pid[178468] 2023-06-24 23:44:58.808 INFO: clust_OPTICS: clusters=12 outliers=899 delta=-1
pid[178468] 2023-06-24 23:44:58.808 INFO: n_clusters=15 n_unclustered=1244 N=1996
pid[178468] 2023-06-24 23:44:58.819 INFO: Making directory ./clusters/


[M::worker_pipeline::0.375*2.66] mapped 26721 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.376 sec; CPU: 1.000 sec; Peak RSS: 0.141 GB


pid[178468] 2023-06-24 23:44:59.054 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:44:59.054 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:44:59.078 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:44:59.114 INFO: clust_OPTICS: clusters=3 outliers=67 delta=1
pid[178469] 2023-06-24 23:44:59.115 INFO: clust_OPTICS: iter=10 using min_samples=13
pid[178469] 2023-06-24 23:44:59.632 INFO: clust_OPTICS: clusters=3 outliers=106 delta=-2
pid[178469] 2023-06-24 23:44:59.632 INFO: clust_OPTICS: iter=11 using min_samples=13


[M::mm_idx_gen::0.002*3.95] collected minimizers
[M::mm_idx_gen::0.002*3.59] sorted minimizers
[M::main::0.002*3.59] loaded/built the index for 28 target sequence(s)
[M::mm_mapopt_update::0.002*3.35] mid_occ = 26
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 28
[M::mm_idx_stat::0.002*3.20] distinct minimizers: 1831 (70.13% are singletons); average occurrences: 2.073; average spacing: 5.381


pid[178469] 2023-06-24 23:45:00.089 INFO: clust_OPTICS: clusters=3 outliers=106 delta=-2
pid[178469] 2023-06-24 23:45:00.089 INFO: clust_OPTICS: iter=12 using min_samples=14
pid[178469] 2023-06-24 23:45:00.515 INFO: clust_OPTICS: clusters=3 outliers=116 delta=-1
pid[178469] 2023-06-24 23:45:00.515 INFO: clust_OPTICS: iter=13 using min_samples=14
pid[178469] 2023-06-24 23:45:00.941 INFO: clust_OPTICS: clusters=3 outliers=116 delta=-1
pid[178469] 2023-06-24 23:45:00.941 INFO: n_clusters=3 n_unclustered=67 N=2000
pid[178469] 2023-06-24 23:45:00.950 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:45:00.994 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178469] 2023-06-24 23:45:00.994 INFO: cluster_spoa_merge: reading consensus
pid[178469] 2023-06-24 23:45:01.007 INFO: Making directory ./clusters/


[M::worker_pipeline::1.824*2.90] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.825 sec; CPU: 5.283 sec; Peak RSS: 0.138 GB
[M::mm_idx_gen::0.001*5.75] collected minimizers
[M::mm_idx_gen::0.001*4.44] sorted minimizers
[M::main::0.001*4.44] loaded/built the index for 19 target sequence(s)
[M::mm_mapopt_update::0.002*4.24] mid_occ = 19
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 19
[M::mm_idx_stat::0.002*4.08] distinct minimizers: 1102 (58.17% are singletons); average occurrences: 2.200; average spacing: 5.456
[M::worker_pipeline::0.357*2.73] mapped 25040 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.358 sec; CPU: 0.978 sec; Peak RSS: 0.138 GB


pid[178468] 2023-06-24 23:45:04.640 INFO: cluster_eval: number of clusters = 28
pid[178468] 2023-06-24 23:45:04.730 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:04.730 INFO: max_eps = 0.9
pid[178468] 2023-06-24 23:45:04.730 INFO: clust_OPTICS: iter=0 using min_samples=2
pid[178468] 2023-06-24 23:45:04.734 INFO: clust_OPTICS: clusters=4 outliers=8 delta=1.0
pid[178468] 2023-06-24 23:45:04.734 INFO: n_clusters=4 n_unclustered=8 N=28
pid[178467] 2023-06-24 23:45:04.782 INFO: cluster_eval: number of clusters = 17
pid[178468] 2023-06-24 23:45:05.585 INFO: cluster_merge: 20/8 clusters to merge
pid[178468] 2023-06-24 23:45:05.585 INFO: cluster_merge: doing merging on 8 clusters, 0/4
pid[178468] 2023-06-24 23:45:05.620 INFO: cluster_compute: computing pairwise distance matrix


[M::mm_idx_gen::0.011*1.50] collected minimizers
[M::mm_idx_gen::0.014*1.80] sorted minimizers
[M::main::0.014*1.80] loaded/built the index for 764 target sequence(s)
[M::mm_mapopt_update::0.014*1.77] mid_occ = 398
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 764
[M::mm_idx_stat::0.015*1.75] distinct minimizers: 27628 (79.21% are singletons); average occurrences: 3.373; average spacing: 5.367
[M::worker_pipeline::0.332*2.90] mapped 764 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.334 sec; CPU: 0.965 sec; Peak RSS: 0.218 GB


pid[178467] 2023-06-24 23:45:06.348 INFO: Running kmeans with n_clusters = 4
pid[178468] 2023-06-24 23:45:06.373 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:06.441 INFO: Getting results
pid[178467] 2023-06-24 23:45:06.527 INFO: cluster_sweep: uncovered 3650/26721
pid[178467] 2023-06-24 23:45:06.532 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:06.597 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:06.602 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:06.605 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:06.608 INFO: clust_OPTICS: iter=0 using min_samples=382
pid[178468] 2023-06-24 23:45:06.630 INFO: clust_OPTICS: clusters=0 outliers=764 delta=191.0
pid[178468] 2023-06-24 23:45:06.630 INFO: clust_OPTICS: iter=1 using min_samples=191
pid[178468] 2023-06-24 23:45:06.641 INFO: clust_OPTICS: clusters=0 outliers=764 delta=191
pid[178468] 2023-06-24 23:45:06.641 INFO: clust_OPTICS: iter=2 using min_samples=96
p

[M::mm_idx_gen::0.026*1.20] collected minimizers
[M::mm_idx_gen::0.032*1.55] sorted minimizers
[M::main::0.032*1.55] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.035*1.50] mid_occ = 402
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.038*1.47] distinct minimizers: 109104 (84.19% are singletons); average occurrences: 2.161; average spacing: 5.466


pid[178468] 2023-06-24 23:45:07.013 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:07.099 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:45:07.099 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:45:07.115 INFO: cluster_merge: doing merging on 8 clusters, 1/4
pid[178468] 2023-06-24 23:45:07.155 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:07.155 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.010*1.58] collected minimizers
[M::mm_idx_gen::0.012*1.88] sorted minimizers
[M::main::0.012*1.88] loaded/built the index for 732 target sequence(s)
[M::mm_mapopt_update::0.013*1.85] mid_occ = 497
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 732
[M::mm_idx_stat::0.013*1.82] distinct minimizers: 19539 (77.25% are singletons); average occurrences: 4.614; average spacing: 5.327
[M::worker_pipeline::0.734*2.83] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.737 sec; CPU: 2.080 sec; Peak RSS: 0.171 GB
[M::worker_pipeline::0.465*2.94] mapped 732 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.467 sec; CPU: 1.367 sec; Peak RSS: 0.222 GB


pid[178469] 2023-06-24 23:45:07.759 INFO: Running kmeans with n_clusters = 4
pid[178469] 2023-06-24 23:45:07.858 INFO: Getting results
pid[178469] 2023-06-24 23:45:07.890 INFO: cluster_sweep: uncovered 14384/25040
pid[178469] 2023-06-24 23:45:07.893 INFO: cluster_compute: computing pairwise distance matrix


  df_c.append(df_q[df_q['id'].isin(qry[ridx[:rsize]])][['id','sequence']])
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentat

pid[178468] 2023-06-24 23:45:08.175 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:08.312 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:08.312 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:08.315 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:08.315 INFO: clust_OPTICS: iter=0 using min_samples=366
pid[178468] 2023-06-24 23:45:08.331 INFO: clust_OPTICS: clusters=0 outliers=732 delta=183.0
pid[178468] 2023-06-24 23:45:08.331 INFO: clust_OPTICS: iter=1 using min_samples=183
pid[178468] 2023-06-24 23:45:08.341 INFO: clust_OPTICS: clusters=0 outliers=732 delta=183
pid[178468] 2023-06-24 23:45:08.342 INFO: clust_OPTICS: iter=2 using min_samples=92
pid[178468] 2023-06-24 23:45:08.352 INFO: clust_OPTICS: clusters=1 outliers=675 delta=91
pid[178468] 2023-06-24 23:45:08.353 INFO: clust_OPTICS: iter=3 using min_samples=47
pid[178468] 2023-06-24 23:45:08.368 INFO: clust_OPTICS: clusters=1 outliers=429 delta=45
pid[178468] 2023-06-24 23:45:08.368 INFO: clus

[M::mm_idx_gen::0.003*2.81] collected minimizers
[M::mm_idx_gen::0.004*2.81] sorted minimizers
[M::main::0.004*2.81] loaded/built the index for 197 target sequence(s)
[M::mm_mapopt_update::0.005*2.71] mid_occ = 149
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 197
[M::mm_idx_stat::0.005*2.64] distinct minimizers: 6499 (77.07% are singletons); average occurrences: 3.545; average spacing: 5.558
[M::worker_pipeline::0.055*2.87] mapped 197 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.056 sec; CPU: 0.160 sec; Peak RSS: 0.222 GB


pid[178468] 2023-06-24 23:45:08.913 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:08.950 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:08.950 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:08.951 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:08.951 INFO: clust_OPTICS: iter=0 using min_samples=99
pid[178468] 2023-06-24 23:45:08.954 INFO: clust_OPTICS: clusters=0 outliers=197 delta=49.5
pid[178468] 2023-06-24 23:45:08.954 INFO: clust_OPTICS: iter=1 using min_samples=50
pid[178468] 2023-06-24 23:45:08.957 INFO: clust_OPTICS: clusters=2 outliers=180 delta=49
pid[178468] 2023-06-24 23:45:08.957 INFO: clust_OPTICS: iter=2 using min_samples=26
pid[178468] 2023-06-24 23:45:08.961 INFO: clust_OPTICS: clusters=2 outliers=120 delta=24
pid[178468] 2023-06-24 23:45:08.961 INFO: clust_OPTICS: iter=3 using min_samples=14
pid[178468] 2023-06-24 23:45:08.968 INFO: clust_OPTICS: clusters=1 outliers=17 delta=12
pid[178468] 2023-06-24 23:45:08.968 INFO: clust_OPT

[M::mm_idx_gen::0.004*2.78] collected minimizers
[M::mm_idx_gen::0.005*2.80] sorted minimizers
[M::main::0.005*2.80] loaded/built the index for 200 target sequence(s)
[M::mm_mapopt_update::0.005*2.70] mid_occ = 139
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 200
[M::mm_idx_stat::0.006*2.64] distinct minimizers: 7957 (79.09% are singletons); average occurrences: 2.921; average spacing: 5.639
[M::worker_pipeline::0.046*2.85] mapped 200 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.047 sec; CPU: 0.131 sec; Peak RSS: 0.222 GB
  df_c.append(df_q[df_q['id'].isin(qry[ridx[:rsize]])][['id','sequence']])
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
  df_c = df_c.append(db)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexe

pid[178467] 2023-06-24 23:45:09.174 INFO: clust_OPTICS: clusters=0 outliers=1997 delta=499.5
pid[178467] 2023-06-24 23:45:09.174 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178467] 2023-06-24 23:45:09.251 INFO: clust_OPTICS: clusters=0 outliers=1997 delta=499
pid[178467] 2023-06-24 23:45:09.251 INFO: clust_OPTICS: iter=2 using min_samples=251
pid[178468] 2023-06-24 23:45:09.293 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:09.309 INFO: clust_OPTICS: clusters=0 outliers=1997 delta=249
pid[178467] 2023-06-24 23:45:09.310 INFO: clust_OPTICS: iter=3 using min_samples=127
pid[178468] 2023-06-24 23:45:09.338 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:09.338 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:09.339 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:09.339 INFO: clust_OPTICS: iter=0 using min_samples=100
pid[178468] 2023-06-24 23:45:09.343 INFO: clust_OPTICS: clusters=0 outliers=200 delta=50.0
pid[178468] 2023-06-24 23:45:09.343 IN

[M::worker_pipeline::1.225*2.90] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.234 sec; CPU: 3.559 sec; Peak RSS: 0.160 GB


pid[178468] 2023-06-24 23:45:09.380 INFO: clust_OPTICS: clusters=2 outliers=51 delta=3
pid[178468] 2023-06-24 23:45:09.381 INFO: clust_OPTICS: iter=6 using min_samples=20
pid[178468] 2023-06-24 23:45:09.385 INFO: clust_OPTICS: clusters=2 outliers=96 delta=-4
pid[178468] 2023-06-24 23:45:09.385 INFO: clust_OPTICS: iter=7 using min_samples=20
pid[178468] 2023-06-24 23:45:09.389 INFO: clust_OPTICS: clusters=2 outliers=96 delta=-4
pid[178468] 2023-06-24 23:45:09.389 INFO: clust_OPTICS: iter=8 using min_samples=22
pid[178468] 2023-06-24 23:45:09.394 INFO: clust_OPTICS: clusters=2 outliers=99 delta=-2
pid[178468] 2023-06-24 23:45:09.394 INFO: clust_OPTICS: iter=9 using min_samples=22
pid[178468] 2023-06-24 23:45:09.398 INFO: clust_OPTICS: clusters=2 outliers=99 delta=-2
pid[178468] 2023-06-24 23:45:09.398 INFO: clust_OPTICS: iter=10 using min_samples=23
pid[178468] 2023-06-24 23:45:09.401 INFO: clust_OPTICS: clusters=2 outliers=99 delta=-1
pid[178468] 2023-06-24 23:45:09.402 INFO: n_clusters

[M::mm_idx_gen::0.001*7.69] collected minimizers
[M::mm_idx_gen::0.002*5.76] sorted minimizers
[M::main::0.002*5.73] loaded/built the index for 21 target sequence(s)
[M::mm_mapopt_update::0.002*5.47] mid_occ = 23
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 21
[M::mm_idx_stat::0.002*5.29] distinct minimizers: 1481 (64.15% are singletons); average occurrences: 1.918; average spacing: 5.379


pid[178467] 2023-06-24 23:45:10.647 INFO: clust_OPTICS: clusters=8 outliers=1048 delta=4
pid[178467] 2023-06-24 23:45:10.647 INFO: clust_OPTICS: iter=15 using min_samples=38
pid[178467] 2023-06-24 23:45:10.737 INFO: clust_OPTICS: clusters=12 outliers=1430 delta=2
pid[178467] 2023-06-24 23:45:10.737 INFO: clust_OPTICS: iter=16 using min_samples=41
pid[178467] 2023-06-24 23:45:10.821 INFO: clust_OPTICS: clusters=11 outliers=1543 delta=-3
pid[178467] 2023-06-24 23:45:10.821 INFO: clust_OPTICS: iter=17 using min_samples=36
pid[178469] 2023-06-24 23:45:10.840 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:10.840 INFO: Running OPTICS


[M::worker_pipeline::0.296*2.72] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.297 sec; CPU: 0.807 sec; Peak RSS: 0.175 GB


pid[178469] 2023-06-24 23:45:10.861 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:10.862 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:45:10.948 INFO: clust_OPTICS: clusters=12 outliers=1334 delta=-2
pid[178467] 2023-06-24 23:45:10.948 INFO: clust_OPTICS: iter=18 using min_samples=34
pid[178469] 2023-06-24 23:45:11.016 INFO: clust_OPTICS: clusters=0 outliers=1999 delta=500.0
pid[178469] 2023-06-24 23:45:11.016 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178469] 2023-06-24 23:45:11.096 INFO: clust_OPTICS: clusters=0 outliers=1999 delta=500
pid[178469] 2023-06-24 23:45:11.096 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178467] 2023-06-24 23:45:11.097 INFO: clust_OPTICS: clusters=6 outliers=1201 delta=2
pid[178467] 2023-06-24 23:45:11.097 INFO: clust_OPTICS: iter=19 using min_samples=37
pid[178469] 2023-06-24 23:45:11.167 INFO: clust_OPTICS: clusters=2 outliers=1770 delta=250
pid[178469] 2023-06-24 23:45:11.167 INFO: clust_OPTICS: iter=3 

[M::mm_idx_gen::0.001*5.66] collected minimizers
[M::mm_idx_gen::0.002*4.60] sorted minimizers
[M::main::0.002*4.58] loaded/built the index for 29 target sequence(s)
[M::mm_mapopt_update::0.002*4.38] mid_occ = 27
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 29
[M::mm_idx_stat::0.002*4.24] distinct minimizers: 1372 (66.76% are singletons); average occurrences: 2.791; average spacing: 5.435


pid[178468] 2023-06-24 23:45:14.022 INFO: cluster_eval: number of clusters = 21
pid[178468] 2023-06-24 23:45:14.964 INFO: cluster_split: splitting on cid=cluster0 0/21
pid[178468] 2023-06-24 23:45:14.980 INFO: cluster_compute: computing pairwise distance matrix
pid[178469] 2023-06-24 23:45:15.078 INFO: clust_OPTICS: clusters=3 outliers=20 delta=3
pid[178469] 2023-06-24 23:45:15.078 INFO: clust_OPTICS: iter=9 using min_samples=11


[M::mm_idx_gen::0.013*1.50] collected minimizers
[M::mm_idx_gen::0.016*1.81] sorted minimizers
[M::main::0.016*1.81] loaded/built the index for 886 target sequence(s)
[M::mm_mapopt_update::0.018*1.76] mid_occ = 430
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 886
[M::mm_idx_stat::0.018*1.73] distinct minimizers: 37951 (81.31% are singletons); average occurrences: 2.751; average spacing: 5.578
[M::worker_pipeline::2.312*2.89] mapped 26721 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 2.314 sec; CPU: 6.694 sec; Peak RSS: 0.141 GB
[M::worker_pipeline::0.367*2.89] mapped 886 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.369 sec; CPU: 1.062 sec; Peak RSS: 0.170 GB


pid[178469] 2023-06-24 23:45:15.804 INFO: clust_OPTICS: clusters=4 outliers=107 delta=1
pid[178469] 2023-06-24 23:45:15.804 INFO: clust_OPTICS: iter=10 using min_samples=13
pid[178468] 2023-06-24 23:45:15.897 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:16.059 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:16.059 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:16.063 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:16.064 INFO: clust_OPTICS: iter=0 using min_samples=443
pid[178468] 2023-06-24 23:45:16.089 INFO: clust_OPTICS: clusters=0 outliers=886 delta=221.5
pid[178468] 2023-06-24 23:45:16.089 INFO: clust_OPTICS: iter=1 using min_samples=222
pid[178468] 2023-06-24 23:45:16.105 INFO: clust_OPTICS: clusters=0 outliers=886 delta=221
pid[178468] 2023-06-24 23:45:16.105 INFO: clust_OPTICS: iter=2 using min_samples=112
pid[178468] 2023-06-24 23:45:16.122 INFO: clust_OPTICS: clusters=1 outliers=620 delta=110
pid[178468] 2023-06-24 23:45:16.122 INFO: cl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a 

pid[178468] 2023-06-24 23:45:16.959 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:17.009 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:17.009 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:17.010 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:17.010 INFO: clust_OPTICS: iter=0 using min_samples=142
pid[178468] 2023-06-24 23:45:17.014 INFO: clust_OPTICS: clusters=1 outliers=257 delta=71.0
pid[178468] 2023-06-24 23:45:17.014 INFO: clust_OPTICS: iter=1 using min_samples=71
pid[178468] 2023-06-24 23:45:17.018 INFO: clust_OPTICS: clusters=1 outliers=184 delta=71
pid[178468] 2023-06-24 23:45:17.018 INFO: clust_OPTICS: iter=2 using min_samples=36
pid[178468] 2023-06-24 23:45:17.023 INFO: clust_OPTICS: clusters=1 outliers=67 delta=35
pid[178468] 2023-06-24 23:45:17.023 INFO: clust_OPTICS: iter=3 using min_samples=19
pid[178468] 2023-06-24 23:45:17.030 INFO: clust_OPTICS: clusters=1 outliers=59 delta=17
pid[178468] 2023-06-24 23:45:17.030 INFO: clust_OPT

[M::mm_idx_gen::0.006*1.99] collected minimizers
[M::mm_idx_gen::0.008*2.23] sorted minimizers
[M::main::0.008*2.22] loaded/built the index for 435 target sequence(s)
[M::mm_mapopt_update::0.009*2.13] mid_occ = 222
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 435
[M::mm_idx_stat::0.009*2.08] distinct minimizers: 21567 (83.29% are singletons); average occurrences: 2.399; average spacing: 5.549
[M::worker_pipeline::0.113*2.84] mapped 435 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.115 sec; CPU: 0.324 sec; Peak RSS: 0.170 GB


pid[178468] 2023-06-24 23:45:17.588 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:17.665 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:17.666 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:17.667 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:17.667 INFO: clust_OPTICS: iter=0 using min_samples=218
pid[178468] 2023-06-24 23:45:17.674 INFO: clust_OPTICS: clusters=0 outliers=435 delta=109.0
pid[178468] 2023-06-24 23:45:17.675 INFO: clust_OPTICS: iter=1 using min_samples=109
pid[178468] 2023-06-24 23:45:17.680 INFO: clust_OPTICS: clusters=1 outliers=393 delta=109
pid[178468] 2023-06-24 23:45:17.681 INFO: clust_OPTICS: iter=2 using min_samples=55
pid[178468] 2023-06-24 23:45:17.687 INFO: clust_OPTICS: clusters=2 outliers=288 delta=54
pid[178468] 2023-06-24 23:45:17.687 INFO: clust_OPTICS: iter=3 using min_samples=28
pid[178468] 2023-06-24 23:45:17.695 INFO: clust_OPTICS: clusters=2 outliers=141 delta=27
pid[178468] 2023-06-24 23:45:17.695 INFO: clus

[M::mm_idx_gen::0.004*2.53] collected minimizers
[M::mm_idx_gen::0.005*2.60] sorted minimizers
[M::main::0.005*2.60] loaded/built the index for 243 target sequence(s)
[M::mm_mapopt_update::0.006*2.49] mid_occ = 146
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 243
[M::mm_idx_stat::0.006*2.42] distinct minimizers: 12506 (84.06% are singletons); average occurrences: 2.422; average spacing: 5.547
[M::worker_pipeline::0.073*2.86] mapped 243 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.075 sec; CPU: 0.211 sec; Peak RSS: 0.170 GB


pid[178469] 2023-06-24 23:45:18.083 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178469] 2023-06-24 23:45:18.083 INFO: cluster_spoa_merge: reading consensus
pid[178469] 2023-06-24 23:45:18.098 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:18.103 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:18.147 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:18.147 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:18.148 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:18.148 INFO: clust_OPTICS: iter=0 using min_samples=122
pid[178468] 2023-06-24 23:45:18.151 INFO: clust_OPTICS: clusters=0 outliers=243 delta=61.0
pid[178468] 2023-06-24 23:45:18.151 INFO: clust_OPTICS: iter=1 using min_samples=61
pid[178468] 2023-06-24 23:45:18.154 INFO: clust_OPTICS: clusters=1 outliers=93 delta=61
pid[178468] 2023-06-24 23:45:18.154 INFO: clust_OPTICS: iter=2 using min_samples=31
pid[178468] 2023-06-24 23:45:18.158 INFO: clust_OPTICS: clusters=1 outliers=80 delta=

[M::mm_idx_gen::0.002*2.91] collected minimizers
[M::mm_idx_gen::0.002*2.85] sorted minimizers
[M::main::0.002*2.85] loaded/built the index for 70 target sequence(s)
[M::mm_mapopt_update::0.003*2.70] mid_occ = 47
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 70
[M::mm_idx_stat::0.003*2.61] distinct minimizers: 3941 (85.11% are singletons); average occurrences: 2.108; average spacing: 5.388
[M::worker_pipeline::0.013*2.76] mapped 70 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.014 sec; CPU: 0.037 sec; Peak RSS: 0.170 GB
[M::mm_idx_gen::0.003*2.53] collected minimizers
[M::mm_idx_gen::0.004*2.62] sorted minimizers
[M::main::0.004*2.62] loaded/built the index for 189 target sequence(s)
[M::mm_mapopt_update::0.005*2.50] mid_occ = 101
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 189
[M::mm_idx_stat::0.005*2.42] d

pid[178468] 2023-06-24 23:45:18.648 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:18.681 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:18.681 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:18.681 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:18.681 INFO: clust_OPTICS: iter=0 using min_samples=94
pid[178468] 2023-06-24 23:45:18.684 INFO: clust_OPTICS: clusters=0 outliers=188 delta=47.0
pid[178468] 2023-06-24 23:45:18.684 INFO: clust_OPTICS: iter=1 using min_samples=47
pid[178468] 2023-06-24 23:45:18.686 INFO: clust_OPTICS: clusters=1 outliers=124 delta=47
pid[178468] 2023-06-24 23:45:18.686 INFO: clust_OPTICS: iter=2 using min_samples=24
pid[178468] 2023-06-24 23:45:18.689 INFO: clust_OPTICS: clusters=1 outliers=92 delta=23
pid[178468] 2023-06-24 23:45:18.689 INFO: clust_OPTICS: iter=3 using min_samples=13
pid[178468] 2023-06-24 23:45:18.693 INFO: clust_OPTICS: clusters=2 outliers=47 delta=11
pid[178468] 2023-06-24 23:45:18.693 INFO: clust_OPTI

[M::mm_idx_gen::0.002*5.00] collected minimizers
[M::mm_idx_gen::0.003*4.25] sorted minimizers
[M::main::0.003*4.25] loaded/built the index for 104 target sequence(s)
[M::mm_mapopt_update::0.003*3.99] mid_occ = 56
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 104
[M::mm_idx_stat::0.004*3.78] distinct minimizers: 7101 (86.34% are singletons); average occurrences: 1.793; average spacing: 5.383
[M::worker_pipeline::0.025*2.97] mapped 104 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.025 sec; CPU: 0.074 sec; Peak RSS: 0.170 GB


pid[178468] 2023-06-24 23:45:18.947 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:18.965 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:18.965 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:18.966 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:18.966 INFO: clust_OPTICS: iter=0 using min_samples=52
pid[178468] 2023-06-24 23:45:18.968 INFO: clust_OPTICS: clusters=1 outliers=68 delta=26.0
pid[178468] 2023-06-24 23:45:18.968 INFO: clust_OPTICS: iter=1 using min_samples=26
pid[178468] 2023-06-24 23:45:18.970 INFO: clust_OPTICS: clusters=1 outliers=63 delta=26
pid[178468] 2023-06-24 23:45:18.970 INFO: clust_OPTICS: iter=2 using min_samples=13
pid[178468] 2023-06-24 23:45:18.972 INFO: clust_OPTICS: clusters=3 outliers=46 delta=13
pid[178468] 2023-06-24 23:45:18.972 INFO: clust_OPTICS: iter=3 using min_samples=7
pid[178468] 2023-06-24 23:45:18.978 INFO: clust_OPTICS: clusters=1 outliers=2 delta=6
pid[178468] 2023-06-24 23:45:18.978 INFO: clust_OPTICS: i

[M::mm_idx_gen::0.001*5.90] collected minimizers
[M::mm_idx_gen::0.002*4.80] sorted minimizers
[M::main::0.002*4.78] loaded/built the index for 41 target sequence(s)
[M::mm_mapopt_update::0.002*4.42] mid_occ = 19
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 41
[M::mm_idx_stat::0.002*4.21] distinct minimizers: 3817 (88.66% are singletons); average occurrences: 1.280; average spacing: 5.452
[M::worker_pipeline::0.009*2.98] mapped 41 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.010 sec; CPU: 0.028 sec; Peak RSS: 0.170 GB
[M::mm_idx_gen::0.001*6.11] collected minimizers
[M::mm_idx_gen::0.002*4.78] sorted minimizers
[M::main::0.002*4.76] loaded/built the index for 23 target sequence(s)
[M::mm_mapopt_update::0.002*4.55] mid_occ = 22
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 23
[M::mm_idx_stat::0.002*4.39] dist

pid[178468] 2023-06-24 23:45:19.152 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:19.152 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:19.153 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:19.153 INFO: clust_OPTICS: iter=0 using min_samples=20
pid[178468] 2023-06-24 23:45:19.154 INFO: clust_OPTICS: clusters=1 outliers=28 delta=10.0
pid[178468] 2023-06-24 23:45:19.154 INFO: clust_OPTICS: iter=1 using min_samples=10
pid[178468] 2023-06-24 23:45:19.156 INFO: clust_OPTICS: clusters=1 outliers=19 delta=10
pid[178468] 2023-06-24 23:45:19.156 INFO: clust_OPTICS: iter=2 using min_samples=5
pid[178468] 2023-06-24 23:45:19.160 INFO: clust_OPTICS: clusters=1 outliers=3 delta=5
pid[178468] 2023-06-24 23:45:19.160 INFO: clust_OPTICS: iter=3 using min_samples=3
pid[178468] 2023-06-24 23:45:19.164 INFO: clust_OPTICS: clusters=1 outliers=0 delta=2
pid[178468] 2023-06-24 23:45:19.165 INFO: n_clusters=1 n_unclustered=0 N=39
pid[178468] 2023-06-24 23:45:19.168 INFO: Making dire

[M::mm_idx_gen::0.026*1.17] collected minimizers
[M::mm_idx_gen::0.033*1.57] sorted minimizers
[M::main::0.033*1.57] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.035*1.55] mid_occ = 1038
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.036*1.54] distinct minimizers: 65885 (80.46% are singletons); average occurrences: 3.685; average spacing: 5.379


pid[178467] 2023-06-24 23:45:20.668 INFO: cluster_merge: 23/6 clusters to merge
pid[178467] 2023-06-24 23:45:20.668 INFO: cluster_merge: doing merging on 7 clusters, 0/7
pid[178467] 2023-06-24 23:45:20.716 INFO: cluster_compute: computing pairwise distance matrix


[M::mm_idx_gen::0.011*1.66] collected minimizers
[M::mm_idx_gen::0.014*1.91] sorted minimizers
[M::main::0.014*1.91] loaded/built the index for 595 target sequence(s)
[M::mm_mapopt_update::0.015*1.88] mid_occ = 435
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 595
[M::mm_idx_stat::0.015*1.85] distinct minimizers: 16112 (77.74% are singletons); average occurrences: 4.398; average spacing: 5.353
[M::worker_pipeline::0.523*2.94] mapped 595 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.525 sec; CPU: 1.541 sec; Peak RSS: 0.255 GB
[M::worker_pipeline::2.201*2.88] mapped 25040 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 2.201 sec; CPU: 6.344 sec; Peak RSS: 0.140 GB
[M::worker_pipeline::1.8

pid[178467] 2023-06-24 23:45:21.702 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:21.817 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:21.817 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:21.819 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:21.819 INFO: clust_OPTICS: iter=0 using min_samples=298
pid[178467] 2023-06-24 23:45:21.829 INFO: clust_OPTICS: clusters=0 outliers=595 delta=149.0
pid[178467] 2023-06-24 23:45:21.830 INFO: clust_OPTICS: iter=1 using min_samples=149
pid[178467] 2023-06-24 23:45:21.837 INFO: clust_OPTICS: clusters=1 outliers=539 delta=149
pid[178467] 2023-06-24 23:45:21.837 INFO: clust_OPTICS: iter=2 using min_samples=75
pid[178467] 2023-06-24 23:45:21.846 INFO: clust_OPTICS: clusters=2 outliers=432 delta=74
pid[178467] 2023-06-24 23:45:21.846 INFO: clust_OPTICS: iter=3 using min_samples=38
pid[178467] 2023-06-24 23:45:21.858 INFO: clust_OPTICS: clusters=1 outliers=208 delta=37
pid[178467] 2023-06-24 23:45:21.858 INFO: clus

[M::mm_idx_gen::0.003*3.08] collected minimizers
[M::mm_idx_gen::0.004*3.01] sorted minimizers
[M::main::0.004*3.01] loaded/built the index for 239 target sequence(s)
[M::mm_mapopt_update::0.005*2.92] mid_occ = 212
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 239
[M::mm_idx_stat::0.005*2.85] distinct minimizers: 4592 (70.78% are singletons); average occurrences: 6.079; average spacing: 5.635
[M::worker_pipeline::0.135*2.95] mapped 239 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.136 sec; CPU: 0.399 sec; Peak RSS: 0.255 GB


pid[178467] 2023-06-24 23:45:22.356 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:22.400 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:22.400 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:22.401 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:22.401 INFO: clust_OPTICS: iter=0 using min_samples=120
pid[178467] 2023-06-24 23:45:22.404 INFO: clust_OPTICS: clusters=0 outliers=239 delta=60.0
pid[178467] 2023-06-24 23:45:22.404 INFO: clust_OPTICS: iter=1 using min_samples=60
pid[178467] 2023-06-24 23:45:22.407 INFO: clust_OPTICS: clusters=1 outliers=59 delta=60
pid[178467] 2023-06-24 23:45:22.407 INFO: clust_OPTICS: iter=2 using min_samples=30
pid[178467] 2023-06-24 23:45:22.411 INFO: clust_OPTICS: clusters=1 outliers=11 delta=30
pid[178467] 2023-06-24 23:45:22.411 INFO: clust_OPTICS: iter=3 using min_samples=15
pid[178467] 2023-06-24 23:45:22.421 INFO: clust_OPTICS: clusters=1 outliers=1 delta=15
pid[178467] 2023-06-24 23:45:22.421 INFO: clust_OPTIC

[M::mm_idx_gen::0.002*3.01] collected minimizers
[M::mm_idx_gen::0.003*2.96] sorted minimizers
[M::main::0.003*2.95] loaded/built the index for 138 target sequence(s)
[M::mm_mapopt_update::0.004*2.85] mid_occ = 118
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 138
[M::mm_idx_stat::0.004*2.79] distinct minimizers: 4086 (74.28% are singletons); average occurrences: 4.084; average spacing: 5.369
[M::worker_pipeline::0.050*2.81] mapped 138 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.051 sec; CPU: 0.141 sec; Peak RSS: 0.255 GB


pid[178468] 2023-06-24 23:45:22.623 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:22.702 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:22.729 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:22.729 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:22.729 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:22.730 INFO: clust_OPTICS: iter=0 using min_samples=69
pid[178467] 2023-06-24 23:45:22.732 INFO: clust_OPTICS: clusters=1 outliers=137 delta=34.5
pid[178467] 2023-06-24 23:45:22.732 INFO: clust_OPTICS: iter=1 using min_samples=35
pid[178467] 2023-06-24 23:45:22.735 INFO: clust_OPTICS: clusters=1 outliers=11 delta=34
pid[178467] 2023-06-24 23:45:22.735 INFO: clust_OPTICS: iter=2 using min_samples=18
pid[178467] 2023-06-24 23:45:22.739 INFO: clust_OPTICS: clusters=1 outliers=7 delta=17
pid[178467] 2023-06-24 23:45:22.739 INFO: clust_OPTICS: iter=3 using min_samples=10
pid[178467] 2023-06-24 23:45:22.746 INFO: clust_OPTICS: clusters=1 outlie

[M::mm_idx_gen::0.004*2.26] collected minimizers
[M::mm_idx_gen::0.006*2.39] sorted minimizers
[M::main::0.006*2.39] loaded/built the index for 320 target sequence(s)
[M::mm_mapopt_update::0.006*2.33] mid_occ = 282
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 320
[M::mm_idx_stat::0.006*2.28] distinct minimizers: 6078 (72.28% are singletons); average occurrences: 6.046; average spacing: 5.679
[M::worker_pipeline::0.207*2.93] mapped 320 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.208 sec; CPU: 0.609 sec; Peak RSS: 0.255 GB


pid[178468] 2023-06-24 23:45:23.182 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178468] 2023-06-24 23:45:23.182 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178468] 2023-06-24 23:45:23.266 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178468] 2023-06-24 23:45:23.266 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178468] 2023-06-24 23:45:23.330 INFO: clust_OPTICS: clusters=1 outliers=1762 delta=250
pid[178468] 2023-06-24 23:45:23.330 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178467] 2023-06-24 23:45:23.351 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:23.410 INFO: clust_OPTICS: clusters=1 outliers=580 delta=125
pid[178468] 2023-06-24 23:45:23.410 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178467] 2023-06-24 23:45:23.416 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:23.416 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:23.418 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:23.418 INFO

[M::mm_idx_gen::0.003*3.39] collected minimizers
[M::mm_idx_gen::0.004*3.21] sorted minimizers
[M::main::0.004*3.20] loaded/built the index for 132 target sequence(s)
[M::mm_mapopt_update::0.004*3.06] mid_occ = 92
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 132
[M::mm_idx_stat::0.005*2.97] distinct minimizers: 6528 (81.11% are singletons); average occurrences: 2.293; average spacing: 5.645
[M::worker_pipeline::0.029*2.83] mapped 132 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.030 sec; CPU: 0.082 sec; Peak RSS: 0.255 GB


pid[178467] 2023-06-24 23:45:23.750 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:23.755 INFO: clust_OPTICS: clusters=1 outliers=120 delta=31
pid[178468] 2023-06-24 23:45:23.755 INFO: clust_OPTICS: iter=6 using min_samples=17
pid[178467] 2023-06-24 23:45:23.776 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:23.776 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:23.777 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:23.777 INFO: clust_OPTICS: iter=0 using min_samples=66
pid[178467] 2023-06-24 23:45:23.780 INFO: clust_OPTICS: clusters=0 outliers=132 delta=33.0
pid[178467] 2023-06-24 23:45:23.780 INFO: clust_OPTICS: iter=1 using min_samples=33
pid[178467] 2023-06-24 23:45:23.782 INFO: clust_OPTICS: clusters=1 outliers=57 delta=33
pid[178467] 2023-06-24 23:45:23.782 INFO: clust_OPTICS: iter=2 using min_samples=17
pid[178467] 2023-06-24 23:45:23.786 INFO: clust_OPTICS: clusters=1 outliers=34 delta=16
pid[178467] 2023-06-24 23:45:23.786 INFO: clust_OPTI

[M::mm_idx_gen::0.004*2.75] collected minimizers
[M::mm_idx_gen::0.006*2.78] sorted minimizers
[M::main::0.006*2.78] loaded/built the index for 297 target sequence(s)
[M::mm_mapopt_update::0.006*2.67] mid_occ = 224
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 297
[M::mm_idx_stat::0.007*2.59] distinct minimizers: 8902 (77.35% are singletons); average occurrences: 3.968; average spacing: 5.377
[M::worker_pipeline::0.110*2.89] mapped 297 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.111 sec; CPU: 0.318 sec; Peak RSS: 0.255 GB


pid[178468] 2023-06-24 23:45:24.146 INFO: clust_OPTICS: clusters=2 outliers=66 delta=15
pid[178468] 2023-06-24 23:45:24.147 INFO: clust_OPTICS: iter=7 using min_samples=10
pid[178467] 2023-06-24 23:45:24.274 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:24.332 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:24.332 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:24.333 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:24.333 INFO: clust_OPTICS: iter=0 using min_samples=149
pid[178467] 2023-06-24 23:45:24.338 INFO: clust_OPTICS: clusters=0 outliers=297 delta=74.5
pid[178467] 2023-06-24 23:45:24.338 INFO: clust_OPTICS: iter=1 using min_samples=75
pid[178467] 2023-06-24 23:45:24.342 INFO: clust_OPTICS: clusters=1 outliers=225 delta=74
pid[178467] 2023-06-24 23:45:24.342 INFO: clust_OPTICS: iter=2 using min_samples=38
pid[178467] 2023-06-24 23:45:24.347 INFO: clust_OPTICS: clusters=1 outliers=102 delta=37
pid[178467] 2023-06-24 23:45:24.348 INFO: clust_OP

[M::mm_idx_gen::0.003*4.22] collected minimizers
[M::mm_idx_gen::0.004*3.86] sorted minimizers
[M::main::0.004*3.86] loaded/built the index for 140 target sequence(s)
[M::mm_mapopt_update::0.004*3.71] mid_occ = 118
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 140
[M::mm_idx_stat::0.004*3.60] distinct minimizers: 4129 (78.15% are singletons); average occurrences: 4.009; average spacing: 5.427
[M::worker_pipeline::0.053*2.94] mapped 140 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.054 sec; CPU: 0.158 sec; Peak RSS: 0.255 GB


pid[178467] 2023-06-24 23:45:24.725 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:24.749 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:24.749 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:24.751 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:24.751 INFO: clust_OPTICS: iter=0 using min_samples=70
pid[178467] 2023-06-24 23:45:24.754 INFO: clust_OPTICS: clusters=0 outliers=140 delta=35.0
pid[178467] 2023-06-24 23:45:24.754 INFO: clust_OPTICS: iter=1 using min_samples=35
pid[178467] 2023-06-24 23:45:24.757 INFO: clust_OPTICS: clusters=1 outliers=31 delta=35
pid[178467] 2023-06-24 23:45:24.757 INFO: clust_OPTICS: iter=2 using min_samples=18
pid[178467] 2023-06-24 23:45:24.762 INFO: clust_OPTICS: clusters=1 outliers=0 delta=17
pid[178467] 2023-06-24 23:45:24.762 INFO: n_clusters=1 n_unclustered=0 N=140
pid[178467] 2023-06-24 23:45:24.768 INFO: Making directory ./clusters/
pid[178467] 2023-06-24 23:45:24.818 INFO: cluster_spoa_merge: spoa on 0/1.0
pi

[M::mm_idx_gen::0.001*7.80] collected minimizers
[M::mm_idx_gen::0.002*5.43] sorted minimizers
[M::main::0.002*5.39] loaded/built the index for 15 target sequence(s)
[M::mm_mapopt_update::0.002*5.17] mid_occ = 16
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 15
[M::mm_idx_stat::0.002*4.99] distinct minimizers: 871 (57.86% are singletons); average occurrences: 2.272; average spacing: 5.401


pid[178469] 2023-06-24 23:45:26.328 INFO: cluster_merge: 18/5 clusters to merge
pid[178469] 2023-06-24 23:45:26.329 INFO: cluster_merge: doing merging on 9 clusters, 0/5
pid[178468] 2023-06-24 23:45:26.349 INFO: clust_OPTICS: clusters=1 outliers=95 delta=-2
pid[178468] 2023-06-24 23:45:26.349 INFO: clust_OPTICS: iter=13 using min_samples=27
pid[178469] 2023-06-24 23:45:26.379 INFO: cluster_compute: computing pairwise distance matrix


[M::mm_idx_gen::0.010*1.66] collected minimizers
[M::mm_idx_gen::0.013*1.89] sorted minimizers
[M::main::0.013*1.89] loaded/built the index for 681 target sequence(s)
[M::mm_mapopt_update::0.013*1.85] mid_occ = 523
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 681
[M::mm_idx_stat::0.013*1.83] distinct minimizers: 14787 (76.11% are singletons); average occurrences: 5.428; average spacing: 5.499
[M::worker_pipeline::0.352*2.70] mapped 26721 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.352 sec; CPU: 0.951 sec; Peak RSS: 0.183 GB


pid[178468] 2023-06-24 23:45:26.608 INFO: clust_OPTICS: clusters=1 outliers=95 delta=-2
pid[178468] 2023-06-24 23:45:26.608 INFO: clust_OPTICS: iter=14 using min_samples=28
pid[178468] 2023-06-24 23:45:26.899 INFO: clust_OPTICS: clusters=1 outliers=96 delta=-1
pid[178468] 2023-06-24 23:45:26.899 INFO: clust_OPTICS: iter=15 using min_samples=28


[M::worker_pipeline::0.530*2.94] mapped 681 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.533 sec; CPU: 1.563 sec; Peak RSS: 0.236 GB


pid[178468] 2023-06-24 23:45:27.132 INFO: clust_OPTICS: clusters=1 outliers=96 delta=-1
pid[178468] 2023-06-24 23:45:27.132 INFO: n_clusters=2 n_unclustered=66 N=2000
pid[178468] 2023-06-24 23:45:27.141 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:27.180 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:45:27.180 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:45:27.191 INFO: cluster_split: splitting on cid=cluster9 9/21
pid[178468] 2023-06-24 23:45:27.207 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:27.208 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.006*1.73] collected minimizers
[M::mm_idx_gen::0.008*2.03] sorted minimizers
[M::main::0.008*2.03] loaded/built the index for 378 target sequence(s)
[M::mm_mapopt_update::0.009*1.94] mid_occ = 152
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 378
[M::mm_idx_stat::0.009*1.89] distinct minimizers: 23205 (82.75% are singletons); average occurrences: 1.937; average spacing: 5.500
[M::worker_pipeline::0.092*2.80] mapped 378 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.094 sec; CPU: 0.260 sec; Peak RSS: 0.149 GB


pid[178469] 2023-06-24 23:45:27.416 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:27.485 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:27.538 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:27.539 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:27.541 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:27.541 INFO: clust_OPTICS: iter=0 using min_samples=341
pid[178468] 2023-06-24 23:45:27.553 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:27.553 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:27.554 INFO: clust_OPTICS: clusters=0 outliers=681 delta=170.5
pid[178469] 2023-06-24 23:45:27.554 INFO: clust_OPTICS: iter=1 using min_samples=171
pid[178468] 2023-06-24 23:45:27.554 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:27.554 INFO: clust_OPTICS: iter=0 using min_samples=189
pid[178468] 2023-06-24 23:45:27.560 INFO: clust_OPTICS: clusters=0 outliers=378 delta=94.5
pid[178468] 2023-06-24 23:45:27.560 INFO: clust_OP

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a 

pid[178469] 2023-06-24 23:45:27.621 INFO: clust_OPTICS: clusters=2 outliers=271 delta=10
pid[178469] 2023-06-24 23:45:27.621 INFO: clust_OPTICS: iter=6 using min_samples=69
pid[178468] 2023-06-24 23:45:27.622 INFO: clust_OPTICS: clusters=6 outliers=136 delta=-2
pid[178468] 2023-06-24 23:45:27.622 INFO: clust_OPTICS: iter=9 using min_samples=12
pid[178468] 2023-06-24 23:45:27.634 INFO: clust_OPTICS: clusters=5 outliers=83 delta=2
pid[178468] 2023-06-24 23:45:27.634 INFO: clust_OPTICS: iter=10 using min_samples=15
pid[178469] 2023-06-24 23:45:27.634 INFO: clust_OPTICS: clusters=3 outliers=354 delta=-15
pid[178469] 2023-06-24 23:45:27.635 INFO: clust_OPTICS: iter=7 using min_samples=76
pid[178468] 2023-06-24 23:45:27.641 INFO: clust_OPTICS: clusters=3 outliers=162 delta=1
pid[178468] 2023-06-24 23:45:27.641 INFO: clust_OPTICS: iter=11 using min_samples=10
pid[178469] 2023-06-24 23:45:27.645 INFO: clust_OPTICS: clusters=3 outliers=380 delta=-7
pid[178469] 2023-06-24 23:45:27.645 INFO: clus

[M::mm_idx_gen::0.002*3.79] collected minimizers
[M::mm_idx_gen::0.003*3.49] sorted minimizers
[M::main::0.003*3.49] loaded/built the index for 116 target sequence(s)
[M::mm_mapopt_update::0.003*3.18] mid_occ = 75
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 116
[M::mm_idx_stat::0.004*2.97] distinct minimizers: 6643 (82.69% are singletons); average occurrences: 2.069; average spacing: 5.450
[M::worker_pipeline::0.020*2.85] mapped 116 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.021 sec; CPU: 0.057 sec; Peak RSS: 0.236 GB
[M::mm_idx_gen::0.009*1.50] collected minimizers
[M::mm_idx_gen::0.012*1.85] sorted minimizers
[M::main::0.012*1.85] loaded/built the index for 695 target sequence(s)
[M::mm_mapopt_update::0.013*1.80] mid_occ = 275
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 695
[M::mm_idx_stat::0.013*1.77

pid[178469] 2023-06-24 23:45:27.929 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:27.952 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:27.952 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:27.952 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:27.952 INFO: clust_OPTICS: iter=0 using min_samples=58
pid[178469] 2023-06-24 23:45:27.954 INFO: clust_OPTICS: clusters=0 outliers=116 delta=29.0
pid[178469] 2023-06-24 23:45:27.954 INFO: clust_OPTICS: iter=1 using min_samples=29
pid[178469] 2023-06-24 23:45:27.956 INFO: clust_OPTICS: clusters=1 outliers=77 delta=29
pid[178469] 2023-06-24 23:45:27.956 INFO: clust_OPTICS: iter=2 using min_samples=15
pid[178469] 2023-06-24 23:45:27.959 INFO: clust_OPTICS: clusters=2 outliers=28 delta=14
pid[178469] 2023-06-24 23:45:27.959 INFO: clust_OPTICS: iter=3 using min_samples=8
pid[178469] 2023-06-24 23:45:27.965 INFO: clust_OPTICS: clusters=1 outliers=7 delta=7
pid[178469] 2023-06-24 23:45:27.965 INFO: clust_OPTICS: 

[M::worker_pipeline::0.213*2.86] mapped 695 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.215 sec; CPU: 0.612 sec; Peak RSS: 0.149 GB
[M::mm_idx_gen::0.003*3.14] collected minimizers
[M::mm_idx_gen::0.004*2.99] sorted minimizers
[M::main::0.004*2.98] loaded/built the index for 140 target sequence(s)
[M::mm_mapopt_update::0.004*2.83] mid_occ = 101
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 140
[M::mm_idx_stat::0.005*2.71] distinct minimizers: 6400 (81.95% are singletons); average occurrences: 2.576; average spacing: 5.615
[M::worker_pipeline::0.029*2.76] mapped 140 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.030 sec; CPU: 0.081 sec; Peak RSS: 0.236 GB


pid[178469] 2023-06-24 23:45:28.220 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:28.243 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:28.243 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:28.244 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:28.244 INFO: clust_OPTICS: iter=0 using min_samples=70
pid[178469] 2023-06-24 23:45:28.246 INFO: clust_OPTICS: clusters=1 outliers=72 delta=35.0
pid[178469] 2023-06-24 23:45:28.246 INFO: clust_OPTICS: iter=1 using min_samples=35
pid[178469] 2023-06-24 23:45:28.248 INFO: clust_OPTICS: clusters=1 outliers=54 delta=35
pid[178469] 2023-06-24 23:45:28.248 INFO: clust_OPTICS: iter=2 using min_samples=18
pid[178469] 2023-06-24 23:45:28.250 INFO: clust_OPTICS: clusters=1 outliers=15 delta=17
pid[178469] 2023-06-24 23:45:28.250 INFO: clust_OPTICS: iter=3 using min_samples=10
pid[178469] 2023-06-24 23:45:28.255 INFO: clust_OPTICS: clusters=1 outliers=5 delta=8
pid[178469] 2023-06-24 23:45:28.255 INFO: clust_OPTICS: 

[M::mm_idx_gen::0.003*2.81] collected minimizers
[M::mm_idx_gen::0.004*2.80] sorted minimizers
[M::main::0.004*2.80] loaded/built the index for 184 target sequence(s)
[M::mm_mapopt_update::0.004*2.67] mid_occ = 103
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 184
[M::mm_idx_stat::0.005*2.58] distinct minimizers: 10115 (83.30% are singletons); average occurrences: 2.186; average spacing: 5.456
[M::worker_pipeline::0.038*2.84] mapped 184 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.039 sec; CPU: 0.109 sec; Peak RSS: 0.236 GB


pid[178468] 2023-06-24 23:45:28.511 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:28.511 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:28.514 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:28.514 INFO: clust_OPTICS: iter=0 using min_samples=347
pid[178469] 2023-06-24 23:45:28.520 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:28.530 INFO: clust_OPTICS: clusters=0 outliers=694 delta=173.5
pid[178468] 2023-06-24 23:45:28.531 INFO: clust_OPTICS: iter=1 using min_samples=174
pid[178468] 2023-06-24 23:45:28.540 INFO: clust_OPTICS: clusters=0 outliers=694 delta=173
pid[178468] 2023-06-24 23:45:28.540 INFO: clust_OPTICS: iter=2 using min_samples=88
pid[178468] 2023-06-24 23:45:28.548 INFO: clust_OPTICS: clusters=0 outliers=694 delta=86
pid[178468] 2023-06-24 23:45:28.548 INFO: clust_OPTICS: iter=3 using min_samples=45
pid[178469] 2023-06-24 23:45:28.551 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:28.551 INFO: Running OPTICS
pid[

[M::mm_idx_gen::0.002*4.63] collected minimizers
[M::mm_idx_gen::0.003*4.15] sorted minimizers
[M::main::0.003*4.14] loaded/built the index for 106 target sequence(s)
[M::mm_mapopt_update::0.003*3.87] mid_occ = 86
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 106
[M::mm_idx_stat::0.003*3.69] distinct minimizers: 4874 (81.56% are singletons); average occurrences: 2.541; average spacing: 5.643
[M::worker_pipeline::0.023*2.93] mapped 106 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.024 sec; CPU: 0.069 sec; Peak RSS: 0.236 GB
[M::mm_idx_gen::0.003*3.32] collected minimizers
[M::mm_idx_gen::0.005*3.13] sorted minimizers
[M::main::0.005*3.13] loaded/built the index for 228 target sequence(s)
[M::mm_mapopt_update::0.005*2.96] mid_occ = 99
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 228
[M::mm_idx_stat::0.006*2.84]

pid[178469] 2023-06-24 23:45:28.927 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178469] 2023-06-24 23:45:28.927 INFO: cluster_spoa_merge: reading consensus
pid[178469] 2023-06-24 23:45:28.952 INFO: perform_cluster: iter = 1/5
pid[178469] 2023-06-24 23:45:28.952 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:28.992 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:29.032 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:29.032 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:29.033 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:29.033 INFO: clust_OPTICS: iter=0 using min_samples=114
pid[178468] 2023-06-24 23:45:29.036 INFO: clust_OPTICS: clusters=0 outliers=228 delta=57.0
pid[178468] 2023-06-24 23:45:29.036 INFO: clust_OPTICS: iter=1 using min_samples=57
pid[178468] 2023-06-24 23:45:29.039 INFO: clust_OPTICS: clusters=0 outliers=228 delta=57
pid[178468] 2023-06-24 23:45:29.039 INFO: clust_OPTICS: iter=2 using min_samples=29
pid[178468] 20

[M::mm_idx_gen::0.003*2.31] collected minimizers
[M::mm_idx_gen::0.004*2.45] sorted minimizers
[M::main::0.004*2.45] loaded/built the index for 161 target sequence(s)
[M::mm_mapopt_update::0.004*2.34] mid_occ = 62
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 161
[M::mm_idx_stat::0.005*2.26] distinct minimizers: 11715 (84.42% are singletons); average occurrences: 1.630; average spacing: 5.493
[M::worker_pipeline::0.034*2.78] mapped 161 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.035 sec; CPU: 0.096 sec; Peak RSS: 0.159 GB


pid[178468] 2023-06-24 23:45:29.347 INFO: clust_OPTICS: clusters=2 outliers=104 delta=20
pid[178468] 2023-06-24 23:45:29.348 INFO: clust_OPTICS: iter=3 using min_samples=10
pid[178468] 2023-06-24 23:45:29.352 INFO: clust_OPTICS: clusters=1 outliers=27 delta=10
pid[178468] 2023-06-24 23:45:29.353 INFO: clust_OPTICS: iter=4 using min_samples=25
pid[178468] 2023-06-24 23:45:29.355 INFO: clust_OPTICS: clusters=1 outliers=116 delta=5
pid[178468] 2023-06-24 23:45:29.355 INFO: clust_OPTICS: iter=5 using min_samples=25
pid[178468] 2023-06-24 23:45:29.357 INFO: clust_OPTICS: clusters=1 outliers=116 delta=5
pid[178468] 2023-06-24 23:45:29.357 INFO: clust_OPTICS: iter=6 using min_samples=32
pid[178468] 2023-06-24 23:45:29.359 INFO: clust_OPTICS: clusters=1 outliers=134 delta=-7
pid[178468] 2023-06-24 23:45:29.359 INFO: clust_OPTICS: iter=7 using min_samples=32
pid[178468] 2023-06-24 23:45:29.361 INFO: clust_OPTICS: clusters=1 outliers=134 delta=-7
pid[178468] 2023-06-24 23:45:29.361 INFO: clust_O

[M::mm_idx_gen::0.024*1.18] collected minimizers
[M::mm_idx_gen::0.031*1.53] sorted minimizers
[M::main::0.031*1.53] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.032*1.51] mid_occ = 1083
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.033*1.50] distinct minimizers: 64477 (80.41% are singletons); average occurrences: 3.777; average spacing: 5.361
[M::mm_idx_gen::0.001*7.70] collected minimizers
[M::mm_idx_gen::0.002*5.26] sorted minimizers
[M::main::0.002*5.25] loaded/built the index for 13 target sequence(s)
[M::mm_mapopt_update::0.002*5.00] mid_occ = 13
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 13
[M::mm_idx_stat::0.002*4.85] distinct minimizers: 1047 (70.11% are singletons); average occurrences: 1.644; average spacing: 5.373
[M::worker_pipeline::0.280*2.65] mapped 25040 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/data

pid[178467] 2023-06-24 23:45:31.194 INFO: cluster_eval: number of clusters = 15


[M::worker_pipeline::1.816*2.95] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.818 sec; CPU: 5.355 sec; Peak RSS: 0.169 GB


pid[178467] 2023-06-24 23:45:32.339 INFO: cluster_split: splitting on cid=cluster0 0/15
pid[178467] 2023-06-24 23:45:32.354 INFO: cluster_compute: computing pairwise distance matrix
pid[178467] 2023-06-24 23:45:32.401 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:32.403 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:32.403 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:32.404 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:32.404 INFO: clust_OPTICS: iter=0 using min_samples=5
pid[178467] 2023-06-24 23:45:32.405 INFO: clust_OPTICS: clusters=1 outliers=2 delta=2.5
pid[178467] 2023-06-24 23:45:32.405 INFO: clust_OPTICS: iter=1 using min_samples=3
pid[178467] 2023-06-24 23:45:32.406 INFO: clust_OPTICS: clusters=1 outliers=1 delta=2
pid[178467] 2023-06-24 23:45:32.407 INFO: n_clusters=1 n_unclustered=1 N=9
pid[178467] 2023-06-24 23:45:32.410 INFO: Making directory ./clusters/
pid[178467] 2023-06-24 23:45:32.441 INFO: cluster_spoa_merge: spoa on 0/1.0


[M::mm_idx_gen::0.001*4.64] collected minimizers
[M::mm_idx_gen::0.002*3.97] sorted minimizers
[M::main::0.002*3.95] loaded/built the index for 10 target sequence(s)
[M::mm_mapopt_update::0.002*3.86] mid_occ = 10
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 10
[M::mm_idx_stat::0.002*3.79] distinct minimizers: 945 (84.97% are singletons); average occurrences: 1.297; average spacing: 5.268
[M::worker_pipeline::0.003*3.34] mapped 10 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.003 sec; CPU: 0.010 sec; Peak RSS: 0.183 GB
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(

[M::worker_pipeline::0.067*2.78] mapped 278 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.069 sec; CPU: 0.188 sec; Peak RSS: 0.183 GB


pid[178468] 2023-06-24 23:45:32.648 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:32.692 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:32.740 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:32.740 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:32.741 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:32.741 INFO: clust_OPTICS: iter=0 using min_samples=139
pid[178467] 2023-06-24 23:45:32.745 INFO: clust_OPTICS: clusters=0 outliers=277 delta=69.5
pid[178467] 2023-06-24 23:45:32.745 INFO: clust_OPTICS: iter=1 using min_samples=70
pid[178467] 2023-06-24 23:45:32.748 INFO: clust_OPTICS: clusters=0 outliers=277 delta=69
pid[178467] 2023-06-24 23:45:32.748 INFO: clust_OPTICS: iter=2 using min_samples=36
pid[178467] 2023-06-24 23:45:32.750 INFO: clust_OPTICS: clusters=1 outliers=275 delta=34
pid[178467] 2023-06-24 23:45:32.750 INFO: clust_OPTICS: iter=3 using min_samples=19
pid[178467] 2023-06-24 23:45:32.755 INFO: clust_OPTICS: clusters=4 ou

[M::mm_idx_gen::0.002*2.80] collected minimizers
[M::mm_idx_gen::0.003*2.77] sorted minimizers
[M::main::0.003*2.77] loaded/built the index for 139 target sequence(s)
[M::mm_mapopt_update::0.004*2.63] mid_occ = 84
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 139
[M::mm_idx_stat::0.004*2.52] distinct minimizers: 8562 (84.54% are singletons); average occurrences: 1.960; average spacing: 5.367
[M::worker_pipeline::0.026*2.80] mapped 139 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.027 sec; CPU: 0.074 sec; Peak RSS: 0.183 GB


pid[178467] 2023-06-24 23:45:33.105 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178467] 2023-06-24 23:45:33.106 INFO: cluster_spoa_merge: reading consensus
pid[178467] 2023-06-24 23:45:33.121 INFO: cluster_split: splitting on cid=cluster3 3/15
pid[178467] 2023-06-24 23:45:33.148 INFO: cluster_compute: computing pairwise distance matrix
pid[178467] 2023-06-24 23:45:33.148 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:33.182 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178468] 2023-06-24 23:45:33.182 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178468] 2023-06-24 23:45:33.261 INFO: clust_OPTICS: clusters=1 outliers=1995 delta=500
pid[178468] 2023-06-24 23:45:33.261 INFO: clust_OPTICS: iter=2 using min_samples=250


[M::mm_idx_gen::0.006*2.13] collected minimizers
[M::mm_idx_gen::0.008*2.31] sorted minimizers
[M::main::0.008*2.31] loaded/built the index for 419 target sequence(s)
[M::mm_mapopt_update::0.009*2.23] mid_occ = 187
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 419
[M::mm_idx_stat::0.009*2.17] distinct minimizers: 21694 (83.27% are singletons); average occurrences: 2.295; average spacing: 5.461
[M::worker_pipeline::0.122*2.84] mapped 419 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.124 sec; CPU: 0.349 sec; Peak RSS: 0.183 GB


pid[178468] 2023-06-24 23:45:33.325 INFO: clust_OPTICS: clusters=1 outliers=1917 delta=250
pid[178468] 2023-06-24 23:45:33.326 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178468] 2023-06-24 23:45:33.403 INFO: clust_OPTICS: clusters=1 outliers=599 delta=125
pid[178468] 2023-06-24 23:45:33.403 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178468] 2023-06-24 23:45:33.517 INFO: clust_OPTICS: clusters=1 outliers=359 delta=62
pid[178468] 2023-06-24 23:45:33.517 INFO: clust_OPTICS: iter=5 using min_samples=32
pid[178467] 2023-06-24 23:45:33.548 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:33.628 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:33.628 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:33.629 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:33.629 INFO: clust_OPTICS: iter=0 using min_samples=210
pid[178467] 2023-06-24 23:45:33.637 INFO: clust_OPTICS: clusters=1 outliers=403 delta=105.0
pid[178467] 2023-06-24 23:45:33.637 INFO: cl

[M::mm_idx_gen::0.008*1.93] collected minimizers
[M::mm_idx_gen::0.011*2.16] sorted minimizers
[M::main::0.011*2.16] loaded/built the index for 555 target sequence(s)
[M::mm_mapopt_update::0.011*2.09] mid_occ = 272
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 555
[M::mm_idx_stat::0.012*2.05] distinct minimizers: 28358 (83.54% are singletons); average occurrences: 2.345; average spacing: 5.435
[M::worker_pipeline::0.169*2.85] mapped 555 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.171 sec; CPU: 0.483 sec; Peak RSS: 0.183 GB


pid[178469] 2023-06-24 23:45:34.096 INFO: cluster_eval: number of clusters = 13
pid[178468] 2023-06-24 23:45:34.161 INFO: clust_OPTICS: clusters=1 outliers=62 delta=15
pid[178468] 2023-06-24 23:45:34.161 INFO: clust_OPTICS: iter=7 using min_samples=39
pid[178468] 2023-06-24 23:45:34.335 INFO: clust_OPTICS: clusters=1 outliers=203 delta=7
pid[178468] 2023-06-24 23:45:34.335 INFO: clust_OPTICS: iter=8 using min_samples=39
pid[178467] 2023-06-24 23:45:34.415 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:34.497 INFO: clust_OPTICS: clusters=1 outliers=203 delta=7
pid[178468] 2023-06-24 23:45:34.497 INFO: clust_OPTICS: iter=9 using min_samples=50
pid[178467] 2023-06-24 23:45:34.523 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:34.523 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:34.525 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:34.525 INFO: clust_OPTICS: iter=0 using min_samples=277
pid[178467] 2023-06-24 23:45:34.536 INFO: clust_OPTICS: cluster

[M::mm_idx_gen::0.002*3.61] collected minimizers
[M::mm_idx_gen::0.003*3.34] sorted minimizers
[M::main::0.003*3.34] loaded/built the index for 120 target sequence(s)
[M::mm_mapopt_update::0.004*3.13] mid_occ = 66
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 120
[M::mm_idx_stat::0.004*2.98] distinct minimizers: 8369 (85.65% are singletons); average occurrences: 1.682; average spacing: 5.498
[M::worker_pipeline::0.026*2.83] mapped 120 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.027 sec; CPU: 0.075 sec; Peak RSS: 0.183 GB


pid[178468] 2023-06-24 23:45:34.799 INFO: clust_OPTICS: clusters=1 outliers=286 delta=-11
pid[178468] 2023-06-24 23:45:34.799 INFO: clust_OPTICS: iter=11 using min_samples=55
pid[178467] 2023-06-24 23:45:34.861 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:34.884 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:34.884 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:34.884 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:34.885 INFO: clust_OPTICS: iter=0 using min_samples=60
pid[178467] 2023-06-24 23:45:34.887 INFO: clust_OPTICS: clusters=0 outliers=120 delta=30.0
pid[178467] 2023-06-24 23:45:34.887 INFO: clust_OPTICS: iter=1 using min_samples=30
pid[178467] 2023-06-24 23:45:34.889 INFO: clust_OPTICS: clusters=1 outliers=82 delta=30
pid[178467] 2023-06-24 23:45:34.889 INFO: clust_OPTICS: iter=2 using min_samples=15
pid[178467] 2023-06-24 23:45:34.892 INFO: clust_OPTICS: clusters=1 outliers=39 delta=15
pid[178467] 2023-06-24 23:45:34.892 INFO: clust_OP

[M::mm_idx_gen::0.003*2.85] collected minimizers
[M::mm_idx_gen::0.004*2.83] sorted minimizers
[M::main::0.004*2.83] loaded/built the index for 120 target sequence(s)
[M::mm_mapopt_update::0.005*2.68] mid_occ = 70
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 120
[M::mm_idx_stat::0.005*2.58] distinct minimizers: 7148 (84.39% are singletons); average occurrences: 1.960; average spacing: 5.555
[M::worker_pipeline::0.024*2.78] mapped 120 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.025 sec; CPU: 0.069 sec; Peak RSS: 0.174 GB
[M::mm_idx_gen::0.023*1.26] collected minimizers
[M::mm_idx_gen::0.029*1.58] sorted minimizers
[M::main::0.029*1.58] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.031*1.55] mid_occ = 1051
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.031*1

pid[178469] 2023-06-24 23:45:35.310 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:35.319 INFO: clust_OPTICS: clusters=1 outliers=342 delta=-2
pid[178468] 2023-06-24 23:45:35.319 INFO: clust_OPTICS: iter=15 using min_samples=58
pid[178469] 2023-06-24 23:45:35.332 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:35.332 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:35.332 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:35.333 INFO: clust_OPTICS: iter=0 using min_samples=60
pid[178469] 2023-06-24 23:45:35.335 INFO: clust_OPTICS: clusters=1 outliers=80 delta=30.0
pid[178469] 2023-06-24 23:45:35.335 INFO: clust_OPTICS: iter=1 using min_samples=30
pid[178469] 2023-06-24 23:45:35.337 INFO: clust_OPTICS: clusters=1 outliers=74 delta=30
pid[178469] 2023-06-24 23:45:35.337 INFO: clust_OPTICS: iter=2 using min_samples=15
pid[178469] 2023-06-24 23:45:35.339 INFO: clust_OPTICS: clusters=2 outliers=37 delta=15
pid[178469] 2023-06-24 23:45:35.339 INFO: clust_OPTI

[M::mm_idx_gen::0.002*4.02] collected minimizers
[M::mm_idx_gen::0.002*3.65] sorted minimizers
[M::main::0.002*3.64] loaded/built the index for 82 target sequence(s)
[M::mm_mapopt_update::0.003*3.43] mid_occ = 45
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 82
[M::mm_idx_stat::0.003*3.28] distinct minimizers: 5241 (84.05% are singletons); average occurrences: 1.918; average spacing: 5.529
[M::worker_pipeline::0.015*2.94] mapped 82 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.016 sec; CPU: 0.045 sec; Peak RSS: 0.174 GB
[M::mm_idx_gen::0.005*1.95] collected minimizers
[M::mm_idx_gen::0.008*2.21] sorted minimizers
[M::main::0.008*2.21] loaded/built the index for 340 target sequence(s)
[M::mm_mapopt_update::0.008*2.09] mid_occ = 163
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 340
[M::mm_idx_stat::0.009*2.03] d

pid[178468] 2023-06-24 23:45:35.517 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:45:35.518 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:45:35.534 INFO: cluster_split: splitting on cid=cluster14 14/21
pid[178469] 2023-06-24 23:45:35.543 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:35.550 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:35.550 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:45:35.557 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:35.558 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:35.558 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:35.558 INFO: clust_OPTICS: iter=0 using min_samples=41
pid[178469] 2023-06-24 23:45:35.560 INFO: clust_OPTICS: clusters=0 outliers=82 delta=20.5
pid[178469] 2023-06-24 23:45:35.560 INFO: clust_OPTICS: iter=1 using min_samples=21
pid[178469] 2023-06-24 23:45:35.562 INFO: clust_OPTICS: clusters=1 outliers

[M::worker_pipeline::0.095*2.77] mapped 340 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.097 sec; CPU: 0.265 sec; Peak RSS: 0.150 GB


pid[178468] 2023-06-24 23:45:35.847 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:35.911 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:35.912 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:35.913 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:35.914 INFO: clust_OPTICS: iter=0 using min_samples=170
pid[178468] 2023-06-24 23:45:35.919 INFO: clust_OPTICS: clusters=0 outliers=339 delta=85.0
pid[178468] 2023-06-24 23:45:35.919 INFO: clust_OPTICS: iter=1 using min_samples=85
pid[178468] 2023-06-24 23:45:35.923 INFO: clust_OPTICS: clusters=0 outliers=339 delta=85
pid[178468] 2023-06-24 23:45:35.923 INFO: clust_OPTICS: iter=2 using min_samples=43
pid[178468] 2023-06-24 23:45:35.928 INFO: clust_OPTICS: clusters=3 outliers=228 delta=42
pid[178468] 2023-06-24 23:45:35.928 INFO: clust_OPTICS: iter=3 using min_samples=22
pid[178468] 2023-06-24 23:45:35.933 INFO: clust_OPTICS: clusters=1 outliers=133 delta=21
pid[178468] 2023-06-24 23:45:35.933 INFO: clust_O

[M::mm_idx_gen::0.024*1.23] collected minimizers
[M::mm_idx_gen::0.031*1.61] sorted minimizers
[M::main::0.031*1.61] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.57] mid_occ = 1046
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.55] distinct minimizers: 65192 (80.29% are singletons); average occurrences: 3.645; average spacing: 5.408


pid[178468] 2023-06-24 23:45:36.056 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:45:36.057 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:45:36.073 INFO: cluster_split: splitting on cid=cluster15 15/21
pid[178468] 2023-06-24 23:45:36.090 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:36.091 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.009*1.56] collected minimizers
[M::mm_idx_gen::0.012*1.97] sorted minimizers
[M::main::0.012*1.97] loaded/built the index for 564 target sequence(s)
[M::mm_mapopt_update::0.013*1.91] mid_occ = 280
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 564
[M::mm_idx_stat::0.014*1.87] distinct minimizers: 31471 (83.69% are singletons); average occurrences: 2.175; average spacing: 5.355
[M::worker_pipeline::0.237*2.88] mapped 564 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.240 sec; CPU: 0.686 sec; Peak RSS: 0.150 GB


pid[178468] 2023-06-24 23:45:36.732 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:36.851 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:36.852 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:36.854 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:36.854 INFO: clust_OPTICS: iter=0 using min_samples=282
pid[178468] 2023-06-24 23:45:36.866 INFO: clust_OPTICS: clusters=0 outliers=564 delta=141.0
pid[178468] 2023-06-24 23:45:36.866 INFO: clust_OPTICS: iter=1 using min_samples=141
pid[178468] 2023-06-24 23:45:36.875 INFO: clust_OPTICS: clusters=0 outliers=564 delta=141
pid[178468] 2023-06-24 23:45:36.875 INFO: clust_OPTICS: iter=2 using min_samples=71
pid[178468] 2023-06-24 23:45:36.884 INFO: clust_OPTICS: clusters=2 outliers=524 delta=70
pid[178468] 2023-06-24 23:45:36.884 INFO: clust_OPTICS: iter=3 using min_samples=36
pid[178468] 2023-06-24 23:45:36.895 INFO: clust_OPTICS: clusters=1 outliers=311 delta=35
pid[178468] 2023-06-24 23:45:36.895 INFO: clus

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is tryi

pid[178468] 2023-06-24 23:45:36.939 INFO: clust_OPTICS: clusters=2 outliers=429 delta=9
pid[178468] 2023-06-24 23:45:36.939 INFO: clust_OPTICS: iter=9 using min_samples=60
pid[178468] 2023-06-24 23:45:36.949 INFO: clust_OPTICS: clusters=3 outliers=505 delta=4
pid[178468] 2023-06-24 23:45:36.949 INFO: clust_OPTICS: iter=10 using min_samples=66
pid[178468] 2023-06-24 23:45:36.957 INFO: clust_OPTICS: clusters=2 outliers=524 delta=-6
pid[178468] 2023-06-24 23:45:36.958 INFO: clust_OPTICS: iter=11 using min_samples=57
pid[178468] 2023-06-24 23:45:36.967 INFO: clust_OPTICS: clusters=3 outliers=495 delta=-3
pid[178468] 2023-06-24 23:45:36.967 INFO: clust_OPTICS: iter=12 using min_samples=53
pid[178468] 2023-06-24 23:45:36.976 INFO: clust_OPTICS: clusters=2 outliers=434 delta=4
pid[178468] 2023-06-24 23:45:36.976 INFO: clust_OPTICS: iter=13 using min_samples=59
pid[178468] 2023-06-24 23:45:36.984 INFO: clust_OPTICS: clusters=3 outliers=505 delta=2
pid[178468] 2023-06-24 23:45:36.984 INFO: clus

[M::worker_pipeline::1.877*2.95] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.881 sec; CPU: 5.533 sec; Peak RSS: 0.183 GB
[M::mm_idx_gen::0.006*1.74] collected minimizers
[M::mm_idx_gen::0.009*2.03] sorted minimizers
[M::main::0.009*2.03] loaded/built the index for 399 target sequence(s)
[M::mm_mapopt_update::0.009*1.97] mid_occ = 205
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 399
[M::mm_idx_stat::0.010*1.93] distinct minimizers: 23585 (84.55% are singletons); average occurrences: 2.044; average spacing: 5.397


pid[178468] 2023-06-24 23:45:37.156 INFO: cluster_compute: computing pairwise distance matrix
pid[178468] 2023-06-24 23:45:37.156 INFO: Making directory ./clusters/


[M::worker_pipeline::0.124*2.73] mapped 399 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.126 sec; CPU: 0.341 sec; Peak RSS: 0.151 GB
[M::worker_pipeline::1.626*2.92] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.632 sec; CPU: 4.758 sec; Peak RSS: 0.173 GB


pid[178468] 2023-06-24 23:45:37.542 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:37.620 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:37.620 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:37.621 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:37.621 INFO: clust_OPTICS: iter=0 using min_samples=200
pid[178468] 2023-06-24 23:45:37.628 INFO: clust_OPTICS: clusters=0 outliers=399 delta=100.0
pid[178468] 2023-06-24 23:45:37.628 INFO: clust_OPTICS: iter=1 using min_samples=100
pid[178468] 2023-06-24 23:45:37.633 INFO: clust_OPTICS: clusters=0 outliers=399 delta=100
pid[178468] 2023-06-24 23:45:37.633 INFO: clust_OPTICS: iter=2 using min_samples=50
pid[178468] 2023-06-24 23:45:37.638 INFO: clust_OPTICS: clusters=1 outliers=310 delta=50
pid[178468] 2023-06-24 23:45:37.638 INFO: clust_OPTICS: iter=3 using min_samples=25
pid[178468] 2023-06-24 23:45:37.646 INFO: clust_OPTICS: clusters=1 outliers=161 delta=25
pid[178468] 2023-06-24 23:45:37.646 INFO: clus

[M::mm_idx_gen::0.011*1.49] collected minimizers
[M::mm_idx_gen::0.015*1.83] sorted minimizers
[M::main::0.015*1.83] loaded/built the index for 781 target sequence(s)
[M::mm_mapopt_update::0.016*1.77] mid_occ = 358
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 781
[M::mm_idx_stat::0.016*1.75] distinct minimizers: 41658 (83.37% are singletons); average occurrences: 2.283; average spacing: 5.374
[M::worker_pipeline::0.293*2.88] mapped 781 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.295 sec; CPU: 0.844 sec; Peak RSS: 0.151 GB


pid[178467] 2023-06-24 23:45:38.512 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:38.582 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:38.695 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:38.752 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:38.752 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:38.755 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:38.755 INFO: clust_OPTICS: iter=0 using min_samples=391
pid[178468] 2023-06-24 23:45:38.778 INFO: clust_OPTICS: clusters=0 outliers=781 delta=195.5
pid[178468] 2023-06-24 23:45:38.778 INFO: clust_OPTICS: iter=1 using min_samples=196
pid[178468] 2023-06-24 23:45:38.794 INFO: clust_OPTICS: clusters=1 outliers=701 delta=195
pid[178468] 2023-06-24 23:45:38.794 INFO: clust_OPTICS: iter=2 using min_samples=99
pid[178468] 2023-06-24 23:45:38.806 INFO: clust_OPTICS: clusters=2 outliers=657 delta=97
pid[178468] 2023-06-24 23:45:38.806 INFO: clust_OPTICS: iter=3 using min_samples=

[M::mm_idx_gen::0.012*1.73] collected minimizers
[M::mm_idx_gen::0.015*1.98] sorted minimizers
[M::main::0.015*1.98] loaded/built the index for 830 target sequence(s)
[M::mm_mapopt_update::0.016*1.92] mid_occ = 426
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 830
[M::mm_idx_stat::0.017*1.88] distinct minimizers: 35402 (82.74% are singletons); average occurrences: 2.765; average spacing: 5.576


pid[178467] 2023-06-24 23:45:39.221 INFO: clust_OPTICS: clusters=1 outliers=1587 delta=500
pid[178467] 2023-06-24 23:45:39.221 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178469] 2023-06-24 23:45:39.305 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178469] 2023-06-24 23:45:39.305 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178467] 2023-06-24 23:45:39.310 INFO: clust_OPTICS: clusters=1 outliers=802 delta=250
pid[178467] 2023-06-24 23:45:39.310 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178467] 2023-06-24 23:45:39.409 INFO: clust_OPTICS: clusters=1 outliers=519 delta=125
pid[178467] 2023-06-24 23:45:39.409 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178469] 2023-06-24 23:45:39.411 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178469] 2023-06-24 23:45:39.411 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178469] 2023-06-24 23:45:39.500 INFO: clust_OPTICS: clusters=2 outliers=1257 delta=250
pid[178469] 2023-06-24 23:45:3

[M::worker_pipeline::0.414*2.89] mapped 830 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.419 sec; CPU: 1.202 sec; Peak RSS: 0.164 GB


pid[178469] 2023-06-24 23:45:39.739 INFO: clust_OPTICS: clusters=1 outliers=1670 delta=62
pid[178469] 2023-06-24 23:45:39.739 INFO: clust_OPTICS: iter=6 using min_samples=405
pid[178467] 2023-06-24 23:45:39.784 INFO: clust_OPTICS: clusters=1 outliers=134 delta=31
pid[178467] 2023-06-24 23:45:39.784 INFO: clust_OPTICS: iter=6 using min_samples=17
pid[178469] 2023-06-24 23:45:39.816 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=-93
pid[178469] 2023-06-24 23:45:39.816 INFO: clust_OPTICS: iter=7 using min_samples=265
pid[178469] 2023-06-24 23:45:39.895 INFO: clust_OPTICS: clusters=2 outliers=1257 delta=-47
pid[178469] 2023-06-24 23:45:39.895 INFO: clust_OPTICS: iter=8 using min_samples=195


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a 

pid[178469] 2023-06-24 23:45:39.972 INFO: clust_OPTICS: clusters=3 outliers=956 delta=70
pid[178469] 2023-06-24 23:45:39.973 INFO: clust_OPTICS: iter=9 using min_samples=160
pid[178468] 2023-06-24 23:45:40.030 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:40.055 INFO: clust_OPTICS: clusters=2 outliers=697 delta=35
pid[178469] 2023-06-24 23:45:40.055 INFO: clust_OPTICS: iter=10 using min_samples=212
pid[178469] 2023-06-24 23:45:40.128 INFO: clust_OPTICS: clusters=3 outliers=1081 delta=17
pid[178469] 2023-06-24 23:45:40.128 INFO: clust_OPTICS: iter=11 using min_samples=238
pid[178468] 2023-06-24 23:45:40.185 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:40.185 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:40.190 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:40.190 INFO: clust_OPTICS: iter=0 using min_samples=415
pid[178468] 2023-06-24 23:45:40.214 INFO: clust_OPTICS: clusters=0 outliers=830 delta=207.5
pid[178468] 2023-06-24 23:45:40.214 INFO: 

[M::mm_idx_gen::0.007*1.80] collected minimizers
[M::mm_idx_gen::0.010*2.06] sorted minimizers
[M::main::0.010*2.06] loaded/built the index for 439 target sequence(s)
[M::mm_mapopt_update::0.011*1.98] mid_occ = 187
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 439
[M::mm_idx_stat::0.011*1.93] distinct minimizers: 23253 (82.79% are singletons); average occurrences: 2.248; average spacing: 5.556
[M::worker_pipeline::0.139*2.84] mapped 439 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.143 sec; CPU: 0.400 sec; Peak RSS: 0.167 GB
[M::mm_idx_gen::0.002*3.64] collected minimizers
[M::mm_idx_gen::0.003*3.40] sorted minimizers
[M::main::0.003*3.39] loaded/built the index for 150 target sequence(s)
[M::mm_mapopt_update::0.004*3.13] mid_occ = 82
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 150
[M::mm_idx_stat::0.004*3.0

pid[178469] 2023-06-24 23:45:40.987 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:40.994 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:41.013 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:41.013 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:41.014 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:41.014 INFO: clust_OPTICS: iter=0 using min_samples=75
pid[178469] 2023-06-24 23:45:41.016 INFO: clust_OPTICS: clusters=1 outliers=91 delta=37.5
pid[178469] 2023-06-24 23:45:41.017 INFO: clust_OPTICS: iter=1 using min_samples=38
pid[178469] 2023-06-24 23:45:41.019 INFO: clust_OPTICS: clusters=1 outliers=54 delta=37
pid[178469] 2023-06-24 23:45:41.019 INFO: clust_OPTICS: iter=2 using min_samples=20
pid[178469] 2023-06-24 23:45:41.022 INFO: clust_OPTICS: clusters=1 outliers=24 delta=18
pid[178469] 2023-06-24 23:45:41.022 INFO: clust_OPTICS: iter=3 using min_samples=11
pid[178469] 2023-06-24 23:45:41.027 INFO: clust_OPTICS: clusters=2 outlie

[M::mm_idx_gen::0.003*2.90] collected minimizers
[M::mm_idx_gen::0.005*2.88] sorted minimizers
[M::main::0.005*2.87] loaded/built the index for 149 target sequence(s)
[M::mm_mapopt_update::0.005*2.74] mid_occ = 96
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 149
[M::mm_idx_stat::0.005*2.65] distinct minimizers: 7652 (84.49% are singletons); average occurrences: 2.302; average spacing: 5.388
[M::worker_pipeline::0.038*2.74] mapped 149 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.039 sec; CPU: 0.106 sec; Peak RSS: 0.156 GB


pid[178468] 2023-06-24 23:45:41.198 INFO: clust_OPTICS: clusters=2 outliers=10 delta=3
pid[178468] 2023-06-24 23:45:41.198 INFO: clust_OPTICS: iter=7 using min_samples=10
pid[178468] 2023-06-24 23:45:41.217 INFO: clust_OPTICS: clusters=3 outliers=48 delta=1
pid[178468] 2023-06-24 23:45:41.218 INFO: clust_OPTICS: iter=8 using min_samples=12
pid[178468] 2023-06-24 23:45:41.235 INFO: clust_OPTICS: clusters=2 outliers=62 delta=-2
pid[178468] 2023-06-24 23:45:41.235 INFO: clust_OPTICS: iter=9 using min_samples=9
pid[178468] 2023-06-24 23:45:41.257 INFO: clust_OPTICS: clusters=4 outliers=43 delta=-1
pid[178468] 2023-06-24 23:45:41.257 INFO: clust_OPTICS: iter=10 using min_samples=8
pid[178468] 2023-06-24 23:45:41.286 INFO: clust_OPTICS: clusters=3 outliers=19 delta=1
pid[178468] 2023-06-24 23:45:41.286 INFO: n_clusters=4 n_unclustered=43 N=439
pid[178468] 2023-06-24 23:45:41.291 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:45:41.319 INFO: preparing precomputed data
pid[178469

[M::mm_idx_gen::0.005*2.26] collected minimizers
[M::mm_idx_gen::0.007*2.39] sorted minimizers
[M::main::0.007*2.39] loaded/built the index for 268 target sequence(s)
[M::mm_mapopt_update::0.007*2.28] mid_occ = 112
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 268
[M::mm_idx_stat::0.008*2.22] distinct minimizers: 15778 (82.73% are singletons); average occurrences: 1.999; average spacing: 5.558
[M::worker_pipeline::0.069*2.78] mapped 268 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.070 sec; CPU: 0.193 sec; Peak RSS: 0.167 GB


pid[178468] 2023-06-24 23:45:41.647 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:41.704 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:41.704 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:41.705 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:41.705 INFO: clust_OPTICS: iter=0 using min_samples=134
pid[178468] 2023-06-24 23:45:41.709 INFO: clust_OPTICS: clusters=0 outliers=268 delta=67.0
pid[178468] 2023-06-24 23:45:41.709 INFO: clust_OPTICS: iter=1 using min_samples=67
pid[178468] 2023-06-24 23:45:41.713 INFO: clust_OPTICS: clusters=1 outliers=259 delta=67
pid[178468] 2023-06-24 23:45:41.713 INFO: clust_OPTICS: iter=2 using min_samples=34
pid[178468] 2023-06-24 23:45:41.717 INFO: clust_OPTICS: clusters=1 outliers=205 delta=33
pid[178468] 2023-06-24 23:45:41.717 INFO: clust_OPTICS: iter=3 using min_samples=18
pid[178468] 2023-06-24 23:45:41.722 INFO: clust_OPTICS: clusters=2 outliers=167 delta=16
pid[178468] 2023-06-24 23:45:41.722 INFO: clust_O

[M::mm_idx_gen::0.025*1.27] collected minimizers
[M::mm_idx_gen::0.032*1.63] sorted minimizers
[M::main::0.032*1.63] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.60] mid_occ = 1012
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.58] distinct minimizers: 62228 (80.28% are singletons); average occurrences: 3.753; average spacing: 5.488


pid[178468] 2023-06-24 23:45:41.854 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.002*4.74] collected minimizers
[M::mm_idx_gen::0.002*4.18] sorted minimizers
[M::main::0.002*4.17] loaded/built the index for 55 target sequence(s)
[M::mm_mapopt_update::0.003*3.94] mid_occ = 47
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 55
[M::mm_idx_stat::0.003*3.81] distinct minimizers: 3155 (64.66% are singletons); average occurrences: 2.292; average spacing: 5.478
[M::worker_pipeline::0.489*2.79] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.489 sec; CPU: 1.363 sec; Peak RSS: 0.167 GB
[M::worker_pipeline::1.854*2.95] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.858 sec; CPU: 5.466 sec; Peak RSS: 0.165 GB


pid[178467] 2023-06-24 23:45:44.255 INFO: clust_OPTICS: clusters=1 outliers=2 delta=3
pid[178467] 2023-06-24 23:45:44.255 INFO: clust_OPTICS: iter=9 using min_samples=6
pid[178469] 2023-06-24 23:45:44.815 INFO: preparing precomputed data
pid[178469] 2023-06-24 23:45:45.176 INFO: cluster_compute: running optics
pid[178469] 2023-06-24 23:45:45.177 INFO: Running OPTICS
pid[178469] 2023-06-24 23:45:45.196 INFO: max_eps = 0.5
pid[178469] 2023-06-24 23:45:45.196 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178469] 2023-06-24 23:45:45.326 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178469] 2023-06-24 23:45:45.326 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178469] 2023-06-24 23:45:45.431 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500
pid[178469] 2023-06-24 23:45:45.431 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178469] 2023-06-24 23:45:45.498 INFO: clust_OPTICS: clusters=1 outliers=1172 delta=250
pid[178469] 2023-06-24 23:45:45.499 INFO: cl

[M::mm_idx_gen::0.015*1.50] collected minimizers
[M::mm_idx_gen::0.019*1.78] sorted minimizers
[M::main::0.019*1.78] loaded/built the index for 1108 target sequence(s)
[M::mm_mapopt_update::0.020*1.73] mid_occ = 574
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1108
[M::mm_idx_stat::0.021*1.70] distinct minimizers: 44423 (81.46% are singletons); average occurrences: 2.967; average spacing: 5.366


pid[178468] 2023-06-24 23:45:47.631 INFO: cluster_eval: number of clusters = 55


[M::worker_pipeline::0.516*2.90] mapped 1108 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.518 sec; CPU: 1.495 sec; Peak RSS: 0.163 GB


pid[178467] 2023-06-24 23:45:48.603 INFO: preparing precomputed data
pid[178467] 2023-06-24 23:45:48.803 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:48.803 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:48.809 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:48.810 INFO: clust_OPTICS: iter=0 using min_samples=554
pid[178468] 2023-06-24 23:45:48.847 INFO: Running kmeans with n_clusters = 4
pid[178467] 2023-06-24 23:45:48.849 INFO: clust_OPTICS: clusters=0 outliers=1108 delta=277.0
pid[178467] 2023-06-24 23:45:48.849 INFO: clust_OPTICS: iter=1 using min_samples=277
pid[178467] 2023-06-24 23:45:48.882 INFO: clust_OPTICS: clusters=0 outliers=1108 delta=277
pid[178467] 2023-06-24 23:45:48.883 INFO: clust_OPTICS: iter=2 using min_samples=139
pid[178467] 2023-06-24 23:45:48.912 INFO: clust_OPTICS: clusters=1 outliers=970 delta=138
pid[178467] 2023-06-24 23:45:48.912 INFO: clust_OPTICS: iter=3 using min_samples=70
pid[178468] 2023-06-24 23:45:48.942 INFO: Getting resu

[M::mm_idx_gen::0.032*1.15] collected minimizers
[M::mm_idx_gen::0.041*1.50] sorted minimizers
[M::main::0.041*1.50] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.044*1.46] mid_occ = 444
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.046*1.44] distinct minimizers: 117912 (84.60% are singletons); average occurrences: 2.042; average spacing: 5.438


pid[178467] 2023-06-24 23:45:49.732 INFO: clust_OPTICS: clusters=2 outliers=8 delta=4
pid[178467] 2023-06-24 23:45:49.732 INFO: clust_OPTICS: iter=8 using min_samples=13
pid[178467] 2023-06-24 23:45:49.855 INFO: clust_OPTICS: clusters=1 outliers=85 delta=2
pid[178467] 2023-06-24 23:45:49.855 INFO: clust_OPTICS: iter=9 using min_samples=4


[M::worker_pipeline::0.791*2.85] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 0.796 sec; CPU: 2.262 sec; Peak RSS: 0.166 GB


pid[178469] 2023-06-24 23:45:50.118 INFO: clust_OPTICS: clusters=1 outliers=4 delta=3
pid[178469] 2023-06-24 23:45:50.118 INFO: clust_OPTICS: iter=9 using min_samples=11
pid[178467] 2023-06-24 23:45:50.504 INFO: clust_OPTICS: clusters=1 outliers=2 delta=-3
pid[178467] 2023-06-24 23:45:50.505 INFO: n_clusters=3 n_unclustered=48 N=1108
pid[178467] 2023-06-24 23:45:50.512 INFO: Making directory ./clusters/
pid[178467] 2023-06-24 23:45:50.561 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178467] 2023-06-24 23:45:50.561 INFO: cluster_spoa_merge: reading consensus
pid[178467] 2023-06-24 23:45:50.576 INFO: cluster_split: splitting on cid=cluster8 8/15
pid[178467] 2023-06-24 23:45:50.601 INFO: cluster_compute: computing pairwise distance matrix
pid[178467] 2023-06-24 23:45:50.601 INFO: Making directory ./clusters/


[M::mm_idx_gen::0.025*1.22] collected minimizers
[M::mm_idx_gen::0.032*1.56] sorted minimizers
[M::main::0.032*1.56] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.54] mid_occ = 1220
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.034*1.53] distinct minimizers: 56978 (79.56% are singletons); average occurrences: 4.122; average spacing: 5.550


pid[178469] 2023-06-24 23:45:50.851 INFO: clust_OPTICS: clusters=1 outliers=31 delta=1
pid[178469] 2023-06-24 23:45:50.851 INFO: clust_OPTICS: iter=10 using min_samples=11
pid[178468] 2023-06-24 23:45:51.119 INFO: preparing precomputed data
pid[178468] 2023-06-24 23:45:51.516 INFO: cluster_compute: running optics
pid[178468] 2023-06-24 23:45:51.516 INFO: Running OPTICS
pid[178468] 2023-06-24 23:45:51.540 INFO: max_eps = 0.5
pid[178468] 2023-06-24 23:45:51.540 INFO: clust_OPTICS: iter=0 using min_samples=997
pid[178469] 2023-06-24 23:45:51.557 INFO: clust_OPTICS: clusters=1 outliers=31 delta=1
pid[178469] 2023-06-24 23:45:51.557 INFO: clust_OPTICS: iter=11 using min_samples=13
pid[178468] 2023-06-24 23:45:51.715 INFO: clust_OPTICS: clusters=0 outliers=1994 delta=498.5
pid[178468] 2023-06-24 23:45:51.715 INFO: clust_OPTICS: iter=1 using min_samples=499
pid[178468] 2023-06-24 23:45:51.847 INFO: clust_OPTICS: clusters=0 outliers=1994 delta=498
pid[178468] 2023-06-24 23:45:51.847 INFO: clus

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is tryi

pid[178468] 2023-06-24 23:45:52.068 INFO: clust_OPTICS: clusters=4 outliers=1639 delta=62
pid[178468] 2023-06-24 23:45:52.068 INFO: clust_OPTICS: iter=5 using min_samples=33
pid[178468] 2023-06-24 23:45:52.234 INFO: clust_OPTICS: clusters=2 outliers=935 delta=31
pid[178468] 2023-06-24 23:45:52.234 INFO: clust_OPTICS: iter=6 using min_samples=79
pid[178468] 2023-06-24 23:45:52.306 INFO: clust_OPTICS: clusters=2 outliers=1757 delta=15
pid[178468] 2023-06-24 23:45:52.306 INFO: clust_OPTICS: iter=7 using min_samples=79
pid[178469] 2023-06-24 23:45:52.320 INFO: clust_OPTICS: clusters=1 outliers=37 delta=-2
pid[178469] 2023-06-24 23:45:52.320 INFO: clust_OPTICS: iter=12 using min_samples=13
pid[178468] 2023-06-24 23:45:52.383 INFO: clust_OPTICS: clusters=2 outliers=1757 delta=15
pid[178468] 2023-06-24 23:45:52.383 INFO: clust_OPTICS: iter=8 using min_samples=102
pid[178468] 2023-06-24 23:45:52.448 INFO: clust_OPTICS: clusters=1 outliers=1836 delta=-23
pid[178468] 2023-06-24 23:45:52.448 INFO

[M::worker_pipeline::2.223*2.95] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 2.227 sec; CPU: 6.563 sec; Peak RSS: 0.195 GB


pid[178468] 2023-06-24 23:45:53.240 INFO: clust_OPTICS: clusters=4 outliers=1639 delta=-1
pid[178468] 2023-06-24 23:45:53.240 INFO: n_clusters=4 n_unclustered=1412 N=1994
pid[178468] 2023-06-24 23:45:53.251 INFO: Making directory ./clusters/
pid[178468] 2023-06-24 23:45:53.353 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178468] 2023-06-24 23:45:53.353 INFO: cluster_spoa_merge: reading consensus
pid[178468] 2023-06-24 23:45:53.375 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:45:53.676 INFO: clust_OPTICS: clusters=1 outliers=45 delta=-1
pid[178469] 2023-06-24 23:45:53.676 INFO: clust_OPTICS: iter=14 using min_samples=14
pid[178469] 2023-06-24 23:45:54.171 INFO: clust_OPTICS: clusters=1 outliers=45 delta=-1
pid[178469] 2023-06-24 23:45:54.171 INFO: n_clusters=2 n_unclustered=22 N=2000
pid[178469] 2023-06-24 23:45:54.180 INFO: Making directory ./clusters/
pid[178469] 2023-06-24 23:45:54.220 INFO: cluster_spoa_merge: spoa on 0/1.0
pid[178469] 2023-06-24 23:45:54.220 INFO: cl

[M::mm_idx_gen::0.002*4.47] collected minimizers
[M::mm_idx_gen::0.002*4.04] sorted minimizers
[M::main::0.002*4.03] loaded/built the index for 59 target sequence(s)
[M::mm_mapopt_update::0.002*3.80] mid_occ = 51
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 59
[M::mm_idx_stat::0.003*3.64] distinct minimizers: 3260 (65.12% are singletons); average occurrences: 2.387; average spacing: 5.461
[M::mm_idx_gen::0.025*1.18] collected minimizers
[M::mm_idx_gen::0.031*1.53] sorted minimizers
[M::main::0.031*1.53] loaded/built the index for 2000 target sequence(s)
[M::mm_mapopt_update::0.033*1.51] mid_occ = 1018
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 2000
[M::mm_idx_stat::0.033*1.49] distinct minimizers: 61785 (80.05% are singletons); average occurrences: 3.779; average spacing: 5.520


pid[178467] 2023-06-24 23:45:54.805 INFO: cluster_compute: running optics
pid[178467] 2023-06-24 23:45:54.806 INFO: Running OPTICS
pid[178467] 2023-06-24 23:45:54.824 INFO: max_eps = 0.5
pid[178467] 2023-06-24 23:45:54.824 INFO: clust_OPTICS: iter=0 using min_samples=1000
pid[178467] 2023-06-24 23:45:54.936 INFO: clust_OPTICS: clusters=0 outliers=2000 delta=500.0
pid[178467] 2023-06-24 23:45:54.936 INFO: clust_OPTICS: iter=1 using min_samples=500
pid[178467] 2023-06-24 23:45:55.018 INFO: clust_OPTICS: clusters=1 outliers=1002 delta=500
pid[178467] 2023-06-24 23:45:55.018 INFO: clust_OPTICS: iter=2 using min_samples=250
pid[178467] 2023-06-24 23:45:55.086 INFO: clust_OPTICS: clusters=1 outliers=923 delta=250
pid[178467] 2023-06-24 23:45:55.087 INFO: clust_OPTICS: iter=3 using min_samples=125
pid[178467] 2023-06-24 23:45:55.176 INFO: clust_OPTICS: clusters=1 outliers=280 delta=125
pid[178467] 2023-06-24 23:45:55.176 INFO: clust_OPTICS: iter=4 using min_samples=63
pid[178467] 2023-06-24 2

[M::worker_pipeline::1.727*2.94] mapped 2000 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --dual=no --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 1.731 sec; CPU: 5.078 sec; Peak RSS: 0.167 GB
[M::worker_pipeline::2.006*2.91] mapped 20929 sequences
[M::main] Version: 2.17-r941
[M::main] CMD: minimap2 -c -k15 -w10 -p 0.9 -D --for-only -o ./clusters/results.paf ./clusters/database.fq ./clusters/read1.fq
[M::main] Real time: 2.008 sec; CPU: 5.845 sec; Peak RSS: 0.143 GB


pid[178467] 2023-06-24 23:45:56.655 INFO: clust_OPTICS: clusters=1 outliers=9 delta=7
pid[178467] 2023-06-24 23:45:56.655 INFO: clust_OPTICS: iter=8 using min_samples=7


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_o['outlier'] = True
  df_c = df_c.append(cout[['id','sequence','split']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_q['id'] = df_q['id'].astype(str)
A value is tryi

KeyboardInterrupt: 

e/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/cluster/_optics.py", line 595, in _set_reach_dist
    indices = nbrs.radius_neighbors(P, radius=max_eps, return_distance=False)[0]
  File "/home/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/neighbors/_base.py", line 1084, in radius_neighbors
    neigh_ind_list = sum(chunked_results, [])
  File "/home/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1717, in pairwise_distances_chunked
    D_chunk = pairwise_distances(X_chunk, Y, metric=metric, n_jobs=n_jobs, **kwds)
  File "/home/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 1851, in pairwise_distances
    X, _ = check_pairwise_arrays(
  File "/home/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/metrics/pairwise.py", line 164, in check_pairwise_arrays
    Y = check_array(
  File "/home/pascal/anaconda3/envs/GEANS/lib/python3.8/site-packages/sklearn/ut

## Taxonomic assignment using PR2

In [35]:
path_to_blastdb = "/home/pascal/Documents/GEANS/eDNA_18S/PR2_5.0.0/pr2_version_5.0.0_SSU_taxo_long.fasta"
numthreads = 8
mts = 10
pct_ident = 90
db = 'PR2'

for base in base_name:
    file_path = './results/qc/' + base + '/' + base + '_clusters_cut.fasta'
    if os.path.exists(file_path):
        print("Running blastn on", base)
        output_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn.csv'

        command = [
            "blastn",
            "-db", path_to_blastdb,
            "-query", file_path,
            "-task", "blastn",
            "-dust", "no",
            "-num_threads", str(numthreads),
            "-outfmt", "7 delim=, sseqid stitle qacc sacc evalue bitscore length pident",
            "-max_target_seqs", str(mts),
            "-perc_identity", str(pct_ident),
            "-out", output_csv
        ]

        subprocess.run(command)

Running blastn on barcode01




Running blastn on barcode02




Running blastn on barcode03




Running blastn on barcode04




Running blastn on barcode05




Running blastn on barcode06




Running blastn on barcode07




Running blastn on barcode08
Running blastn on barcode09
Running blastn on barcode10




Running blastn on barcode11
Running blastn on barcode12




In [36]:
db = 'PR2'


for base in base_name:
    input_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn.csv'
    output_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn2.csv'
    if os.path.exists(input_csv):
        with open(input_csv, 'r') as infile, open(output_csv, 'w') as outfile:
            for line in infile:
                if not line.startswith('#'):
                    outfile.write(line)



In [37]:
db = 'PR2'

for base in base_name:
    input_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn.csv'
    output_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn2.csv'
    if os.path.exists(input_csv):
        with open(input_csv, 'r') as infile, open(output_csv, 'w') as outfile:
            for line in infile:
                if not line.startswith('#'):
                    comma_count = line.count(',')
                    if comma_count == 10:
                        comma_indices = []
                        for i, char in enumerate(line):
                            if char == ',':
                                comma_indices.append(i)

                        if len(comma_indices) >= 6:
                            line = line[:comma_indices[0]] + line[comma_indices[0]+1:]
                            line = line[:comma_indices[2]-1] + line[comma_indices[2]:]
                            line = line[:comma_indices[5]-2] + line[comma_indices[5]-1:]
                    elif comma_count == 13:
                        comma_indices = []
                        for i, char in enumerate(line):
                            if char == ',':
                                comma_indices.append(i)

                        indices_to_remove = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  # Create a list to track the indices to remove

                        if len(comma_indices) >= 13:
                            indices_to_remove[0] = 1  # Mark the first comma to remove
                            indices_to_remove[1] = 1  # Mark the second comma to remove
                            indices_to_remove[3] = 1  # Mark the fourth comma to remove
                            indices_to_remove[4] = 1  # Mark the fifth comma to remove
                            indices_to_remove[7] = 1  # Mark the eighth comma to remove
                            indices_to_remove[8] = 1  # Mark the ninth comma to remove

                        updated_line = ""
                        for i, char in enumerate(line):
                            if char == ',' and indices_to_remove[comma_indices.index(i)] == 1:
                                continue
                            updated_line += char

                        line = updated_line
                    outfile.write(line)

In [38]:

db = 'PR2'




for base in base_name:
    input_csv = './results/qc/' + base + '/' + base + '_' + db + '_blastn2.csv'
    print(input_csv)
    output_csv = './results/qc/' + base + '/' + base + '_' + db + '_ASV.csv'
    if os.path.exists(input_csv) and os.path.getsize(input_csv) > 0:
        # load file
        df = pd.read_csv(input_csv, sep=',')
    
        # add column names
        df.columns=['accession', 'taxonomic_annotation', 'cluster', 'accession', 'evalue', 'bitscore', 'alignment_length', 'percentage_identity']

        # select only rows with alignment length >= 500 bp
        df2 = df[df['alignment_length'] >= 500]

        # arrange rows by match percentage
        df3 = df2.sort_values(by=['percentage_identity'], ascending=False)

        # keep only first row of each ASV
        df4 = df3.drop_duplicates(subset=['cluster'], keep='first', inplace=False, ignore_index=False)

        # add sample name information
        df4['#sample_name'] = base

        df4['taxonomy'] = df4['taxonomic_annotation'].replace('"', '')

        df5 = df4[['#sample_name', 'cluster', 'accession', 'evalue', 'bitscore', 'alignment_length', 'percentage_identity', 'taxonomic_annotation']]


        df5.to_csv(output_csv, sep=';', index=False, header=False)


./results/qc/barcode01/barcode01_PR2_blastn2.csv
./results/qc/barcode02/barcode02_PR2_blastn2.csv
./results/qc/barcode03/barcode03_PR2_blastn2.csv
./results/qc/barcode04/barcode04_PR2_blastn2.csv
./results/qc/barcode05/barcode05_PR2_blastn2.csv
./results/qc/barcode06/barcode06_PR2_blastn2.csv
./results/qc/barcode07/barcode07_PR2_blastn2.csv
./results/qc/barcode08/barcode08_PR2_blastn2.csv
./results/qc/barcode09/barcode09_PR2_blastn2.csv
./results/qc/barcode10/barcode10_PR2_blastn2.csv
./results/qc/barcode11/barcode11_PR2_blastn2.csv
./results/qc/barcode12/barcode12_PR2_blastn2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['#sample_name'] = base
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['taxonomy'] = df4['taxonomic_annotation'].replace('"', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['#sample_name'] = base
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

In [39]:
if os.path.exists("_PR2_eDNA.csv"):
    os.remove("_PR2_eDNA.csv")

for base in base_name:
    file_path = './results/qc/' + base + '/' + base + '_' + db + '_ASV.csv'
    if os.path.exists(file_path):
        with open(file_path, "r") as input_file, open("_PR2_eDNA.csv", "a") as output_file:
            output_file.write(input_file.read())

with open("_PR2_eDNA.csv", "r") as input_file, open("PR2_eDNA.csv", "w") as output_file:
    output_file.write("counts,cluster,accession,accession,evalue,bitscore,alignment_length,percentage_identity,taxonomic_annotation\n")
    for line in input_file:
        if not line.startswith("#"):
            output_file.write(line.replace(";", ",").replace("|", ","))

#shutil.copy("PR2_eDNA.csv", os.path.join(current_dir, "PR2_eDNA.csv"))