# Analysis for Pangebin June 2025


In [2]:
""" Handling errors """

import sys
import os
import logging

""" Exceptions handling """

class CustomException(Exception):
    """Exception carrying a single human-readable message.

    Raised by the file checks in this cell (e.g. when an expected file is
    missing) so that the message can be reported through the logging helpers.
    """

    def __init__(self, msg):
        # Forward the message to Exception so str(exc) and exc.args expose it.
        Exception.__init__(self, msg)
        
def _check_file(in_file, log=False, msg='FILE'):
    """Verify that `in_file` is an existing regular file.

    A missing file is reported through process_exception (which terminates
    the process); an empty file only triggers process_warning.

    Args:
        in_file: Path of the file to check.
        log: When true, log the path at INFO level once the check succeeded.
        msg: Prefix placed in front of the path in every emitted message.
    """
    try:
        if os.path.isfile(in_file):
            if os.path.getsize(in_file) == 0:
                process_warning(f'{msg}\t{in_file}: is empty')
        else:
            raise CustomException('File is missing')
    except Exception as err:
        process_exception(f'{msg}\t{in_file}: {err}')
    else:
        # Reached only when nothing was raised above (an empty file still counts).
        if log:
            logging.info(f'{msg}\t{in_file}')
            
def check_file(in_file):
    """Check that `in_file` exists without logging its path on success."""
    return _check_file(in_file, False)

def log_file(in_file):
    """Check that `in_file` exists and log its path at INFO level on success."""
    return _check_file(in_file, True)
    
def process_exception(msg):
    """Log `msg` with logging.exception, echo it on stderr and exit with status 1."""
    logging.exception(msg)
    stderr_line = f'EXCEPTION\t{msg}'
    print(stderr_line, file=sys.stderr)
    raise SystemExit(1)

def process_error(msg):
    """Log `msg` at ERROR level, echo it on stderr and exit with status 1."""
    logging.error(msg)
    stderr_line = f'ERROR\t{msg}'
    print(stderr_line, file=sys.stderr)
    raise SystemExit(1)

def process_warning(msg):
    """Log `msg` at WARNING level and echo it on stderr; execution continues."""
    logging.warning(msg)
    stderr_line = f'WARNING\t{msg}'
    print(stderr_line, file=sys.stderr)

""" Files and directories function """
def create_directory(in_dir_list):
    """Create every directory of `in_dir_list` that does not exist yet.

    Empty entries (e.g. the dirname of a bare file name) and paths that
    already exist are skipped; missing parent directories are created too.
    """
    for path in in_dir_list:
        if not path or os.path.exists(path):
            continue
        os.makedirs(path)


In [None]:
# plaseval code
#!/usr/bin/python

from __future__ import division
import pandas as pd
import os

def eval_bins(pred_dict, pls_dict, len_dict, th_len, eval):
    """Score predicted bins against reference plasmids and write a report.

    For every predicted bin the best-matching reference plasmid is searched
    (precision) and for every reference plasmid the best-matching predicted
    bin (recall). Each statistic is computed twice: unweighted (number of
    contigs) and weighted (cumulative contig length). Contigs shorter than
    `th_len` are ignored throughout.

    Args:
        pred_dict: Predicted bins (key: bin id, value: list of contig ids).
        pls_dict: Reference plasmid bins, same format as `pred_dict`.
        len_dict: Contig id -> contig length; must contain every contig
            listed in `pred_dict` and `pls_dict`.
        th_len: Minimum contig length for a contig to be counted.
        eval: Writable text stream receiving the report. The name shadows
            the builtin but is kept so keyword callers keep working.

    Raises:
        KeyError: If a contig of either dictionary is missing from `len_dict`.
    """

    def create_bin_entry():
        """Return a fresh weighted/unweighted statistics entry for one bin."""
        return {
            eval_type: {'Val': 0, 'Bin': 'NA', 'Common': 0, 'Total': 0}
            for eval_type in ('wtd', 'unwtd')
        }

    def get_total_ctgs(ctg_list):
        """Return (count, cumulative length) of the contigs reaching th_len."""
        kept_lens = [len_dict[ctg] for ctg in ctg_list if len_dict[ctg] >= th_len]
        return len(kept_lens), sum(kept_lens)

    def compute_best_bin(stat_dict, ctg_list, opp_bins_dict):
        """Update `stat_dict` with the best match among `opp_bins_dict`.

        `opp_bins_dict` holds the reference plasmids when computing precision
        and the predicted bins when computing recall. Ties keep the first bin
        encountered because only strictly better values replace the entry.
        """
        n_ctgs = stat_dict['unwtd']['Total']
        len_ctgs = stat_dict['wtd']['Total']
        ctg_set = set(ctg_list)  # built once, reused for every opposite bin
        for bin_id, opp_ctgs in opp_bins_dict.items():
            common_lens = [
                len_dict[ctg]
                for ctg in ctg_set.intersection(opp_ctgs)
                if len_dict[ctg] >= th_len
            ]
            ncommon_ctgs, lencommon_ctgs = len(common_lens), sum(common_lens)
            # Each ratio is guarded by its own denominator: a bin made only of
            # zero-length contigs has n_ctgs >= 1 but len_ctgs == 0.
            n_stat = ncommon_ctgs / n_ctgs if n_ctgs > 0 else 0
            len_stat = lencommon_ctgs / len_ctgs if len_ctgs > 0 else 0
            if n_stat > stat_dict['unwtd']['Val']:
                stat_dict['unwtd'] = {'Val': n_stat, 'Bin': bin_id,
                                      'Common': ncommon_ctgs, 'Total': n_ctgs}
            if len_stat > stat_dict['wtd']['Val']:
                stat_dict['wtd'] = {'Val': len_stat, 'Bin': bin_id,
                                    'Common': lencommon_ctgs, 'Total': len_ctgs}
        return stat_dict

    def score_bins(bins, opp_bins):
        """Return {bin id: statistics entry} for `bins` matched to `opp_bins`."""
        scores = {}
        for bin_id, ctg_list in bins.items():
            entry = create_bin_entry()
            n_total, len_total = get_total_ctgs(ctg_list)
            entry['unwtd']['Total'] = n_total
            entry['wtd']['Total'] = len_total
            scores[bin_id] = compute_best_bin(entry, ctg_list, opp_bins)
        return scores

    def write_section(header_lines, scores):
        """Write one details section; return its overall (unwtd, wtd) statistic."""
        for line in header_lines:
            eval.write(line)
        common = {'unwtd': 0, 'wtd': 0}
        total = {'unwtd': 0, 'wtd': 0}
        for bin_id, stat_dict in scores.items():
            for eval_type in ('unwtd', 'wtd'):
                common[eval_type] += stat_dict[eval_type]['Common']
                total[eval_type] += stat_dict[eval_type]['Total']
            n_stat = float("{:.4f}".format(stat_dict['unwtd']['Val']))
            len_stat = float("{:.4f}".format(stat_dict['wtd']['Val']))
            # str() on the ids: bin ids read from a TSV may be integers.
            fields = [str(bin_id),
                      str(n_stat), str(stat_dict['unwtd']['Bin']),
                      str(len_stat), str(stat_dict['wtd']['Bin'])]
            eval.write('\t'.join(fields) + "\n")
        ovr_n_stat = common['unwtd'] / total['unwtd'] if total['unwtd'] != 0 else 0
        ovr_len_stat = common['wtd'] / total['wtd'] if total['wtd'] != 0 else 0
        return ovr_n_stat, ovr_len_stat

    def harmonic_mean(prec, rec):
        """Return the F1 score of `prec` and `rec`, or 0 when both are 0."""
        if (prec + rec) != 0:
            return 2 * prec * rec / (prec + rec)
        return 0

    recall = score_bins(pls_dict, pred_dict)
    precision = score_bins(pred_dict, pls_dict)

    ovr_n_prec, ovr_len_prec = write_section(
        ["#Precision: Proportion of correctedly identified contigs for each prediction\n",
         ">Precision details\n",
         "#Predicted_bin\tUnwtd_Precision\tUnwtd_Reference_plasmid\tWtd_Precision\tWtd_Reference_plasmid\n"],
        precision)
    eval.write("\n")

    ovr_n_rec, ovr_len_rec = write_section(
        ["#Recall: Proportion of correctedly identified contigs for each reference\n",
         ">Recall details\n",
         "#Reference_plasmid\tUnwtd_Recall\tUnwtd_Predicted_bin\tWtd_Recall\tWtd_Predicted_bin\n"],
        recall)
    eval.write("\n")

    n_f1 = harmonic_mean(ovr_n_prec, ovr_n_rec)
    len_f1 = harmonic_mean(ovr_len_prec, ovr_len_rec)

    eval.write("#Final statistics (Unwtd and Wtd)\n")
    eval.write(">Overall details\n")
    eval.write("#Overall_statistic\tUnwtd_statistic\tWtd_statistic\n")
    eval.write('Precision\t' + str(ovr_n_prec) + '\t' + str(ovr_len_prec) + '\n')
    eval.write('Recall\t' + str(ovr_n_rec) + '\t' + str(ovr_len_rec) + '\n')
    eval.write('F1\t' + str(n_f1) + '\t' + str(len_f1) + '\n')


def get_bin_details_noHD(len_dict, bins_file):
    """Read a tab-separated bins file with explicit column types and parse it.

    The first line is used as header; `plasmid` and `contig` are read as
    strings and `contig_len` as integer. Returns the (pls_dict, len_dict)
    pair produced by get_bin_details_tsv.
    """
    column_types = {'plasmid': str, 'contig': str, 'contig_len': int}
    bins_df = pd.read_csv(bins_file, sep="\t", header=0, dtype=column_types)
    return get_bin_details_tsv(len_dict, bins_df)

def get_bin_details(len_dict, bins_file):
    """Read a tab-separated bins file and parse it into bins and lengths.

    Args:
        len_dict: Contig id -> length mapping to extend.
        bins_file: Path (or file-like object) of a TSV file with the columns
            `plasmid`, `contig` and `contig_len`.

    Returns:
        The (pls_dict, len_dict) pair produced by get_bin_details_tsv.
    """
    # Force the id columns to str: without it purely numeric ids are parsed as
    # integers, which breaks the substring test on the plasmid name and the
    # string concatenation done when the report is written.
    bins_df = pd.read_csv(bins_file, sep="\t", dtype={'plasmid': str, 'contig': str})
    return get_bin_details_tsv(len_dict, bins_df)



	
 
def get_bin_details_tsv(len_dict, bins_file):
    """Group the contigs of a bins table by plasmid and record their lengths.

    Args:
        len_dict: Contig id (str) -> length mapping; updated in place.
        bins_file: DataFrame with the columns `plasmid`, `contig` and
            `contig_len` (despite the name this is a table, not a path).

    Returns:
        pls_dict: Plasmid id (str) -> list of contig ids (str) in order of
            first appearance, without duplicates.
        len_dict: The updated length mapping.

    Rows whose plasmid id contains "chromosome" or is missing are skipped
    entirely (their length is not recorded either).
    """
    pls_dict = {}
    seen = {}  # plasmid id -> set of its contigs, for O(1) duplicate checks
    for _, row in bins_file.iterrows():
        plasmid = row['plasmid']
        if pd.isna(plasmid):
            continue
        # Ids may come back as numbers when the table was read without dtypes.
        plasmid = str(plasmid)
        if "chromosome" in plasmid:
            continue
        contig = str(row['contig'])
        len_dict[contig] = row['contig_len']
        members = seen.setdefault(plasmid, set())
        if contig not in members:
            members.add(contig)
            pls_dict.setdefault(plasmid, []).append(contig)
    return pls_dict, len_dict

def eval_mode(pred_file, gt_file, min_len, output_file):
    """Evaluate predicted bins against ground-truth bins and write the report.

    Checks that both input files exist, creates the output directory if
    needed, loads both bin tables into a shared contig-length dictionary and
    lets eval_bins write the precision/recall/F1 report.

    Args:
        pred_file: Path of the predicted bins TSV.
        gt_file: Path of the ground-truth bins TSV.
        min_len: Minimum contig length for a contig to be counted.
        output_file: Path of the report to write (overwritten if present).
    """
    for in_file in (pred_file, gt_file):
        check_file(in_file)
    create_directory([os.path.dirname(output_file)])
    # The context manager guarantees the report is flushed and closed even
    # when reading the inputs or computing the statistics fails.
    with open(output_file, "w") as eval_file:
        # Each table becomes a dict: plasmid id -> list of contig ids.
        len_dict = {}
        pred_dict, len_dict = get_bin_details(len_dict, pred_file)
        gt_dict, len_dict = get_bin_details(len_dict, gt_file)
        eval_bins(pred_dict, gt_dict, len_dict, min_len, eval_file)

			


In [4]:
import sys
import gfapy
import os
import pandas as pd
!pwd

/Users/msgro/Progetti/pangebin-algo


In [5]:
# Root directory of the dataset: the analysis loop further down lists it and
# treats every sub-directory as one sample.
data = "/Users/msgro/Progetti/dataset"


In [6]:
# MAPPINGS, from GFA to mapping files

import pandas as pd
import os
import gfapy as gp
from typing import Dict, List, Tuple

def get_mappings_from_gfa(gfa_file: str):
    """
    Build the contig <-> fragment mappings described by a GFA file.

    Every segment of the graph is a fragment; its `cl` field is read as a
    comma-separated list of contig ids (presumably the assembly contigs the
    fragment originates from -- confirm against the tool writing the GFA).

    Args:
        gfa_file (str): Path to the GFA file.

    Returns:
        A 4-tuple:
        - contig_to_fragments: Key: Contig ID, Value: set of fragment IDs
        - fragments_to_contigs: Key: Fragment ID, Value: set of Contig IDs
        - all_contigs: set of every contig ID encountered
        - all_fragments: set of every fragment (segment) ID

    Raises:
        ValueError: If the parsed GFA object is falsy.
    """
    gfa = gp.Gfa.from_file(gfa_file)
    if not gfa:
        raise ValueError(f"Failed to read GFA file: {gfa_file}")

    contig_to_fragments = {}
    fragments_to_contigs = {}
    all_contigs = set()
    all_fragments = set()

    for segment in gfa.segments:
        fragment_id = segment.name
        contig_ids = set(str(segment.cl).split(','))
        all_fragments.add(fragment_id)
        # Store a copy so the per-fragment set is independent of contig_ids.
        fragments_to_contigs[fragment_id] = set(contig_ids)
        all_contigs |= contig_ids
        for contig_id in contig_ids:
            contig_to_fragments.setdefault(contig_id, set()).add(fragment_id)

    return contig_to_fragments, fragments_to_contigs, all_contigs, all_fragments
        


In [7]:
# def eval_pan_asm(gfa_file, pan_pred, asm_pred, pan_asm_gt):
    

In [15]:
def cumlen(frags: set, lenmap: dict) -> int:
    """Return the cumulative length of `frags`; ids missing from `lenmap` count as 0."""
    total = 0
    for frag in frags:
        total += lenmap.get(frag, 0)
    return total

In [27]:
def _fragments_of_bin(plasmid, group):
    """Return the union of the fragment sets attached to the rows of one bin.

    A row whose contig had no entry in the contig -> fragments mapping still
    carries the bare contig name instead of a set. Such rows are reported and
    skipped; the value is never passed to eval().
    """
    fragments = set()
    for frags in group['fragment']:
        if isinstance(frags, set):
            fragments.update(frags)
        else:
            print(f"Error processing fragments for plasmid {plasmid}: "
                  f"no fragment mapping for contig {frags!r}")
    return fragments


def _classify_predictions(predicted_bins, gt_plasmid_ids):
    """Split predicted ids into true/false positives and derive false negatives.

    Args:
        predicted_bins: Iterable of (plasmid id, set of predicted ids).
        gt_plasmid_ids: Set of ids belonging to a ground-truth plasmid.

    Returns:
        Three lists (true positives, false positives, false negatives), each
        sorted by the integer value of the ids.
    """
    true_pos, false_pos = set(), set()
    for plasmid, predicted in predicted_bins:
        print(f"Processing plasmid: {plasmid}, Predicted contigs: {predicted}")
        for item in predicted:
            (true_pos if item in gt_plasmid_ids else false_pos).add(item)
    # False positives never belong to the ground truth, so the false negatives
    # are exactly the ground-truth ids that were not predicted.
    false_neg = gt_plasmid_ids - true_pos
    return (sorted(true_pos, key=int),
            sorted(false_pos, key=int),
            sorted(false_neg, key=int))


sample_perf = dict()
perf = {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 0, 'totFP':0, 'totFN':0}
bins_dtype = {'plasmid': str, 'contig': str, 'contig_len': int}

for sample in os.listdir(data):
    sample_dir = os.path.join(data, sample)
    # Only real sample directories get an entry in the results: hidden files
    # and plain files living in the dataset folder are ignored.
    if sample.startswith('.') or not os.path.isdir(sample_dir):
        continue
    # Samples skipped further down (empty predictions) keep an all-zero row.
    sample_perf[sample] = perf.copy()
    print("----------Processing file:", sample)

    ske_bins = os.path.join(sample_dir, f"{sample}.ske.1.pbf.pred.tab")
    uni_bins = os.path.join(sample_dir, f"{sample}.uni.1.pbf.pred.tab")
    pan_mix_gt = os.path.join(sample_dir, f"{sample}.1.mix.pan.gt.tsv")
    pan_bins = os.path.join(sample_dir, f"{sample}.pan.1.pbf.pred.tab")

    panasm = os.path.join(sample_dir, f"{sample}.panasm.gfa")
    cf, fc, allc, allf = get_mappings_from_gfa(panasm)

    ske_bins_df = pd.read_csv(ske_bins, sep="\t", header=0, dtype=bins_dtype)
    uni_bins_df = pd.read_csv(uni_bins, sep="\t", header=0, dtype=bins_dtype)
    pan_bins_df = pd.read_csv(pan_bins, sep="\t", header=0, dtype=bins_dtype)

    print(ske_bins_df.head())
    print(uni_bins_df.head())
    print(pan_bins_df.head())
    if ske_bins_df.empty or pan_bins_df.empty:
        print(f"Skipping {sample} due to empty bins files.")
        continue

    # Assembly contigs (ske/uni) are mapped to the set of pangenome fragments
    # they belong to; unmapped contigs keep their own name.
    ske_bins_df_mapped = ske_bins_df.copy()
    ske_bins_df_mapped['fragment'] = ske_bins_df_mapped['contig'].apply(lambda x: cf.get(x, x))
    print(ske_bins_df_mapped.head())

    uni_bins_df_mapped = uni_bins_df.copy()
    uni_bins_df_mapped['fragment'] = uni_bins_df_mapped['contig'].apply(lambda x: cf.get(x, x))
    print(uni_bins_df_mapped.head())

    # The pangenome predictions already use fragment ids, so here the mapping
    # goes from fragment to the contigs it contains.
    pan_bins_df_mapped = pan_bins_df.copy()
    pan_bins_df_mapped['fragment'] = pan_bins_df_mapped['contig'].apply(lambda x: fc.get(x, x))
    print(pan_bins_df_mapped.head())

    gt_df = pd.read_csv(pan_mix_gt, sep="\t", header=None)
    gt_df.columns = ["plasmid", "contig", "score", "total_len", "contig_len"]
    gt_df = gt_df.astype({'plasmid': str, 'contig': str, 'score': str, 'total_len': str, 'contig_len': str})

    gt_df_mapped = gt_df.copy()
    print(fc)
    gt_df_mapped['fragment'] = gt_df_mapped['contig'].apply(lambda x: fc.get(x, x))
    print(gt_df_mapped.head())

    is_chromosome = gt_df_mapped['plasmid'].str.contains("chromosome")
    gt_chr_frag = sorted(set(gt_df_mapped[is_chromosome]['contig']), key=int)
    gt_pl_frag = sorted(set(gt_df_mapped[~is_chromosome]['contig']), key=int)
    print("gt chromosomes\t", gt_chr_frag)
    print("gt plasmids\t", gt_pl_frag)
    gt_pl_ids = set(gt_pl_frag)  # set for O(1) membership tests

    # Later rows overwrite earlier ones, as with a row-by-row assignment.
    len_dict = {
        str(contig): int(length)
        for contig, length in zip(gt_df_mapped['contig'], gt_df_mapped['contig_len'])
    }
    print("Contig lengths:", len_dict)

    # A predicted id is a true positive when it belongs to a ground-truth
    # plasmid (not a chromosome), a false positive otherwise.
    pan_true_positives, pan_false_positives, pan_false_negatives = _classify_predictions(
        ((plasmid, set(group['contig'].astype(str)))
         for plasmid, group in pan_bins_df.groupby('plasmid')),
        gt_pl_ids)
    print("Pan true positives:", pan_true_positives)
    print("Pan false positives:", pan_false_positives)
    print("Pan false negatives:", pan_false_negatives)

    ske_true_positives, ske_false_positives, ske_false_negatives = _classify_predictions(
        ((plasmid, _fragments_of_bin(plasmid, group))
         for plasmid, group in ske_bins_df_mapped.groupby('plasmid')),
        gt_pl_ids)
    uni_true_positives, uni_false_positives, uni_false_negatives = _classify_predictions(
        ((plasmid, _fragments_of_bin(plasmid, group))
         for plasmid, group in uni_bins_df_mapped.groupby('plasmid')),
        gt_pl_ids)

    print("Ske true positives:", ske_true_positives)
    print("Ske false positives:", ske_false_positives)
    print("Ske false negatives:", ske_false_negatives)

    print("unicycler true positives:", uni_true_positives)
    print("unicycler false positives:", uni_false_positives)
    print("unicycler false negatives:", uni_false_negatives)

    sample_perf[sample]['totTP'] = cumlen(pan_true_positives, len_dict)
    sample_perf[sample]['totFP'] = cumlen(pan_false_positives, len_dict)
    sample_perf[sample]['totFN'] = cumlen(pan_false_negatives, len_dict)

    # Fragments classified that way by the pangenome prediction only, i.e.
    # by neither of the two single-assembler predictions.
    pan_asm_true_positives = set(pan_true_positives).difference(uni_true_positives, ske_true_positives)
    pan_asm_false_positives = set(pan_false_positives).difference(uni_false_positives, ske_false_positives)
    pan_asm_false_negatives = set(pan_false_negatives).difference(uni_false_negatives, ske_false_negatives)
    print("Pan-ASM true positives:", cumlen(pan_asm_true_positives, len_dict), "contigs", pan_asm_true_positives)
    print("Pan-ASM false positives:", cumlen(pan_asm_false_positives, len_dict), "contigs", pan_asm_false_positives)
    print("Pan-ASM false negatives:", cumlen(pan_asm_false_negatives, len_dict), "contigs", pan_asm_false_negatives)

    sample_perf[sample]['TP'] = cumlen(pan_asm_true_positives, len_dict)
    sample_perf[sample]['FP'] = cumlen(pan_asm_false_positives, len_dict)
    sample_perf[sample]['FN'] = cumlen(pan_asm_false_negatives, len_dict)
   


----------Processing file: SAMN16357459
Empty DataFrame
Columns: [plasmid, contig, contig_len]
Index: []
Empty DataFrame
Columns: [plasmid, contig, contig_len]
Index: []
Empty DataFrame
Columns: [plasmid, contig, contig_len]
Index: []
Skipping SAMN16357459 due to empty bins files.
----------Processing file: SAMN16357468
  plasmid  contig  contig_len
0      P1   ske26         759
1      P1   ske47         318
2      P1   ske67        7408
3      P1  ske155         347
4      P1    ske5        2599
  plasmid  contig  contig_len
0      P1   uni92         221
1      P1   uni73         557
2      P1   uni88         247
3      P1   uni70        1043
4      P1  uni120          88
  plasmid contig  contig_len
0      P1     16        7582
1      P1    376        7408
2      P1    238           2
3      P1    282         247
4      P1    239           1
  plasmid  contig  contig_len fragment
0      P1   ske26         759     {10}
1      P1   ske47         318     {19}
2      P1   ske67        74

In [28]:

# Dump the per-sample counters, then store them as a table with one row per
# sample and one column per metric.
print(sample_perf)
sample_perf_df = pd.DataFrame(sample_perf).T
performance_tsv = os.path.join(data, "pan_asm_performance.tsv")
sample_perf_df.to_csv(performance_tsv, sep="\t")


{'SAMN16357459': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 0, 'totFP': 0, 'totFN': 0}, 'SAMN16357468': {'TP': 295, 'FP': 389, 'FN': 0, 'totTP': 178974, 'totFP': 13743, 'totFN': 5421}, 'SAMN16357457': {'TP': 1093, 'FP': 3428, 'FN': 9075, 'totTP': 237582, 'totFP': 201831, 'totFN': 15898}, 'SAMN11056346': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 171817, 'totFP': 0, 'totFN': 3697}, 'SAMN16357450': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 230101, 'totFP': 2382, 'totFN': 9578}, 'SAMN10163228': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 0, 'totFP': 0, 'totFN': 0}, 'SAMN16357451': {'TP': 0, 'FP': 0, 'FN': 12119, 'totTP': 215443, 'totFP': 7012, 'totFN': 19248}, 'SAMN16357456': {'TP': 96, 'FP': 522, 'FN': 0, 'totTP': 225788, 'totFP': 13024, 'totFN': 15339}, 'SAMN16357460': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 0, 'totFP': 0, 'totFN': 0}, 'SAMN16357458': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 199716, 'totFP': 5115, 'totFN': 4738}, 'SAMN16357467': {'TP': 0, 'FP': 0, 'FN': 0, 'totTP': 146588, 'totFP': 8051, 'totFN': 594

In [20]:
!pwd


/Users/msgro/Progetti/pangebin-algo
