# Working with biotrainer files
This example notebook explains how to work with files used or created by biotrainer.

## 1. Input fasta files
Using the functionality from FASTA.py of biotrainer.utilities makes working with biotrainer fasta files easy!

In [26]:
from biotrainer.utilities import read_FASTA, get_attributes_from_seqrecords, get_split_lists

# Location of the FASTA file read in the cells below; being a relative path,
# it is resolved against the current working directory of the kernel.
biotrainer_fasta_seq2class = "../sequence_to_class/sequences.fasta"

In [27]:
# Step 1 - parse the FASTA file; the result holds biopython sequence records
seq_records = read_FASTA(biotrainer_fasta_seq2class)
first_record = seq_records[0]
print(first_record)

ID: Seq1
Name: Seq1
Description: Seq1 TARGET=Glob SET=train
Number of features: 0
Seq('SEQWENCE')


In [28]:
# Step 2 - collect the attributes of all seq_records in a dict (seq_id: attributes)
seq_attributes = get_attributes_from_seqrecords(seq_records)
first_entry = list(seq_attributes.items())[0]
print(first_entry)

('Seq1', {'TARGET': 'Glob', 'SET': 'train'})


In [29]:
# Step 3 - derive the dataset splits from the attributes (lists with sequence ids)
split_lists = get_split_lists(seq_attributes)
train, val, test = split_lists
print(train)

['Seq1']


## 2. Output files: out.yml
Every run of biotrainer creates an out.yml file. It can be read via a yaml library.

In [30]:
from ruamel import yaml

# Location of the out.yml file read in the next cell; being a relative path,
# it is resolved against the current working directory of the kernel.
out_path_residue2class = "../residue_to_class/output/out.yml"

In [31]:
# Read the out.yml file and show the metrics stored for the test set.
# The module-level yaml.safe_load() shortcut is deprecated in ruamel.yaml and
# rejected by releases >= 0.18, so the file is parsed through a YAML instance.
# typ="safe" keeps the safe-loading behaviour: only plain Python types
# (dicts, lists, scalars) are constructed.
yaml_parser = yaml.YAML(typ="safe", pure=True)
with open(out_path_residue2class, "r") as out_file_residue2class:
    residue2class_results = yaml_parser.load(out_file_residue2class)
print(residue2class_results["test_iterations_results"]["metrics"])

{'- f1_score class 0': 0.0, '- f1_score class 1': 0.0, '- f1_score class 2': 0.0, '- f1_score class 3': 0.0, '- f1_score class 4': 0.0, '- precission class 0': 0.0, '- precission class 1': 0.0, '- precission class 2': 0.0, '- precission class 3': 0.0, '- precission class 4': 0.0, '- recall class 0': 0.0, '- recall class 1': 0.0, '- recall class 2': 0.0, '- recall class 3': 0.0, '- recall class 4': 0.0, 'accuracy': 0.0, 'loss': 1.623467206954956, 'macro-f1_score': 0.0, 'macro-precision': 0.0, 'macro-recall': 0.0, 'matthews-corr-coeff': -0.3000600337982178, 'micro-f1_score': 0.0, 'micro-precision': 0.0, 'micro-recall': 0.0, 'spearmans-corr-coeff': -0.14046210050582886}
