### Run main() ImmuneMLApp.py
Parse arguments from terminal

In [275]:
import os
from pathlib import Path
import random
from immuneML.caching.CacheType import CacheType
from immuneML.dsl.ImmuneMLParser import ImmuneMLParser
from immuneML.environment.Constants import Constants
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.util.PathBuilder import PathBuilder

### Set path to YAML file with specifications. Create folder for results
ImmuneMLApp.py - init

In [276]:
# For testing: pick a result folder that does not exist yet, so every run starts clean.
# A single random draw can repeat a number used by an earlier run, so re-draw on collision.
results_root = Path("../../results")
while True:
    num = random.randint(0, 10000)
    result_path = results_root / f"quickstart_results{num}"
    if not result_path.exists():
        break

# The default is the original author's local checkout; set IMMUNEML_SPEC_PATH to point at
# your own quickstart.yaml instead of editing this cell.
specification_path = Path(os.environ.get(
    "IMMUNEML_SPEC_PATH",
    "/Users/oskar/Documents/Skole/Master/immuneml_forked/quickstart.yaml",
))

In [277]:
# Build the results folder (presumably creating it if missing - confirm in PathBuilder).
# The value returned by the call is shown as the cell output.
PathBuilder.build(result_path)

PosixPath('../../results/quickstart_results2419')

In [278]:
# Place the cache inside this run's result folder, then switch immuneML to it.
cache_path = result_path.joinpath("cache")
os.environ[Constants.CACHE_TYPE] = CacheType.PRODUCTION.value
EnvironmentSettings.set_cache_path(cache_path)

2022-12-02 09:05:17.234557: Setting temporary cache path to ../../results/quickstart_results2419/cache


### ImmuneMLApp.run()

### Parse YAML file
Open and read the content of the file, and parse the YAML into a Python dict object.
Validate that all keys are strings containing only letters, numbers, and underscores.

ImmuneMLParser.py - parse_yaml_file

In [279]:
import yaml
from yaml import MarkedYAMLError

# parse_yaml_file
# Reads the specification file into a Python dict and validates its keys.
# Raises MarkedYAMLError with a user-facing hint when the file is not valid YAML.
try:
    with specification_path.open("r") as file:
        # open and read content of file, parse yaml to python dict object
        workflow_specification = yaml.safe_load(file)
        # validates that all keys are of type string with characters contain only letters, numbers and underscore
        ImmuneMLParser.check_keys(workflow_specification)
except yaml.YAMLError as exc:
    problem_description = "\n--------------------------------------------------------------------------------\n" \
                          "There was a YAML formatting error in the supplied specification file. Please validate specification " \
                          "(you can use https://jsonformatter.org/yaml-validator) and try again."
    # Only MarkedYAMLError subclasses define problem_mark; reading it directly from a plain
    # YAMLError would raise AttributeError here and hide the actual formatting problem.
    error_mark = getattr(exc, "problem_mark", None)
    mark_description = f"The error was {error_mark}." if error_mark is not None else None
    # Chain explicitly so the original parser error stays visible in the traceback.
    raise MarkedYAMLError(context=str(exc), problem=problem_description, problem_mark=mark_description) from exc


### Create SymbolTable
Contains all specifications set in YAML file after running parsers

ImmuneMLParser.py - parse

In [280]:


from pathlib import Path

from immuneML.dsl.symbol_table.SymbolTable import SymbolTable


# this is ImmuneMLParser.parse
# Start from a fresh SymbolTable; the parser calls in the following cells receive it as an argument.
symbol_table = SymbolTable()

# Display the specification path for reference.
specification_path

PosixPath('/Users/oskar/Documents/Skole/Master/immuneml_forked/quickstart.yaml')

### Definition parser
DefinitionParser.py - parse

In [281]:
# definition parser. parse(). Python dict = workflow_specification, yaml file
import datetime
import re
from pathlib import Path

import yaml
from yaml import MarkedYAMLError

from immuneML.dsl.InstructionParser import InstructionParser
from immuneML.dsl.OutputParser import OutputParser
from immuneML.dsl.definition_parsers.DefinitionParser import DefinitionParser
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable
from immuneML.dsl.symbol_table.SymbolType import SymbolType
from immuneML.util.Logger import print_log
from immuneML.util.PathBuilder import PathBuilder
from immuneML.dsl.definition_parsers.DefinitionParser import DefinitionParser

# Parse the definitions of the specification. The call returns a pair: a DefinitionParserOutput
# object (def_parser_output) and a dict of definition specs (specs_defs); both are displayed below.
def_parser_output, specs_defs = DefinitionParser.parse(workflow_specification, symbol_table, result_path)

### Definition parser returns def_parser_output (symbol_table, workflow_specification) and specs_defs

In [282]:
# Show the parser output object; its default repr only reveals the type and memory address.
def_parser_output


<immuneML.dsl.definition_parsers.DefinitionParserOutput.DefinitionParserOutput at 0x7f89e8cf1e50>

In [283]:
# Show the definition specs: a nested dict keyed by section name (datasets, simulations, ...).
specs_defs

{'datasets': {'my_dataset': {'format': 'AIRR',
   'params': {'is_repertoire': True,
    'path': PosixPath('/Users/oskar/Documents/Skole/Master/immuneml_forked/quickstart_data/repertoires'),
    'paired': False,
    'import_productive': True,
    'import_with_stop_codon': False,
    'import_out_of_frame': False,
    'import_illegal_characters': False,
    'region_type': 'IMGT_CDR3',
    'separator': '\t',
    'column_mapping': {'junction': 'sequences',
     'junction_aa': 'sequence_aas',
     'v_call': 'v_alleles',
     'j_call': 'j_alleles',
     'locus': 'chains',
     'duplicate_count': 'counts',
     'sequence_id': 'sequence_identifiers'},
    'import_empty_nt_sequences': True,
    'import_empty_aa_sequences': False,
    'metadata_file': PosixPath('/Users/oskar/Documents/Skole/Master/immuneml_forked/quickstart_data/metadata.csv'),
    'result_path': PosixPath('../../results/quickstart_results2419/datasets/my_dataset')}}},
 'simulations': {},
 'preprocessing_sequences': {},
 'motifs'

### Instruction parser. Parses instruction from dict formed by yaml file

In [284]:
# Parse the instructions. Note that symbol_table is rebound here to the table returned by
# the instruction parser, alongside the instruction specs.
symbol_table, specs_instructions = InstructionParser.parse(def_parser_output, result_path)

# Display the table; its repr does not list the contents (see symbol_table._items for that).
symbol_table

SymbolTable()

### OutputParser. Sets output formats for output file

In [285]:
# Parse the output settings of the specification.
# NOTE(review): presumably this also registers them in symbol_table under "output" - confirm in OutputParser.
app_output = OutputParser.parse(workflow_specification, symbol_table)

### Returns filepath to file with all specifications

In [286]:
from immuneML.dsl.ImmuneMLParser import ImmuneMLParser

# Write out the full specification (its location is logged below) and keep the returned path.
path = ImmuneMLParser._output_specs(
    file_path=specification_path,
    result_path=result_path,
    definitions=specs_defs,
    instructions=specs_instructions,
    output=app_output,
)

2022-12-02 09:05:27.119293: Full specification is available at ../../results/quickstart_results2419/full_quickstart.yaml.



# Finished parsing
Result is a filled SymbolTable object and a path to all specifications

In [287]:
# Inspect the private _items dict: it maps each symbol name from the specification
# to its SymbolTableEntry.
symbol_table._items

{'my_kmer_frequency': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89ff29f4f0>,
 'my_logistic_regression': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89e8d980d0>,
 'my_coefficients': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89e8d1fd60>,
 'my_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89ff344be0>,
 'my_training_instruction': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89e8d1fee0>,
 'output': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7f89dbd51eb0>}

### Get instructions in SymbolTable

In [288]:
# Collect the symbol table entries whose type is INSTRUCTION.
instructions = symbol_table.get_by_type(SymbolType.INSTRUCTION)

# TrainMLModelInstruction. List of instructions. In this case, everything that is needed to train the ML model.
# Show the state of the first instruction (a TrainMLModelState in this run).
instructions[0].item.state

TrainMLModelState(dataset=<immuneML.data_model.dataset.RepertoireDataset.RepertoireDataset object at 0x7f8a08dedb20>, hp_strategy=<immuneML.hyperparameter_optimization.strategy.GridSearch.GridSearch object at 0x7f89e8d1f790>, hp_settings=[<immuneML.hyperparameter_optimization.HPSetting.HPSetting object at 0x7f89e8d485e0>], assessment=<immuneML.hyperparameter_optimization.config.SplitConfig.SplitConfig object at 0x7f89e8cf5850>, selection=<immuneML.hyperparameter_optimization.config.SplitConfig.SplitConfig object at 0x7f89e8cf57f0>, metrics={<Metric.AUC: 'roc_auc_score'>, <Metric.PRECISION: 'precision_score'>, <Metric.RECALL: 'recall_score'>}, optimization_metric=<Metric.BALANCED_ACCURACY: 'balanced_accuracy_score'>, label_configuration=<immuneML.environment.LabelConfiguration.LabelConfiguration object at 0x7f89ff2793a0>, path=PosixPath('/Users/oskar/Documents/Skole/Master/immuneml_forked/analysis_runs/f6a0fc2ede6c2c6ded8b4a2e392e849c'), context={'dataset': <immuneML.data_model.dataset.

### Get output from SymbolTable
Uses `.get` because there is only one output element.

In [293]:
# Look up the single "output" entry by name; in this run it is a dict with the report format.
output = symbol_table.get("output")
output

{'format': 'HTML'}

# Run all instructions
Run each instruction with the SemanticModel()

In [294]:
from immuneML.dsl.semantic_model.SemanticModel import SemanticModel

# Unwrap the instruction objects from their symbol table entries, then hand them to the model
# together with the result folder and the output settings.
instruction_items = [entry.item for entry in instructions]
model = SemanticModel(instruction_items, result_path, output)


1. run - run instruction and build report if output is set
2. run_instruction - run each instruction.
    * TrainMLModelInstruction - ...

In [290]:
import shutil

# clear cache
# Remove the cache folder and undo the environment changes made during setup.
shutil.rmtree(cache_path, ignore_errors=True)
EnvironmentSettings.reset_cache_path()
# pop() with a default instead of del: re-running this cell (or running it when the variable
# was never set) must not raise KeyError, in line with ignore_errors=True above.
os.environ.pop(Constants.CACHE_TYPE, None)