# Jupyter notebook for running ImmuneML code

In [280]:
# import stuff
import os
from pathlib import Path

from immuneML.caching.CacheType import CacheType
from immuneML.dsl.ImmuneMLParser import ImmuneMLParser
from immuneML.environment.Constants import Constants
from immuneML.environment.EnvironmentSettings import EnvironmentSettings
from immuneML.dsl.definition_parsers.DefinitionParser import DefinitionParser
from immuneML.util.PathBuilder import PathBuilder



### Set specification and result path, and build path object. This is done in ImmuneMLApp.py

In [281]:
import random

# Random numeric suffix appended to the result folder name (presumably so that
# repeated runs do not collide with each other).
num = random.randint(0, 10000)

# The YAML specification to parse and the folder that receives this run's results.
specification_path = Path('quickstart_MLMethod.yaml')
result_path = Path(f'../../results/quickstart_result2{num}')

# If this step fails, try dropping the leading ../../ from result_path above.
PathBuilder.build(result_path)

# Select the production cache type through the environment and place the cache
# folder inside the result folder.
os.environ[Constants.CACHE_TYPE] = CacheType.PRODUCTION.value
cache_path = result_path / "cache"
EnvironmentSettings.set_cache_path(cache_path)


2022-12-01 15:02:02.610465: Setting temporary cache path to ../../results/quickstart_result25843/cache


### Open and read the YAML specification file and turn it into a Python object so that it can be processed. Validates that all keys are strings containing only letters, digits and underscores. This is done in the file ImmuneMLParser.py

In [282]:
from yaml import MarkedYAMLError
import yaml

# Read the YAML specification into plain Python objects and validate its keys.
# Any YAML error is re-raised as a MarkedYAMLError with a user-facing hint.
try:
    with specification_path.open("r") as file:
        workflow_specification = yaml.safe_load(file)
    # The file handle is not needed for key validation, so it is closed first.
    ImmuneMLParser.check_keys(workflow_specification)
except yaml.YAMLError as exc:
    problem_description = "\n--------------------------------------------------------------------------------\n" \
                          "There was a YAML formatting error in the supplied specification file. Please validate specification " \
                          "(you can use https://jsonformatter.org/yaml-validator) and try again."
    # Only MarkedYAMLError subclasses carry `problem_mark`; other YAMLError
    # subclasses (e.g. the reader's error for invalid characters) do not, so read
    # it defensively instead of failing with an AttributeError inside the handler.
    problem_mark = getattr(exc, "problem_mark", None)
    if problem_mark is not None:
        mark_text = f"The error was {problem_mark}."
    else:
        mark_text = "The error location is not available."
    # Chain the original exception so the underlying traceback stays visible.
    raise MarkedYAMLError(context=str(exc), problem=problem_description, problem_mark=mark_text) from exc

### Create an empty SymbolTable and print the Python object (dict) that resulted from reading in the YAML file. The SymbolTable will contain all the objects that have been mapped to the variables in the YAML specification file.

In [283]:
from immuneML.dsl.symbol_table.SymbolTable import SymbolTable

# Start with a fresh symbol table, constructed without arguments.
symbol_table = SymbolTable()

# The dict containing the YAML specification data, as returned by yaml.safe_load.
print(workflow_specification)

# Print the newly created symbol table after a blank separator line.
print("\n", symbol_table)

{'definitions': {'datasets': {'my_dataset': {'format': 'AIRR', 'params': {'is_repertoire': True, 'path': '/Users/jorgenskimmeland/Documents/aar5/Master/ImmuneML/quickstart_data/repertoires', 'metadata_file': '/Users/jorgenskimmeland/Documents/aar5/Master/ImmuneML/quickstart_data/metadata.csv'}}}, 'encodings': {'my_kmer_frequency': {'KmerFrequency': {'k': 3}}}, 'ml_methods': {'my_logistic_regression': 'LogisticRegression'}, 'reports': {'my_coefficients': 'Coefficients'}}, 'instructions': {'my_training_instruction': {'type': 'TrainMLModel', 'dataset': 'my_dataset', 'labels': ['signal_disease'], 'settings': [{'encoding': 'my_kmer_frequency', 'ml_method': 'my_logistic_regression'}], 'assessment': {'reports': {'models': ['my_coefficients']}, 'split_strategy': 'random', 'split_count': 1, 'training_percentage': 0.7}, 'selection': {'split_strategy': 'random', 'split_count': 1, 'training_percentage': 1}, 'optimization_metric': 'balanced_accuracy', 'metrics': ['auc', 'precision', 'recall'], 'num

### Read values from definition section in YAML file through definition parser

### Call the parse function of the DefinitionParser, which in turn calls the different parsers for the objects that belong to the definitions section. REMEMBER that the ml_methods are collected from the extendedPackages directory.

In [284]:
from immuneML.ml_methods.MLMethod import MLMethod
from immuneML.dsl.InstructionParser import InstructionParser

# Run the definition parser on the loaded specification. The call returns a pair:
# the first item is handed to InstructionParser.parse in the next step, the second
# is later passed to ImmuneMLParser._output_specs as `definitions`.
def_parser_output, specs_defs = DefinitionParser.parse(
    workflow_specification,
    symbol_table,
    result_path,
)

Filenames: [PosixPath('/Users/jorgenskimmeland/PycharmProjects/immuneML/extendedPackages/ml_methods/LogisticRegression.py')]


### Parsing the instructions section of the YAML specification file.

In [285]:
# Run the instruction parser on the definition parser's output. This rebinds
# symbol_table to the table returned by the instruction parser.
symbol_table, specs_instructions = InstructionParser.parse(def_parser_output, result_path)

### Output parser defines format and builds output path

In [286]:
from immuneML.dsl.OutputParser import OutputParser

# Run the output parser on the loaded specification and the current symbol table.
# The result is later passed to ImmuneMLParser._output_specs as `output`.
app_output = OutputParser.parse(workflow_specification, symbol_table)

### Specifies the output, dumps the yaml results

In [287]:
# Hand the parsed definitions, instructions and output settings to the parser's
# spec writer; judging by the log line this cell prints, the full specification
# file ends up inside result_path.
# NOTE(review): _output_specs is a private method of ImmuneMLParser.
path = ImmuneMLParser._output_specs(
    file_path=specification_path,
    result_path=result_path,
    definitions=specs_defs,
    instructions=specs_instructions,
    output=app_output,
)


2022-12-01 15:02:18.349372: Full specification is available at ../../results/quickstart_result25843/full_quickstart_MLMethod.yaml.



In [288]:
# Display the entries currently held by the symbol table (name -> SymbolTableEntry).
# NOTE(review): _items is a private attribute; prefer a public accessor if
# SymbolTable offers one.
symbol_table._items

{'my_kmer_frequency': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff084900610>,
 'my_logistic_regression': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff0a09fa820>,
 'my_coefficients': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff0a29518e0>,
 'my_dataset': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff0a2951c10>,
 'my_training_instruction': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff0849c7040>,
 'output': <immuneML.dsl.symbol_table.SymbolTableEntry.SymbolTableEntry at 0x7ff0850bb040>}

# At this point we are done with parsing the YAML specification file :)

#### The result of reading the YAML file is a SymbolTable that has mapped the YAML keywords to the appropriate Python objects

### SymbolType is an Enum class that identifies the type of each entry in the SymbolTable. Get all instructions from the SymbolTable

In [290]:
from immuneML.dsl.symbol_table.SymbolType import SymbolType

# Fetch the symbol-table entries whose type is SymbolType.INSTRUCTION.
instructions = symbol_table.get_by_type(SymbolType.INSTRUCTION)

### Get output from symbol table

In [None]:
# Look up the entry stored in the symbol table under the key "output".
output = symbol_table.get("output")

# At this point we are done with defining output [TODO: revise this later]

### Clear cache to avoid crash

In [289]:
import shutil

# Remove the cache folder written during this run and restore the cache settings.
# ignore_errors=True keeps this cell from failing when the folder is already gone.
shutil.rmtree(cache_path, ignore_errors=True)
EnvironmentSettings.reset_cache_path()
# Use pop() with a default instead of `del` so that re-running this cell (or
# running it before the setup cell) does not raise KeyError when the variable
# is not set.
os.environ.pop(Constants.CACHE_TYPE, None)