# MaxQuant Handler (mqhandler package)

## Install package

In [None]:
!pip install mqhandler

## Run mqhandler

### 1. Load Data

#### 1.1 Imports

In [1]:
import pandas as pd
from mqhandler.mq_utils.runner_utils import find_delimiter

#### 1.2 Specify File Path

In [None]:
# Path to the input table that is loaded in step 1.3; replace it with
# your own file.
# NOTE(review): the current value ends in "/" and therefore looks like a
# directory rather than a file - confirm and point it to the actual file.
file = "/Users/lisiarend/Desktop/Hiwi/"

#### 1.3 Load Your Data

In [None]:
# Ask mqhandler's find_delimiter for the separator to use for this file,
# read the table with it, and replace missing values by empty strings.
separator = find_delimiter(file)
data = pd.read_table(file, sep=separator)
data = data.fillna("")

# Preview the first five rows.
data.head(5)

### 2. Filter Protein IDs

#### 2.1 Imports

In [2]:
from mqhandler import filter_protein_ids as fi

#### 2.2 Set Preferences

In [3]:
# --- Settings for filter_protein_ids ---

# Name of the column that holds the protein IDs.
protein_column = "Protein IDs"

# Organism the IDs should match to.
organism = "human"

# Keep protein IDs that stem from the decoy fasta (REV__, CON__)?
decoy = False

# Reduce newly retrieved protein IDs to reviewed ones?
reviewed = False

# Keep rows whose ID cell is empty (True) or delete them (False)?
keep_empty = False

# Also return a dictionary of logging dataframes?
return_log = True

#### 2.3 Run filter_protein_ids

In [None]:
# Filter the protein IDs of the loaded table using the preferences set
# above. Because return_log is True, two values are unpacked: the result
# and the dictionary of logging dataframes.
fi_data, fi_log_dict = fi.filter_protein_ids(
    data=data,
    protein_column=protein_column,
    organism=organism,
    decoy=decoy,
    keep_empty=keep_empty,
    reviewed=reviewed,
    return_log=return_log,
)

# Preview the first five rows of the result.
fi_data.head(5)

#### 2.4 Inspect Logging

In [None]:
# TODO

### 3. Remap Gene Names

#### 3.1 Imports

In [None]:
from mqhandler import remap_genenames as rmg

#### 3.2 Set Preferences

In [5]:
# --- Settings for remap_genenames ---

# Mode of refilling (the available modes are listed below this cell).
mode = "uniprot_primary"

# Column names: protein IDs and gene names.
protein_column = "Protein IDs"
gene_column = "Gene Names"

# Organism the IDs should match to.
organism = "human"

# Path to the fasta file; used when mode is "all" or "fasta".
fasta = None

# Skip gene name cells that are already filled?
skip_filled = False

# Keep rows whose gene name cell is empty (True) or delete them (False)?
keep_empty = False

# Name of the column for the remapped gene names. If None, gene_column
# is overridden.
res_column = None

# Also return a dictionary of logging dataframes?
return_log = True

**Modes of refilling:**
- all: Use primarily fasta infos and additionally uniprot infos.
- fasta: Use information extracted from fasta headers.
- uniprot: Use mapping information from uniprot and use all gene names.
- uniprot_primary: Use mapping information from uniprot and only use primary gene names.
- uniprot_one: Use mapping information from uniprot and only use most frequent single gene name.

#### 3.3 Run remap_genenames

In [None]:
# Remap the gene names of the filtered table (fi_data) using the
# preferences set above. The module is imported as "rmg" in step 3.1, so
# the call must go through that alias. Because return_log is True, two
# values are unpacked: the result and the dictionary of logging dataframes.
rmg_data, rmg_log_dict = rmg.remap_genenames(
    data=fi_data,
    mode=mode,
    protein_column=protein_column,
    gene_column=gene_column,
    skip_filled=skip_filled,
    organism=organism,
    fasta=fasta,
    keep_empty=keep_empty,
    res_column=res_column,
    return_log=return_log,
)

#### 3.4 Inspect Logging

In [None]:
# TODO

### 4. Reduce Gene Names

#### 4.1 Imports

In [None]:
from mqhandler import reduce_genenames as rdg

#### 4.2 Set Preferences

In [None]:
mode = "ensembl" # Mode of reduction. See below for more infos.
gene_column = "Gene Names" # Name of column with gene names
organism = "human" # Specify organism the IDs should match to
# None (not False) is the "no separate result column" value: a column name
# is expected here, and with None the gene_column is overridden.
res_column = None # Name of column of reduced gene names results. If None, the gene_column will be overridden
keep_empty = False # Bool to indicate if empty reduced gene names cells should be kept or deleted
HGNC_mode = None # Mode on how to select the gene names in HGNC (mostfrequent, all)
return_log = True # Bool to indicate if a dictionary of logging dataframes should be returned

**Modes of reduction:**
- ensembl: Use gProfiler to reduce gene names to those having an Ensembl ID
- HGNC: Use HGNC database to reduce gene names to those having an entry in HGNC (only for human)
- mygeneinfo: Use mygeneinfo database to reduce gene names to those having an entry in mygeneinfo
- enrichment: Use gProfiler to reduce gene names to those having a functional annotation

#### 4.3 Run reduce_genenames

In [None]:
# Reduce the gene names of the remapped table (rmg_data) using the
# preferences set above. Because return_log is True, two values are
# unpacked: the result and the dictionary of logging dataframes.
rdg_data, rdg_log_dict = rdg.reduce_genenames(
    data=rmg_data,
    mode=mode,
    gene_column=gene_column,
    organism=organism,
    res_column=res_column,
    keep_empty=keep_empty,
    HGNC_mode=HGNC_mode,
    return_log=return_log,
)

#### 4.4 Inspect Logging

In [6]:
#TODO

### 5. Map Orthologs

#### 5.1 Imports

In [None]:
from mqhandler import map_orthologs as mo

#### 5.2 Set Preferences

In [None]:
# --- Settings for map_orthologs ---

# Name of the column that holds the gene names.
gene_column = "Gene Names"

# Organism the IDs currently match to.
organism = "human"

# Organism the IDs should be mapped to.
# NOTE(review): left empty here - presumably has to be filled in before
# running the next cell; confirm.
tar_organism = ""

# Keep rows whose ortholog gene name cell is empty (True) or delete them (False)?
keep_empty = False

# Name of the column for the ortholog gene names. If None, gene_column
# is overridden.
res_column = None

# Also return a dictionary of logging dataframes?
return_log = True

#### 5.3 Run map_orthologs

In [None]:
# Map the gene names to their orthologs in the target organism using the
# preferences set above. The input table is passed via the "data" keyword,
# as in the other mqhandler calls in this notebook. Because return_log is
# True, two values are unpacked: the result and the dictionary of logging
# dataframes.
# NOTE(review): this passes the originally loaded table ("data"); to
# continue the pipeline from step 4, pass rdg_data instead - confirm
# which input is intended.
mo_data, mo_logging_dict = mo.map_orthologs(
    data=data,
    gene_column=gene_column,
    organism=organism,
    tar_organism=tar_organism,
    keep_empty=keep_empty,
    res_column=res_column,
    return_log=return_log,
)

#### 5.4 Inspect Logging

In [None]:
#TODO