# Converting CSV to TSV
The relatively raw CSV files manually generated from the XLSX files are not ideal for future steps. This script reads those in and generates uniform TSV files.

#### Import Packages & Setup Environment

In [1]:
import pandas
import math

#### Define Functions

In [2]:
def B721Allele(directory):
    '''
    Extract the leading label of a directory name
    Expects a directory string such as 'A0101_rep1'
    Returns the text before the first underscore; a name that contains
    no underscore is returned unchanged
    '''
    # str.find returns -1 when no underscore is present, which made the
    # original slice silently drop the final character of the name.
    # partition returns the whole string in that case.
    return(directory.partition('_')[0])

In [3]:
def inBlacklist(row, blacklist):
    '''
    Report whether a row's sequence is blacklisted
    Expects a row with a 'sequence' entry and a container of sequences
    Returns True when the sequence is in the container, False otherwise
    '''
    return(row.loc['sequence'] in blacklist)

In [4]:
def removeBlacklist(df, blacklist_path='../../data/tsv/Blacklist.txt'):
    '''
    Remove rows whose sequence appears in the blacklist file
    Expects a DataFrame with a 'sequence' column and, optionally, the path
    of a text file holding one blacklisted sequence per line
    Returns a new DataFrame with only the rows that are not blacklisted
    '''
    # The context manager closes the handle; the original never closed it.
    with open(blacklist_path, 'r') as handle:
        blacklist = set(handle.read().split('\n'))
    # Vectorised membership test instead of a row-by-row apply; it also
    # works on a frame with no rows.
    keep = ~df['sequence'].isin(blacklist)
    # Return an independent copy so callers can add columns to the result
    # without it being treated as a slice of df.
    return(df[keep].copy())

In [5]:
def numberPeptides(peptide):
    '''
    Build a peptide identifier from a row's name
    Expects an object with a name attribute (the row label when used with
    DataFrame.apply along axis=1)
    Returns 'Peptide:' followed by the name formatted with the '06' spec
    '''
    padded = format(peptide.name, '06')
    return('Peptide:' + padded)

In [6]:
def updateSequence(sequence):
    '''
    Change modified amino acids to protein sequence
    Expects a peptide string in which lowercase m, c and q mark modified
    residues; every other character is copied through unchanged
    Returns a list of corrected strings: m becomes M, c becomes C and each
    q is expanded to both E and Q, so n q's give 2**n candidate strings
    '''
    # One-to-one substitutions. The original mapped 'c' to 'M', presumably
    # a copy-paste slip from the 'm' branch.
    substitutions = {'m': 'M', 'c': 'C'}
    newSequence = ['']
    for c in sequence:
        if c == 'q':
            # Branch every prefix into an E and a Q variant. The original
            # doubled the list and alternated E/Q by index, which for a
            # second q only ever produced EE and QQ (never EQ or QE).
            newSequence = [
                prefix + residue
                for prefix in newSequence
                for residue in ('E', 'Q')
            ]
        else:
            residue = substitutions.get(c, c)
            newSequence = [prefix + residue for prefix in newSequence]
    return(newSequence)

In [7]:
def mapSequenceUpdate(row):
    '''
    Build the comma-separated list of corrected sequences for one row
    Expects a row with 'sequenceMulti' and 'sequence' entries; when
    'sequenceMulti' is a string it is split on '|' and every part is
    corrected, otherwise 'sequence' alone is corrected
    Returns the unique corrected sequences joined with ',' in sorted order
    '''
    if isinstance(row.loc['sequenceMulti'], str):
        sources = row.loc['sequenceMulti'].split('|')
    else:
        # Non-string sequenceMulti (e.g. a missing value): use 'sequence'
        sources = [row.loc['sequence']]
    unique = set()
    for seq in sources:
        unique.update(updateSequence(seq))
    # Sort before joining: iterating a set of strings follows hash order,
    # which can differ between interpreter runs, so the original output
    # was not reproducible.
    return(','.join(sorted(unique)))

In [8]:
def mhcGBMIFN(row):
    '''
    Flag a row by the last underscore-separated token of its directory
    Expects a row with a 'directory' string entry
    Returns '+' when that token is 'IFN', otherwise '-'
    '''
    suffix = row.loc['directory'].rsplit('_', 1)[-1]
    return('+' if suffix == 'IFN' else '-')

In [9]:
def wholeGBMIFN(row):
    '''
    Compare the two TMT columns of a row
    Expects a row with numeric 'TMT_126_126.127' and 'TMT_127_126.127'
    Returns '.' when the values differ by at most 0.2, '-' when the 126
    value is larger, '+' when the 127 value is larger, and None when no
    comparison holds (e.g. a NaN value)
    '''
    channel126, channel127 = row.loc['TMT_126_126.127'], row.loc['TMT_127_126.127']
    if abs(channel126 - channel127) <= 0.2:
        label = '.'
    elif channel126 > channel127:
        label = '-'
    elif channel127 > channel126:
        label = '+'
    else:
        label = None
    return(label)

## B721.221

#### Process B721 MHC-I on 15 alleles

In [10]:
# Read the 15-allele B721 MHC-I CSV and remove blacklisted sequences.
B721_15_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/csv/B721.15.MHCI.csv',
        sep=',', header=0, index_col=False, low_memory=False))
# Derive the allele from the directory name before 'directory' is dropped.
B721_15_MHC['allele'] = B721_15_MHC['directory'].apply(B721Allele)
# Discard the columns that are not carried into the TSV output.
B721_15_MHC.drop(
    columns=[
        'number', 'parent_charge', 'subgroupSpecific', 'numLCrunsObserved',
        'numKorR', 'variableSites', 'StartAA', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'missedCleavages', 'chromatographicPeakWidthSec', 'trapFillMsec',
        'parent_m_over_z', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'accession_numbers', 'geneSymbol', 'orfCategory',
        'geneSymbol_RibORF', 'type_RibORF', 'entry_name', 'totalIntensity',
        'Ensembl_GENCODE_GeneID_v19', 'rank2Sequence', 'directory',
        'deltaRank1Rank2Score', 'sequenceMap', 'num_seqs_past_parent_filter',
    ],
    inplace=True)

#### Process B721 MHC-I on 79 alleles

In [11]:
# Read the 79-allele B721 MHC-I CSV and remove blacklisted sequences.
B721_79_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/csv/B721.79.MHCI.csv',
        sep=',', header=0, index_col=False))
# Derive the allele from the directory name before 'directory' is dropped.
B721_79_MHC['allele'] = B721_79_MHC['directory'].apply(B721Allele)
# Discard the columns that are not carried into the TSV output.
B721_79_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap',
        'num_seqs_past_parent_filter',
    ],
    inplace=True)

#### Merge B721 MHC-I and assign peptide numbers

In [12]:
# Stack the two B721 MHC-I tables; ignore_index renumbers the rows from 0,
# and those row labels feed the peptide identifiers below.
B721_MHC = pandas.concat(
    [B721_15_MHC, B721_79_MHC], axis=0, ignore_index=True, sort=False)
B721_MHC['Peptide:UID'] = B721_MHC.apply(numberPeptides, axis=1)
B721_MHC['sequenceList'] = B721_MHC.apply(mapSequenceUpdate, axis=1)
# Write the merged table as tab-separated text without the index column.
B721_MHC.to_csv(
    '../../data/tsv/B721.MHCI.tsv', sep='\t', header=True, index=False)

#### Process B721 Whole Proteome

In [13]:
# Read the semicolon-separated B721 whole-proteome table.
B721_Whole = pandas.read_csv(
    '../../data/ssv/B721.Whole.ssv',
    sep=';', header=0, index_col=False, low_memory=False)
# Discard the columns that are not carried into the TSV output.
B721_Whole.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'protein_mw', 'deltaRank1Rank2Score', 'sequenceMap',
        'num_seqs_past_parent_filter', 'nterm', 'StartAA', 'previous_aa',
        'next_aa', 'accession_number', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'recallMstag', 'numCcuts', 'numNCcuts',
        'numIcuts', 'fragmentationCategory', 'trapFillMsec', 'faimsCV',
        'species', 'numCoveredAAsDB', 'numNcuts',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
B721_Whole['Peptide:UID'] = B721_Whole.apply(numberPeptides, axis=1)
B721_Whole['sequenceList'] = B721_Whole.apply(mapSequenceUpdate, axis=1)
B721_Whole.to_csv(
    '../../data/tsv/B721.Whole.tsv', sep='\t', header=True, index=False)

## CLL MHC-I

#### Process CLL 5283

In [14]:
# Read the CLL 5283 MHC-I table and remove blacklisted sequences.
CLL_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/ssv/CLL_5283.MHCI.ssv',
        sep=';', header=0, index_col=False))
# Discard the columns that are not carried into the TSV output.
CLL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'rank2Sequence', 'sequenceMap', 'cys',
        'modifications', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'parent_m_over_z', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'deltaRank1Rank2Score', 'num_seqs_past_parent_filter', 'StartAA',
        'previous_aa', 'next_aa', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'type_MutationEvent', 'entry_name', 'recallMstag',
        'numCoveredAAsDB', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'faimsCV', 'protein_mw', 'species',
        'accession_number', 'numNcuts',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
CLL_MHC['Peptide:UID'] = CLL_MHC.apply(numberPeptides, axis=1)
CLL_MHC['sequenceList'] = CLL_MHC.apply(mapSequenceUpdate, axis=1)
CLL_MHC.to_csv(
    '../../data/tsv/CLL.5283.MHCI.tsv', sep='\t', header=True, index=False)

#### Process CLL 5328

In [15]:
# Read the CLL 5328 MHC-I table and remove blacklisted sequences.
CLL_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/ssv/CLL_5328.MHCI.ssv',
        sep=';', header=0, index_col=False))
# Discard the columns that are not carried into the TSV output.
CLL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'rank2Sequence', 'sequenceMap', 'cys',
        'modifications', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'parent_m_over_z', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'deltaRank1Rank2Score', 'num_seqs_past_parent_filter', 'StartAA',
        'previous_aa', 'next_aa', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'recallMstag',
        'numCoveredAAsDB', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'faimsCV', 'protein_mw', 'species',
        'accession_number', 'numNcuts',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
CLL_MHC['Peptide:UID'] = CLL_MHC.apply(numberPeptides, axis=1)
CLL_MHC['sequenceList'] = CLL_MHC.apply(mapSequenceUpdate, axis=1)
CLL_MHC.to_csv(
    '../../data/tsv/CLL.5328.MHCI.tsv', sep='\t', header=True, index=False)

#### Process CLL 5341

In [16]:
# Read the CLL 5341 MHC-I table and remove blacklisted sequences.
CLL_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/ssv/CLL_5341.MHCI.ssv',
        sep=';', header=0, index_col=False))
# Discard the columns that are not carried into the TSV output.
CLL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'rank2Sequence', 'sequenceMap', 'cys',
        'modifications', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'parent_m_over_z', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'deltaRank1Rank2Score', 'num_seqs_past_parent_filter', 'StartAA',
        'previous_aa', 'next_aa', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'recallMstag',
        'numCoveredAAsDB', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'faimsCV', 'protein_mw', 'species',
        'accession_number', 'numNcuts',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
CLL_MHC['Peptide:UID'] = CLL_MHC.apply(numberPeptides, axis=1)
CLL_MHC['sequenceList'] = CLL_MHC.apply(mapSequenceUpdate, axis=1)
CLL_MHC.to_csv(
    '../../data/tsv/CLL.5341.MHCI.tsv', sep='\t', header=True, index=False)

## GBM MHC-I & Whole

#### Process GBM H4512 Whole

In [17]:
# Read the semicolon-separated GBM H4512 whole-proteome table.
GBM_Whole = pandas.read_csv(
    '../../data/ssv/GBM_H4512.Whole.ssv', sep=';', header=0, index_col=False)
# The IFN annotation step is currently disabled:
# GBM_Whole['IFN'] = GBM_Whole.apply(wholeGBMIFN, axis=1)
# Discard the columns that are not carried into the TSV output.
GBM_Whole.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'nterm', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'chromatographicPeakWidthSec',
        'parent_m_over_z', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'deltaRank1Rank2Score', 'sequenceMap',
        'num_seqs_past_parent_filter', 'StartAA', 'previous_aa',
        'accession_number', 'accession_numbers', 'geneSymbol', 'orfCategory',
        'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF', 'type_RibORF',
        'entry_name', 'next_aa', 'fragmentationCategory',
        'precursorIsolationPurityPercent', 'trapFillMsec', 'faimsCV',
        'peptide_pI', 'protein_mw', 'species', 'recallMstag',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numNCcuts', 'numIcuts',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_Whole['Peptide:UID'] = GBM_Whole.apply(numberPeptides, axis=1)
GBM_Whole['sequenceList'] = GBM_Whole.apply(mapSequenceUpdate, axis=1)
GBM_Whole.to_csv(
    '../../data/tsv/GBM.H4512.Whole.tsv', sep='\t', header=True, index=False)

#### Process GBM H4512 MHC-I

In [18]:
# Read the GBM H4512 MHC-I export and remove blacklisted sequences.
GBM_MHC = removeBlacklist(
    pandas.read_csv(
        '../../data/ssv/GBM_H4512_BT145_20180622.peptideExport.CS.4.ssv',
        sep=';', header=0, index_col=False))
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'missedCleavages', 'numKorR', 'totalIntensity', 'variableSites',
        'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap',
        'num_seqs_past_parent_filter', 'StartAA', 'previous_aa', 'next_aa',
        'accession_numbers', 'geneSymbol', 'orfCategory',
        'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF', 'type_RibORF',
        'entry_name', 'trapFillMsec', 'protein_mw', 'species',
        'accession_number', 'recallMstag', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'faimsCV',
        'numCoveredAAsDB', 'refinedType', 'mergeType', 'plotType', 'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.H4512.MHCI.tsv', sep='\t', header=True, index=False)

#### Process GBM H4512 IFN MHC-I

In [19]:
# Read the GBM H4512 IFN MHC-I export.
GBM_MHC = pandas.read_csv(
    '../../data/ssv/GBM_H4512_BT145_20180622_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.H4512.IFN.MHCI.tsv',
    sep='\t', header=True, index=False)

#### Process GBM H4198 MHC-I

In [20]:
# Read the GBM H4198 MHC-I export.
GBM_MHC = pandas.read_csv(
    '../../data/ssv/GBM_H4198_BT187_20180622.peptideExport.CS.3.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.H4198.MHCI.tsv', sep='\t', header=True, index=False)

#### Process GBM H4198 IFN MHC-I

In [21]:
# Read the GBM H4198 IFN MHC-I export.
GBM_MHC = pandas.read_csv(
    '../../data/ssv/GBM_H4198_BT187_20180622_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.H4198.IFN.MHCI.tsv',
    sep='\t', header=True, index=False)

#### Process GBM 7 MHC-I

In [22]:
# Read the GBM 7 MHC-I export.
GBM_MHC = pandas.read_csv(
    '../../data/ssv/GBM07_14362-007_20170912.peptideExport.CS.2.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
        'type_MutationEvent',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.7.MHCI.tsv', sep='\t', header=True, index=False)

#### Process GBM 7 IFN MHC-I

In [23]:
# Read the GBM 7 IFN MHC-I export.
GBM_MHC = pandas.read_csv(
    '../../data/ssv/GBM07_14362-007_20170912_withIFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
GBM_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
        'type_MutationEvent',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
GBM_MHC['Peptide:UID'] = GBM_MHC.apply(numberPeptides, axis=1)
GBM_MHC['sequenceList'] = GBM_MHC.apply(mapSequenceUpdate, axis=1)
GBM_MHC.to_csv(
    '../../data/tsv/GBM.7.IFN.MHCI.tsv', sep='\t', header=True, index=False)

## Melanoma MHC-I

#### Process Mel 2 MHC-I

In [24]:
# Read the Mel 2 MHC-I table.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel2.MHCI.ssv', sep=';', header=0, index_col=False)
# Keep every row except those from the 4IPs directory.
MEL_MHC = MEL_MHC.loc[
    MEL_MHC['directory'] != 'Mel02_13240-002_20180803_4IPs']
# Discard the columns that are not carried into the TSV output.
MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'subgroupSpecific',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)
MEL_MHC.to_csv(
    '../../data/tsv/MEL.2.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 2 10IP MHC-I

In [25]:
# Read the Mel 2 10IP MHC-I export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel02_13240-002_20190419_10IPs.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'refinedType', 'mergeType', 'plotType',
        'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)
MEL_MHC.to_csv(
    '../../data/tsv/MEL.2.10IP.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 2 SEL MHC-I

In [26]:
# Read the Mel 2 SEL MHC-I export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel02_13240-002_20190108_MEL_sel.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'refinedType', 'mergeType', 'plotType',
        'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)
MEL_MHC.to_csv(
    '../../data/tsv/MEL.2s.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 2 SEL IFN MHC-I

In [27]:
# Read the Mel 2 SEL IFN MHC-I export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel02_13240-002_20190108_MEL_sel_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'refinedType', 'mergeType', 'plotType',
        'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)
MEL_MHC.to_csv(
    '../../data/tsv/MEL.2s.IFN.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 6 MHC-I

In [28]:
# Read the Mel 6 MHC-I export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel06_13240-006_20180622.peptideExport.CS.2.ssv',
    sep=';', header=0, index_col=False)
# Discard the columns that are not carried into the TSV output.
MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ],
    inplace=True)
# Add the peptide identifier and the corrected sequence list, then export.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)
MEL_MHC.to_csv(
    '../../data/tsv/MEL.6.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 6 IFN MHC-I

In [29]:
# Mel 6 IFN MHC-I: load the semicolon-delimited peptide export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel06_13240-006_20180622_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
MEL_MHC = MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
MEL_MHC.to_csv(
    '../../data/tsv/MEL.6.IFN.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 11 MHC-I

In [30]:
# Mel 11 MHC-I: load the semicolon-delimited peptide export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel11_13240-011_2018_2016.peptideExport.CS.4.ssv',
    sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
# This export also carries 'type_MutationEvent', which is removed as well.
MEL_MHC = MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'refinedType', 'mergeType', 'plotType',
        'condType',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
MEL_MHC.to_csv(
    '../../data/tsv/MEL.11.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 11 IFN MHC-I

In [31]:
# Mel 11 IFN MHC-I: load the semicolon-delimited peptide export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel11_13240-011_20180308_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
# This export also carries 'type_MutationEvent', which is removed as well.
MEL_MHC = MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'type_MutationEvent', 'refinedType', 'mergeType', 'plotType',
        'condType',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
MEL_MHC.to_csv(
    '../../data/tsv/MEL.11.IFN.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 15 MHC-I

In [32]:
# Mel 15 MHC-I: load the semicolon-delimited peptide export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel15_13240-015_2018_2016.peptideExport.CS.2.ssv',
    sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
MEL_MHC = MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
MEL_MHC.to_csv(
    '../../data/tsv/MEL.15.MHCI.tsv', sep='\t', header=True, index=False)

#### Process Mel 15 IFN MHC-I

In [33]:
# Mel 15 IFN MHC-I: load the semicolon-delimited peptide export.
MEL_MHC = pandas.read_csv(
    '../../data/ssv/Mel15_13240-015_2018_IFN.peptideExport.CS.1.ssv',
    sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
MEL_MHC = MEL_MHC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'numLCrunsObserved',
        'deltaRank1Rank2Score', 'accession_numbers', 'geneSymbol',
        'orfCategory', 'Ensembl_GENCODE_GeneID_v19', 'geneSymbol_RibORF',
        'type_RibORF', 'entry_name', 'numKorR', 'totalIntensity',
        'variableSites', 'matched_parent_mass', 'delta_parent_mass',
        'delta_parent_mass_ppm', 'peptide_pI', 'protein_mw', 'species',
        'accession_number', 'StartAA', 'previous_aa',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'trapFillMsec', 'faimsCV',
        'parent_m_over_z', 'missedCleavages', 'numNcuts', 'numCcuts',
        'numIcuts', 'numNCcuts', 'fragmentationCategory', 'cys',
        'modifications', 'next_aa', 'recallMstag', 'rank2Sequence',
        'numCoveredAAsDB', 'num_seqs_past_parent_filter', 'sequenceMap',
        'refinedType', 'mergeType', 'plotType', 'condType',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
MEL_MHC['Peptide:UID'] = MEL_MHC.apply(numberPeptides, axis=1)
MEL_MHC['sequenceList'] = MEL_MHC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
MEL_MHC.to_csv(
    '../../data/tsv/MEL.15.IFN.MHCI.tsv', sep='\t', header=True, index=False)

## Ovarian Cancer

#### OV CP-594

In [34]:
# OV CP-594 MHC-I: load the semicolon-delimited input table.
OV = pandas.read_csv(
    '../../data/ssv/OV.CP-594.MHCI.ssv', sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
OV = OV.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'deltaRank1Rank2Score', 'accession_numbers',
        'geneSymbol', 'orfCategory', 'Ensembl_GENCODE_GeneID_v19',
        'geneSymbol_RibORF', 'type_RibORF', 'entry_name', 'numKorR',
        'totalIntensity', 'variableSites', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'protein_mw', 'species', 'accession_number', 'StartAA',
        'previous_aa', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'faimsCV', 'parent_m_over_z', 'missedCleavages',
        'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'cys', 'modifications', 'next_aa',
        'recallMstag', 'rank2Sequence', 'numCoveredAAsDB',
        'num_seqs_past_parent_filter', 'sequenceMap',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
OV['Peptide:UID'] = OV.apply(numberPeptides, axis=1)
OV['sequenceList'] = OV.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
OV.to_csv(
    '../../data/tsv/OV.CP-594.MHCI.tsv', sep='\t', header=True, index=False)

#### OV SLS3-M1

In [35]:
# OV SLS3-M1 MHC-I: load the semicolon-delimited input table.
OV = pandas.read_csv(
    '../../data/ssv/OV.SLS3-M1.MHCI.ssv', sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
OV = OV.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'deltaRank1Rank2Score', 'accession_numbers',
        'geneSymbol', 'orfCategory', 'Ensembl_GENCODE_GeneID_v19',
        'geneSymbol_RibORF', 'type_RibORF', 'entry_name', 'numKorR',
        'totalIntensity', 'variableSites', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'protein_mw', 'species', 'accession_number', 'StartAA',
        'previous_aa', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'faimsCV', 'parent_m_over_z', 'missedCleavages',
        'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'cys', 'modifications', 'next_aa',
        'recallMstag', 'rank2Sequence', 'numCoveredAAsDB',
        'num_seqs_past_parent_filter', 'sequenceMap',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
OV['Peptide:UID'] = OV.apply(numberPeptides, axis=1)
OV['sequenceList'] = OV.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
OV.to_csv(
    '../../data/tsv/OV.SLS3-M1.MHCI.tsv', sep='\t', header=True, index=False)

## Renal Cell Carcinoma

#### RCC 9

In [36]:
# RCC 9 MHC-I: load the semicolon-delimited input table.
RCC = pandas.read_csv(
    '../../data/ssv/RCC.9.MHCI.ssv', sep=';', header=0, index_col=False)

# Discard the columns listed here; every remaining column goes to the TSV.
RCC = RCC.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'deltaRank1Rank2Score', 'accession_numbers',
        'geneSymbol', 'orfCategory', 'Ensembl_GENCODE_GeneID_v19',
        'geneSymbol_RibORF', 'type_RibORF', 'entry_name', 'numKorR',
        'totalIntensity', 'variableSites', 'matched_parent_mass',
        'delta_parent_mass', 'delta_parent_mass_ppm', 'peptide_pI',
        'protein_mw', 'species', 'accession_number', 'StartAA',
        'previous_aa', 'precursorAveragineChiSquared',
        'precursorIsolationPurityPercent', 'chromatographicPeakWidthSec',
        'trapFillMsec', 'faimsCV', 'parent_m_over_z', 'missedCleavages',
        'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'fragmentationCategory', 'cys', 'modifications', 'next_aa',
        'recallMstag', 'rank2Sequence', 'numCoveredAAsDB',
        'num_seqs_past_parent_filter', 'sequenceMap',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
RCC['Peptide:UID'] = RCC.apply(numberPeptides, axis=1)
RCC['sequenceList'] = RCC.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
RCC.to_csv(
    '../../data/tsv/RCC.9.MHCI.tsv', sep='\t', header=True, index=False)

## Database Comparison

#### Process RNA

In [37]:
# Database comparison (RNA): load the comma-separated input table.
DBC_RNA = pandas.read_csv(
    '../../data/csv/DBC.RNA.csv', sep=',', header=0, index_col=False)

# removeBlacklist returns a boolean-filtered selection of the frame.  Take an
# explicit copy so the column assignments below operate on an independent
# DataFrame instead of a flagged slice (avoids SettingWithCopyWarning).
# The surviving rows keep their original index labels.
DBC_RNA = removeBlacklist(DBC_RNA).copy()

# The allele is derived from 'directory' by B721Allele (text before the first
# underscore), so it must be computed before 'directory' is dropped.
DBC_RNA['allele'] = DBC_RNA['directory'].apply(B721Allele)

# Discard the columns listed here; every remaining column goes to the TSV.
DBC_RNA = DBC_RNA.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'StartAA', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap', 'previous_aa',
        'next_aa', 'recallMstag', 'fragmentationCategory',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'protein_mw', 'species', 'entry_name',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
DBC_RNA['Peptide:UID'] = DBC_RNA.apply(numberPeptides, axis=1)
DBC_RNA['sequenceList'] = DBC_RNA.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
DBC_RNA.to_csv(
    '../../data/tsv/DBC.RNA.tsv', sep='\t', header=True, index=False)

#### Process RPF

In [38]:
# Database comparison (RPF): load the comma-separated input table.
DBC_RPF = pandas.read_csv(
    '../../data/csv/DBC.RPF.csv', sep=',', header=0, index_col=False)

# removeBlacklist returns a boolean-filtered selection of the frame.  Take an
# explicit copy so the column assignments below operate on an independent
# DataFrame instead of a flagged slice (avoids SettingWithCopyWarning).
# The surviving rows keep their original index labels.
DBC_RPF = removeBlacklist(DBC_RPF).copy()

# The allele is derived from 'directory' by B721Allele (text before the first
# underscore), so it must be computed before 'directory' is dropped.
DBC_RPF['allele'] = DBC_RPF['directory'].apply(B721Allele)

# Discard the columns listed here; every remaining column goes to the TSV.
DBC_RPF = DBC_RPF.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'StartAA', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap', 'previous_aa',
        'next_aa', 'recallMstag', 'fragmentationCategory',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'protein_mw', 'species', 'entry_name',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
DBC_RPF['Peptide:UID'] = DBC_RPF.apply(numberPeptides, axis=1)
DBC_RPF['sequenceList'] = DBC_RPF.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
DBC_RPF.to_csv(
    '../../data/tsv/DBC.RPF.tsv', sep='\t', header=True, index=False)

#### Process B721

In [39]:
# Database comparison (B721): load the comma-separated input table.
DBC_B721 = pandas.read_csv(
    '../../data/csv/DBC.B721.csv', sep=',', header=0, index_col=False)

# removeBlacklist returns a boolean-filtered selection of the frame.  Take an
# explicit copy so the column assignments below operate on an independent
# DataFrame instead of a flagged slice (avoids SettingWithCopyWarning).
# The surviving rows keep their original index labels.
DBC_B721 = removeBlacklist(DBC_B721).copy()

# The allele is derived from 'directory' by B721Allele (text before the first
# underscore), so it must be computed before 'directory' is dropped.
DBC_B721['allele'] = DBC_B721['directory'].apply(B721Allele)

# Discard the columns listed here; every remaining column goes to the TSV.
DBC_B721 = DBC_B721.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'StartAA', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap', 'previous_aa',
        'next_aa', 'recallMstag', 'fragmentationCategory',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'protein_mw', 'species', 'entry_name',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
DBC_B721['Peptide:UID'] = DBC_B721.apply(numberPeptides, axis=1)
DBC_B721['sequenceList'] = DBC_B721.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
DBC_B721.to_csv(
    '../../data/tsv/DBC.B721.tsv', sep='\t', header=True, index=False)

#### Process Pan Sample

In [40]:
# Database comparison (pan sample): load the comma-separated input table.
DBC_PS = pandas.read_csv(
    '../../data/csv/DBC.PS.csv', sep=',', header=0, index_col=False)

# removeBlacklist returns a boolean-filtered selection of the frame.  Take an
# explicit copy so the column assignments below operate on an independent
# DataFrame instead of a flagged slice (avoids SettingWithCopyWarning).
# The surviving rows keep their original index labels.
DBC_PS = removeBlacklist(DBC_PS).copy()

# The allele is derived from 'directory' by B721Allele (text before the first
# underscore), so it must be computed before 'directory' is dropped.
DBC_PS['allele'] = DBC_PS['directory'].apply(B721Allele)

# Discard the columns listed here; every remaining column goes to the TSV.
DBC_PS = DBC_PS.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'StartAA', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap', 'previous_aa',
        'next_aa', 'recallMstag', 'fragmentationCategory',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'protein_mw', 'species', 'entry_name',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
DBC_PS['Peptide:UID'] = DBC_PS.apply(numberPeptides, axis=1)
DBC_PS['sequenceList'] = DBC_PS.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
DBC_PS.to_csv(
    '../../data/tsv/DBC.PS.tsv', sep='\t', header=True, index=False)

#### Process Null

In [41]:
# Database comparison (null): load the comma-separated input table.
DBC_NULL = pandas.read_csv(
    '../../data/csv/DBC.NULL.csv', sep=',', header=0, index_col=False)

# removeBlacklist returns a boolean-filtered selection of the frame.  Take an
# explicit copy so the column assignments below operate on an independent
# DataFrame instead of a flagged slice (avoids SettingWithCopyWarning).
# The surviving rows keep their original index labels.
DBC_NULL = removeBlacklist(DBC_NULL).copy()

# The allele is derived from 'directory' by B721Allele (text before the first
# underscore), so it must be computed before 'directory' is dropped.
DBC_NULL['allele'] = DBC_NULL['directory'].apply(B721Allele)

# Discard the columns listed here; every remaining column goes to the TSV.
DBC_NULL = DBC_NULL.drop(
    columns=[
        'number', 'directory', 'parent_charge', 'subgroupSpecific',
        'numLCrunsObserved', 'missedCleavages', 'numKorR', 'totalIntensity',
        'variableSites', 'StartAA', 'rank2Sequence', 'cys', 'modifications',
        'precursorAveragineChiSquared', 'precursorIsolationPurityPercent',
        'chromatographicPeakWidthSec', 'parent_m_over_z',
        'matched_parent_mass', 'delta_parent_mass', 'delta_parent_mass_ppm',
        'peptide_pI', 'deltaRank1Rank2Score', 'sequenceMap', 'previous_aa',
        'next_aa', 'recallMstag', 'fragmentationCategory',
        'numCoveredAAsDB', 'numNcuts', 'numCcuts', 'numIcuts', 'numNCcuts',
        'protein_mw', 'species', 'entry_name',
    ])

# 'Peptide:UID' is formatted from each row's index label (numberPeptides);
# 'sequenceList' is the comma-joined, de-duplicated set of sequence variants
# produced by mapSequenceUpdate.
DBC_NULL['Peptide:UID'] = DBC_NULL.apply(numberPeptides, axis=1)
DBC_NULL['sequenceList'] = DBC_NULL.apply(mapSequenceUpdate, axis=1)

# Write the result as a tab-separated file with a header and no index column.
DBC_NULL.to_csv(
    '../../data/tsv/DBC.NULL.tsv', sep='\t', header=True, index=False)