In [1]:
from collections import defaultdict
from glob import glob

import numpy as np
import pandas as pd
# Vaccine peptide lengths for which "selected*<length>mer.csv" inputs are read.
VACCINE_PEPTIDE_LENGTHS = [15, 21, 27]

# Input CSV column holding comma-separated minimal epitopes -> name of the
# boolean flag column written to the output tables.
EPITOPE_COLUMNS = {
    "HLA-I_peptides": "human_class1",
    "HLA-II_peptides": "human_class2",
    "Mouse MHC-I b": "h2b_class1",
    "Mouse MHC-II b": "h2b_class2",
    "Mouse MHC-I d": "h2d_class1",
    "Mouse MHC-II d": "h2d_class2",
}

# Output file-name suffix -> "any_*" flags that must ALL be true for a row to
# be written to that file. The empty suffix is the unfiltered table.
OUTPUT_SUBSETS = [
    ("", ()),
    ("-murine", ("any_murine",)),
    ("-human", ("any_human",)),
    ("-h2b", ("any_h2b",)),
    ("-h2d", ("any_h2d",)),
    ("-h2b-h2d", ("any_h2d", "any_h2b")),
    ("-human-h2d", ("any_h2d", "any_human")),
    ("-human-h2b", ("any_h2b", "any_human")),
    ("-human-h2b-h2d", ("any_h2d", "any_h2b", "any_human")),
]


def collect_epitope_sources(frames):
    """Group minimal epitopes by category and record where each one came from.

    Parameters
    ----------
    frames : iterable of pandas.DataFrame
        Each frame needs a "Sequence" column (the vaccine peptide) and every
        column named in EPITOPE_COLUMNS, holding comma-separated epitopes.
        Missing cells are treated as "no epitopes".

    Returns
    -------
    dict
        flag name -> {epitope -> set of vaccine peptide sequences containing it}.
        Empty tokens are never stored and surrounding whitespace is stripped.
    """
    sources = {flag: defaultdict(set) for flag in EPITOPE_COLUMNS.values()}
    for frame in frames:
        # An empty CSV cell is read as NaN; turn it into "" so .split works.
        frame = frame.fillna("")
        for _, row in frame.iterrows():
            vaccine_peptide = row["Sequence"]
            for column, flag in EPITOPE_COLUMNS.items():
                for epitope in row[column].split(","):
                    epitope = epitope.strip()
                    # "".split(",") yields [""], so blank tokens must be dropped.
                    if epitope:
                        sources[flag][epitope].add(vaccine_peptide)
    return sources


def build_epitope_table(sources, vaccine_peptide_length):
    """Build one row per distinct epitope with its category flags.

    Parameters
    ----------
    sources : dict
        Mapping as returned by collect_epitope_sources.
    vaccine_peptide_length : int
        Stored verbatim in the "vaccine_peptide_length" column.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by (epitope length, epitope). Columns, in order: peptide,
        length, vaccine_peptide_length, vaccine_peptides (";"-joined, sorted),
        the six per-category flags, any_human, any_h2b, any_h2d, any_murine.
        All flag columns have bool dtype, also when the table is empty.
    """
    flags = list(EPITOPE_COLUMNS.values())
    per_flag = [sources.get(flag, {}) for flag in flags]

    epitopes = sorted(
        {epitope for by_epitope in per_flag for epitope in by_epitope if epitope},
        key=lambda epitope: (len(epitope), epitope),
    )

    # Union of the vaccine peptides over all categories an epitope appears in.
    origins = {epitope: set() for epitope in epitopes}
    for by_epitope in per_flag:
        for epitope, vaccine_peptides in by_epitope.items():
            if epitope in origins:
                origins[epitope].update(vaccine_peptides)

    records = {
        "peptide": epitopes,
        "length": [len(epitope) for epitope in epitopes],
        "vaccine_peptide_length": [vaccine_peptide_length] * len(epitopes),
        "vaccine_peptides": [";".join(sorted(origins[epitope])) for epitope in epitopes],
    }
    for flag, by_epitope in zip(flags, per_flag):
        records[flag] = [epitope in by_epitope for epitope in epitopes]

    table = pd.DataFrame(records, columns=list(records))
    # Empty lists produce object columns; force bool so that the flags can be
    # combined with | and used as boolean masks even for an empty table.
    for flag in flags:
        table[flag] = table[flag].astype(bool)

    table["any_human"] = table["human_class1"] | table["human_class2"]
    table["any_h2b"] = table["h2b_class1"] | table["h2b_class2"]
    table["any_h2d"] = table["h2d_class1"] | table["h2d_class2"]
    table["any_murine"] = table["any_h2b"] | table["any_h2d"]
    return table


def write_epitope_tables(table, vaccine_peptide_length):
    """Write the full table and every subset in OUTPUT_SUBSETS as CSV files.

    Files are written to the current working directory and are named
    "minimal-epitopes-in-<length>mer-vaccine-peptides<suffix>.csv".
    """
    for suffix, required_flags in OUTPUT_SUBSETS:
        keep = pd.Series(True, index=table.index)
        for flag in required_flags:
            keep = keep & table[flag]
        table[keep].to_csv(
            "minimal-epitopes-in-%dmer-vaccine-peptides%s.csv"
            % (vaccine_peptide_length, suffix),
            index=False,
        )


for length in VACCINE_PEPTIDE_LENGTHS:
    pattern = "selected*%dmer.csv" % length
    paths = sorted(glob(pattern))
    if not paths:
        # Do not overwrite earlier results with empty tables when inputs are missing.
        print("No input files match %r; skipping %dmer tables" % (pattern, length))
        continue
    epitope_sources = collect_epitope_sources(pd.read_csv(path) for path in paths)
    df_combined = build_epitope_table(epitope_sources, length)
    write_epitope_tables(df_combined, length)
    
    
    

In [2]:
!cat minimal-epitopes-in-27mer-vaccine-peptides.csv | cut -d, -f1 




peptide
LLFNKVTL
RLFRKSNL
TDEMIAQY
YANRNRFL
YLQPRTFL
AAAYYVGYL
AEIRASANL
AEVQIDRLI
ANRNRFLYI
ASAFFGMSR
AYANRNRFL
FAYANRNRF
FIEDLLFNK
FRKSNLKPF
FVLAAVYRI
GIYQTSNFR
GLTVLPPLL
GMSRIGMEV
GYLQPRTFL
IAQYTSALL
ILDITPCSF
IYQTSNFRV
KLDDKDPNF
KSNLKPFER
KVFRSSVLH
LLFNKVTLA
LPAADLDDF
LPFFSNVTW
LPPLLTDEM
LSPRWYFYY
LTDEMIAQY
LTYTGAIKL
MEVTPSGTW
MIAQYTSAL
NRFLYIIKL
NRNRFLYII
NYNYLYRLF
RFLYIIKLI
RLDKVEAEV
RLFARTRSM
RLFRKSNLK
RLQSLQTYV
SAFFGMSRI
SPRWYFYYL
SVLNDILSR
TLADAGFIK
TLKSFTVEK
TPSGTWLTY
VLNDILSRL
YANRNRFLY
YFIASFRLF
YLQPRTFLL
YRLFRKSNL
FAYANRNRFL
FIEDLLFNKV
FLPFFSNVTW
FLYIIKLIFL
FRLFARTRSM
GMEVTPSGTW
GNYNYLYRLF
GYLQPRTFLL
ILPDPSKPSK
IYKTPPIKDF
LLTDEMIAQY
LTDEMIAQYT
LYIIKLIFLW
QFAPSASAFF
RFLYIIKLIF
SETKCTLKSF
SFIEDLLFNK
SVLNDILSRL
VTLADAGFIK
VTPSGTWLTY
YANRNRFLYI
YFIASFRLFA
YLQPRTFLLK
YPDKVFRSSV
YRLFRKSNLK
KPFERDISTEI
KVTLADAGFIK
PLLTDEMIAQY
RSFIEDLLFNK
VLNDILSRLDK
YYVGYLQPRTF
AAVYRINWITGGIAI
APSASAFFGMSRIGM
A

In [3]:
!cat minimal-epitopes-in-27mer-vaccine-peptides-murine.csv | cut -d, -f1 



peptide
LLFNKVTL
RLFRKSNL
YANRNRFL
YLQPRTFL
AAAYYVGYL
ANRNRFLYI
AYANRNRFL
FAYANRNRF
FVLAAVYRI
GYLQPRTFL
IAQYTSALL
IYQTSNFRV
LSPRWYFYY
LTYTGAIKL
MIAQYTSAL
NRFLYIIKL
NYNYLYRLF
RFLYIIKLI
RLFARTRSM
RLQSLQTYV
SAFFGMSRI
SPRWYFYYL
VLNDILSRL
YANRNRFLY
YFIASFRLF
YLQPRTFLL
FAYANRNRFL
FLPFFSNVTW
GNYNYLYRLF
GYLQPRTFLL
QFAPSASAFF
YANRNRFLYI
YYVGYLQPRTF
AQFAPSASAFFGMSR
ASFRLFARTRSMWSF
AVYRINWITGGIAIA
AYYVGYLQPRTFLLK
DEMIAQYTSALLAGT
DKVFRSSVLHSTQDL
EDLLFNKVTLADAGF
EMIAQYTSALLAGTI
FGGFNFSQILPDPSK
FIASFRLFARTRSMW
FRLFARTRSMWSFNP
GFNFSQILPDPSKPS
GGFNFSQILPDPSKP
GTWLTYTGAIKLDDK
IAQFAPSASAFFGMS
IASFRLFARTRSMWS
ISTEIYQAGSTPCNG
KKQQTVTLLPAADLD
KQQTVTLLPAADLDD
LIRAAEIRASANLAA
LLFNKVTLADAGFIK
LLQFAYANRNRFLYI
LQTYVTQQLIRAAEI
LTDEMIAQYTSALLA
MIAQYTSALLAGTIT
PDKVFRSSVLHSTQD
PQIAQFAPSASAFFG
PRWYFYYLGTGPEAG
PSGTWLTYTGAIKLD
QIAQFAPSASAFFGM
QKKQQTVTLLPAADL
QLIRAAEIRASANLA
QQLIRAAEIRASANL
QSLQTYVTQQLIRAA
QTYVTQQLIRAAEIR
RLFARTRSMWSFNPE
RSMWSFNPETNI

In [4]:

!cat minimal-epitopes-in-27mer-vaccine-peptides-human-h2b-h2d.csv | cut -d, -f1 

!cat minimal-epitopes-in-27mer-vaccine-peptides-human-h2b-h2d.csv | cut -d, -f1 | wc -l


peptide
AAAYYVGYL
ANRNRFLYI
FAYANRNRF
IAQYTSALL
SAFFGMSRI
SPRWYFYYL
FAYANRNRFL
YANRNRFLYI
ASFRLFARTRSMWSF
DEMIAQYTSALLAGT
DKVFRSSVLHSTQDL
EMIAQYTSALLAGTI
FRLFARTRSMWSFNP
IAQFAPSASAFFGMS
IASFRLFARTRSMWS
LIRAAEIRASANLAA
LLQFAYANRNRFLYI
LTDEMIAQYTSALLA
MIAQYTSALLAGTIT
PDKVFRSSVLHSTQD
PQIAQFAPSASAFFG
QIAQFAPSASAFFGM
QLIRAAEIRASANLA
QQLIRAAEIRASANL
SFRLFARTRSMWSFN
TDEMIAQYTSALLAG
TQQLIRAAEIRASAN
VTQQLIRAAEIRASA
VYRINWITGGIAIAM
WPQIAQFAPSASAFF
YPDKVFRSSVLHSTQ
YRINWITGGIAIAMA
YVTQQLIRAAEIRAS
      34


In [5]:

!cat minimal-epitopes-in-27mer-vaccine-peptides-h2d.csv | cut -d, -f1 

!cat minimal-epitopes-in-27mer-vaccine-peptides-h2d.csv | cut -d, -f1 | wc -l


peptide
YLQPRTFL
AAAYYVGYL
ANRNRFLYI
AYANRNRFL
FAYANRNRF
GYLQPRTFL
IAQYTSALL
IYQTSNFRV
LSPRWYFYY
NYNYLYRLF
RFLYIIKLI
RLQSLQTYV
SAFFGMSRI
SPRWYFYYL
YFIASFRLF
YLQPRTFLL
FAYANRNRFL
FLPFFSNVTW
GYLQPRTFLL
QFAPSASAFF
YANRNRFLYI
YYVGYLQPRTF
ASFRLFARTRSMWSF
DEMIAQYTSALLAGT
DKVFRSSVLHSTQDL
EDLLFNKVTLADAGF
EMIAQYTSALLAGTI
FIASFRLFARTRSMW
FRLFARTRSMWSFNP
IAQFAPSASAFFGMS
IASFRLFARTRSMWS
KKQQTVTLLPAADLD
KQQTVTLLPAADLDD
LIRAAEIRASANLAA
LLFNKVTLADAGFIK
LLQFAYANRNRFLYI
LQTYVTQQLIRAAEI
LTDEMIAQYTSALLA
MIAQYTSALLAGTIT
PDKVFRSSVLHSTQD
PQIAQFAPSASAFFG
QIAQFAPSASAFFGM
QKKQQTVTLLPAADL
QLIRAAEIRASANLA
QQLIRAAEIRASANL
QSLQTYVTQQLIRAA
QTYVTQQLIRAAEIR
RLFARTRSMWSFNPE
SFRLFARTRSMWSFN
SLQTYVTQQLIRAAE
TDEMIAQYTSALLAG
TQQLIRAAEIRASAN
TYVTQQLIRAAEIRA
VTQQLIRAAEIRASA
VYRINWITGGIAIAM
WPQIAQFAPSASAFF
YFIASFRLFARTRSM
YPDKVFRSSVLHSTQ
YRINWITGGIAIAMA
YVTQQLIRAAEIRAS
      61


In [6]:

!cat minimal-epitopes-in-27mer-vaccine-peptides-human-h2d.csv | cut -d, -f1 

!cat minimal-epitopes-in-27mer-vaccine-peptides-human-h2d.csv | cut -d, -f1 | wc -l


peptide
YLQPRTFL
AAAYYVGYL
ANRNRFLYI
AYANRNRFL
FAYANRNRF
GYLQPRTFL
IAQYTSALL
IYQTSNFRV
LSPRWYFYY
NYNYLYRLF
RFLYIIKLI
RLQSLQTYV
SAFFGMSRI
SPRWYFYYL
YFIASFRLF
YLQPRTFLL
FAYANRNRFL
FLPFFSNVTW
GYLQPRTFLL
QFAPSASAFF
YANRNRFLYI
YYVGYLQPRTF
ASFRLFARTRSMWSF
DEMIAQYTSALLAGT
DKVFRSSVLHSTQDL
EDLLFNKVTLADAGF
EMIAQYTSALLAGTI
FIASFRLFARTRSMW
FRLFARTRSMWSFNP
IAQFAPSASAFFGMS
IASFRLFARTRSMWS
KKQQTVTLLPAADLD
KQQTVTLLPAADLDD
LIRAAEIRASANLAA
LLFNKVTLADAGFIK
LLQFAYANRNRFLYI
LQTYVTQQLIRAAEI
LTDEMIAQYTSALLA
MIAQYTSALLAGTIT
PDKVFRSSVLHSTQD
PQIAQFAPSASAFFG
QIAQFAPSASAFFGM
QKKQQTVTLLPAADL
QLIRAAEIRASANLA
QQLIRAAEIRASANL
QSLQTYVTQQLIRAA
QTYVTQQLIRAAEIR
RLFARTRSMWSFNPE
SFRLFARTRSMWSFN
SLQTYVTQQLIRAAE
TDEMIAQYTSALLAG
TQQLIRAAEIRASAN
TYVTQQLIRAAEIRA
VTQQLIRAAEIRASA
VYRINWITGGIAIAM
WPQIAQFAPSASAFF
YFIASFRLFARTRSM
YPDKVFRSSVLHSTQ
YRINWITGGIAIAMA
YVTQQLIRAAEIRAS
      61
