**4/28/21**

The purpose of this notebook is to collect all of the human and bacterial proteins identified in the Community, Tailored, Hybrid, Single, and Pooled database searches for later annotation and functional characterization.

In [1]:
from elliot_utils import *

In [2]:
# Directory (relative to the working directory) that this notebook's output files are written into
analysisPath = Path('analysis_files') / 'functional_analysis'

In [3]:
# '.tsv' result files for each search type, looked up in the same order as before:
# Community, Tailored, Hybrid, Single, Pooled.
# NOTE(review): getOrderedFiles presumably returns the files in a stable order so they can be
# paired by index with the databases -- confirm in elliot_utils.
cResults, tResults, hResults, sResults, pResults = [
    getOrderedFiles(resultsLocation, '.tsv')
    for resultsLocation in (
        COMMUNITY_RESULTS,
        TAILORED_RESULTS,
        HYBRID_RESULTS,
        SINGLE_RESULTS,
        POOLED_RESULTS,
    )
]

In [4]:
# '.fasta' database files for each search type, looked up in the same order as the results:
# Community, Tailored, Hybrid, Single, Pooled.
cDBs, tDBs, hDBs, sDBs, pDBs = [
    getOrderedFiles(dbLocation, '.fasta')
    for dbLocation in (
        COMMUNITY_DB,
        TAILORED_DB,
        HYBRID_DB,
        SINGLE_DB,
        POOLED_DB,
    )
]

In [5]:
# Build one ProtRef per database file, keeping a separate list for each search type
cRefs, tRefs, hRefs, sRefs, pRefs = [
    [ProtRef(dbFile) for dbFile in dbFiles]
    for dbFiles in (cDBs, tDBs, hDBs, sDBs, pDBs)
]

In [6]:
# Filtered peptides for every search type: first the human ones, then the bacterial ones
# (the calls run in the same order as when they were written out one per line).
cHPeps, tHPeps, hHPeps, sHPeps, pHPeps = [
    getFilteredPeptides(resultFiles, 'human')
    for resultFiles in (cResults, tResults, hResults, sResults, pResults)
]
cPeps, tPeps, hPeps, sPeps, pPeps = [
    getFilteredPeptides(resultFiles, 'bacteria')
    for resultFiles in (cResults, tResults, hResults, sResults, pResults)
]

In [7]:
def collectProtsInDict(results, refs, allowedPeps, typeOfProt, outDict):
    """Pull the valid, hit proteins of one type out of the results and add them to outDict.

    Proteins with the same ID are collapsed together: the first protein object
    seen for an ID is stored, and the taxa of any later protein with that ID are
    added to the stored object.

    Args:
        results: Result files, each opened with .open() and read as tab-delimited text.
        refs: Protein references used to look up each hit via getProt(). Either one
            reference per result file (paired by position) or a single reference
            that is shared by every result file.
        allowedPeps: Container of peptides; a row is only used if its peptide is in it.
        typeOfProt: The ID type (e.g. 'bacteria' or 'human') a row must have to be used.
        outDict: Dictionary updated in place. Format is key=protID, value=protein object.

    Returns:
        None. The collected proteins are added to outDict.
    """
    # A lone reference is shared by all result files; otherwise refs pair up with results by index.
    useSharedRef = len(refs) <= 1
    for i, res in enumerate(results):
        ref = refs[0] if useSharedRef else refs[i]
        # newline='' leaves line-ending handling to the csv module, as the csv documentation
        # requires for file objects passed to csv.reader.
        with res.open(mode='r', newline='') as infile:
            for row in csv.reader(infile, delimiter='\t'):
                protType = determineIDType(row)
                if protType == 'first':
                    # NOTE(review): presumably the header row -- confirm against determineIDType
                    continue
                if not isSignificant(row):
                    # Stop reading this file at the first non-significant row.
                    # NOTE(review): assumes rows are sorted by significance -- confirm.
                    break
                if row[PEPTIDE] not in allowedPeps or protType != typeOfProt:
                    continue
                for hit in getProteinHitList(row, typeOfProt):
                    prot = ref.getProt(hit)
                    if prot.id in outDict:
                        # Already collected: merge this protein's taxa into the stored object
                        stored = outDict[prot.id]
                        for t in prot.taxa:
                            stored.addTaxa(t)
                    else:
                        outDict[prot.id] = prot

In [8]:
# Gather the bacterial protein hits from every search type into one dictionary,
# processing the searches in order: Community, Tailored, Hybrid, Single, Pooled
allProts = {} # key=protID, value=protein object
for searchResults, searchRefs, searchPeps in (
    (cResults, cRefs, cPeps),
    (tResults, tRefs, tPeps),
    (hResults, hRefs, hPeps),
    (sResults, sRefs, sPeps),
    (pResults, pRefs, pPeps),
):
    collectProtsInDict(searchResults, searchRefs, searchPeps, 'bacteria', allProts)

In [9]:
# Write the entry text of every collected bacterial protein into a single file
allProtsFasta = analysisPath / 'all_hit_bacterial_proteins.fasta'
toWrite = [prot.getEntry() for prot in allProts.values()]
with allProtsFasta.open('w', newline='') as output:
    output.write(''.join(toWrite))

In [10]:
# Gather the human protein hits from every search type into one dictionary,
# processing the searches in order: Community, Tailored, Hybrid, Single, Pooled
allHumanProts = {} # key=protID, value=protein object
for searchResults, searchRefs, searchPeps in (
    (cResults, cRefs, cHPeps),
    (tResults, tRefs, tHPeps),
    (hResults, hRefs, hHPeps),
    (sResults, sRefs, sHPeps),
    (pResults, pRefs, pHPeps),
):
    collectProtsInDict(searchResults, searchRefs, searchPeps, 'human', allHumanProts)

In [12]:
# Write the entry text of every collected human protein into a single file
allHumanProtsFasta = analysisPath / 'all_hit_human_proteins.fasta'
toWrite = [prot.getEntry() for prot in allHumanProts.values()]
with allHumanProtsFasta.open('w', newline='') as output:
    output.write(''.join(toWrite))