In [None]:
#%%

import pandas as pd
import os
import single_cell_reloc_parquet.global_functions.global_variables as glv
from icecream import ic
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq

In [None]:
#%%

#* Dictionaries for renaming reg results
Tkach_dictionary = {'to cyto': ' -> cytoplasm',
	   'to nuc': ' -> nucleus',
	   'nucleus': ' -> nucleus',
	   'nuc foci': ' -> nuclear foci',
	   'cyto foci': ' -> cyto foci',
	   'from budneck/tip': 'bud -> ',
	   'from bud tip': 'bud -> ',
	   'to pm': ' -> plasma membrane',
	   'to bud neck': ' -> bud neck',
	   'to nuc/periphery': ' -> nucleus and nuclear periph',
	   'to nuc (from nuc foci)': 'nuclear foci -> nucleus',
	   'nuc foci': ' -> nuclear foci',
	   'from bud neck': 'bud neck -> ',
	   'to vac': ' -> vacuole',
	   'to nuc, nuc foci, cyto foci' : ' -> nucleus and nuclear foci and cyto foci', #* This one is hard to deal with. Almost decided to do a leveled assignment
	#    'no cells': np.nan,
	   'to pm foci': ' -> pm foci',
	   'to nucleolus, cyto foci' : ' -> nuceolus and cyto foci',
	   'to vac (from pm)': 'plasma membrane -> vacuole',
	   'to nucleolus': ' -> nucleolus',
	   'to nuc periph (cyto?)': 'cytoplasm -> nuclear periph',
	   'nuc foci (weak)': ' -> nuclear foci',
	   'to cyto (from pm/endosome)': 'plasma membrane -> cytoplasm',
	   'to nuc (from nucleolus)': 'nucleolus -> nucleus',
	   'er foci': ' -> ER foci',
	   'to vac (abund inc)': ' -> vacuole',
	   'to vac (focus)': ' -> vacuole foci',
	   'to cyto (from pm)': 'plasma membrane -> cytoplasm',
	   'shorter microtubules': ' -> microtubules',
	   'to nuc/nuc periph': ' -> nucleus and nuclear periphery',
	   'to cyto (daughter)': ' -> cytoplasm (daughter)',
	   'to diffuse nuc (really, more numerous less intense foci, from foci)': 'nuclear foci -> nucleus and nuclear foci',
	   'to cyto (decreased abund)': 'cytoplasm -> cytoplasm',
	   'from budneck/tip, to pm': 'bud -> plasma membrane',
	   'to pm foci (endosome)': ' -> pm foci',
	   'to nuc (nucleolus)': ' -> nucleolus',
	   'to nucleolus (weak)': ' -> nucleolus',
	   'not the same strain': np.nan,
	   'to er': ' -> ER',
	   'from budneck': 'bud neck -> ',
	   'nuc periph' : ' -> nuclear periph',
	   'to pm (foci)': ' -> pm foci',
	   'to vac (from vac mb)': 'vacuole -> vacuole',
	   'from bud neck (to pm)': 'bud neck -> plamsa membrane',
	   'to diffuse nuc (from foci)': 'nuclear foci -> nucleus',
	   'to er foci (weak)': ' -> ER foci',
	   'nuc periph foci': ' -> nuclear foci and nuclear periph',
	   'to nuc (diffuse)': ' -> nucleus',
	   'from bud  tip': 'bud neck -> ',
	   'to nuc (nucleolus?)': ' -> nucleus',
	   'to cyto (from vac)': 'vacuole -> cytoplasm',
	   'to er foci(?)': ' -> ER foci',
	   'cyto foci (nuc foci?)': ' -> cyto foci',
	   'to nuc (?)': ' -> nucleus',
	   'to cyto (from cyto foci/vac)' : 'cyto foci and vacuole -> cytoplasm',
	   'to cyto (weak)': ' -> cytoplasm',
	   'vacuole membrane (focus)' : ' -> vacuole foci',
	   'to cyto (from vac, degradation?)': 'vacuole -> cytoplasm',
	   'vac foci?': ' -> vacuole foci'}



Denervaud_dictionary = {'Cyto->Nuc': 'cytoplasm -> nucleus',
		'Nuc -> Cytoplasm': 'nucleus -> cytoplasm',
		'Nuc -> NucFoci': 'nucleus -> nuclear foci',
		'Nuc Periphery Agg': ' -> nuclear periph',
		'Cyto Agg': 'cytoplasm -> cytoplasm', #!
		'Cyto Disagg': 'cytoplasm -> cytoplasm', #!
		   'From cell Periphery': 'plasma membrane -> cytoplasm',#!
		'To Cell Periphery': 'cytoplasm -> plasma membrane'#!
}

Mazumder_dictionary = {'cytoplasm': ' -> cytoplasm',
		'nucleus': ' -> nucleus',
		'cytoplasm,nucleus': ' -> cytoplasm and nucleus',
		'vacuole': ' -> vacuole',
		'ambiguous': '',
		'cytoplasm,punctate composite': ' -> cyto foci',
		'microtubule': ' -> microtubule',
		'ambiguous,spindle pole': ' -> spindle pole',
		'spindle pole': ' -> spindle pole',
		'mitochondrion': ' -> mitochondrion',
		'peroxisome': ' -> peroxisome',
		'ER,ambiguous,cytoplasm': ' -> ER and cytoplasm',
		'cytoplasm,late Golgi': ' -> cytoplasm and late Golgi',
		'late Golgi': ' -> late Golgi',
		'nucleolus,nucleus': ' -> nucleolus and nucleus',
		'nucleolus': ' -> nucleolus',
		'ER': ' -> ER',
		'actin': ' -> actin',
		'cytoplasm,mitochondrion,nucleus': ' -> cytoplasm and mitochondrion and nucleus',
		'cytoplasm,nucleus,punctate composite': ' -> cytoplasm and nucleus and punctate composite',
		'cytoplasm,nucleolus': ' -> cytoplasm and nucleolus',
		'nuclear periphery': ' -> nuclear periphery',
		'bud neck,cytoplasm,bud': ' -> bud neck and cytoplasm and bud',
		'ER,cell periphery': ' -> ER and cell periphery',
		'punctate composite': ' -> punctate composite',
		'endosome': ' -> endosome',
		'vacuolar membrane': ' -> vacuolar membrane',
		'cell periphery,bud': ' -> cell periphery and bud',
		'bud neck,cytoplasm,mitochondrion,cell periphery': ' -> bud neck and cytoplasm and mitochondrion and cell periphery',
		'cytoplasm,mitochondrion': ' -> cytoplasm and mitochondrion',
		'cytoplasm,nuclear periphery,nucleus': ' -> cytoplasm and nuclear periphery and nucleus',
		'ambiguous,cytoplasm,nucleus': ' -> cytoplasm and nucleus',
		'ambiguous,bud neck,cytoplasm,cell periphery,bud': ' -> bud neck and cytoplasm and cell periphery and bud',
		'cytoplasm,nucleolus,nucleus': ' -> cytoplasm and nucleolus and nucleus',
		'ER,cytoplasm': ' -> ER and cytoplasm',
		'bud neck,cytoplasm': ' -> bud neck and cytoplasm',
		'cytoplasm,nuclear periphery': ' -> cytoplasm and nuclear periphery',
		'ambiguous,nucleus': ' -> nucleus',
		'vacuolar membrane,endosome': ' -> vacuolar membrane and endosome',
		'lipid particle': ' -> lipid particle',
		'bud neck': ' -> bud neck',
		'bud neck,cell periphery': ' -> bud neck and cell periphery',
		'ambiguous,bud neck,cell periphery,bud': ' -> bud neck and cell periphery and bud',
		'cell periphery': ' -> cell periphery',
		'bud neck,cytoplasm,cell periphery,bud': ' -> bud neck and cytoplasm and cell periphery and bud',
		'cytoplasm,actin': ' -> cytoplasm and actin',
		'Golgi': ' -> Golgi',
		'ambiguous,bud neck,cytoplasm,bud': ' -> bud neck and cytoplasm and bud',
		'ER to Golgi': 'ER -> Golgi',
		'ER,ambiguous,bud': ' -> ER and bud',
		'ambiguous,endosome': ' -> endosome',
		'bud neck,cytoplasm,cell periphery': ' -> bud neck and cytoplasm and cell periphery',
		'punctate composite,late Golgi': ' -> punctate composite and late Golgi',
		'cytoplasm,nucleus,bud': ' -> cytoplasm and nucleus and bud',
		'early Golgi': ' -> early Golgi',
		'punctate composite,lipid particle': ' -> punctate composite and lipid particle',
		'bud neck,cell periphery,punctate composite': ' -> bud neck and cell periphery and punctate composite',
		'mitochondrion,nucleus': ' -> mitochondrion and nucleus',
		'ambiguous,cytoplasm,bud': ' -> cytoplasm and bud',
		'cytoplasm,cell periphery': ' -> cytoplasm and cell periphery',
		'ambiguous,cytoplasm': ' -> cytoplasm',
		'cell periphery,vacuole': ' -> cell periphery and vacuole',
		'punctate composite,early Golgi,late Golgi': ' -> punctate composite and early Golgi and late Golgi',
		'nucleus,spindle pole,microtubule': ' -> nucleus and spindle pole and microtubule',
		'cytoplasm,punctate composite,endosome': ' -> cytoplasm and punctate composite and endosome',
		'punctate composite,early Golgi': ' -> punctate composite and early Golgi',
		'bud neck,cytoplasm,nucleus': ' -> bud neck and cytoplasm and nucleus',
		'Golgi,early Golgi': ' -> Golgi and early Golgi',
		'ambiguous,vacuolar membrane': ' -> vacuolar membrane',
		'ambiguous,cell periphery,bud': ' -> cell periphery and bud',
		'cytoplasm,bud': ' -> cytoplasm and bud',
		'ambiguous,late Golgi': ' -> late Golgi',
		'ambiguous,mitochondrion': ' -> mitochondrion',
		'cytoplasm,endosome,lipid particle': ' -> cytoplasm and endosome and lipid particle',
		'bud neck,cytoplasm,cell periphery,punctate composite,bud': 'bud neck and cytoplasm,cell periphery and punctate composite and bud',
		'ER,cytoplasm,nucleus': ' -> ER and cytoplasm and nucleus',
		'ambiguous,bud neck,bud': ' -> bud neck and bud',
		'cytoplasm,vacuole': ' -> cytoplasm and vacuole',
		'mitochondrion,punctate composite': ' -> mitochondrion and punctate composite',
		'early Golgi,late Golgi': ' -> early Golgi and late Golgi',
		'ER,mitochondrion,nuclear periphery': ' -> ER and mitochondrion and nuclear periphery',
		'cytoplasm,spindle pole': ' -> cytoplasm and spindle pole',
		'spindle pole,microtubule': ' -> spindle pole and microtubule',
		'punctate composite,Golgi': ' -> punctate composite and Golgi',
		'vacuole,endosome': ' -> vacuole and endosome',
		'punctate composite,endosome': ' -> punctate composite and endosome',
		'nuclear periphery,nucleus': ' -> nuclear periphery and nucleus',
		'ambiguous,bud neck,cell periphery,vacuole,bud': ' -> bud neck and cell periphery and vacuole and bud',
		'bud neck,cell periphery,bud': ' -> bud neck and cell periphery and bud',
		'cytoplasm,nucleus,spindle pole': ' -> cytoplasm and nucleus and spindle pole',
		'cytoplasm,endosome': ' -> cytoplasm and endosome',
		'ambiguous,cytoplasm,punctate composite': ' -> cytoplasm and punctate composite',
		'cytoplasm,Golgi,early Golgi': ' -> cytoplasm and Golgi and early Golgi',
		'punctate composite,vacuolar membrane,lipid particle': ' -> punctate composite and vacuolar membrane and lipid particle',
		'endosome,lipid particle': ' -> endosome and lipid particle',
		'vacuole,vacuolar membrane': ' -> vacuole and vacuolar membrane',
		'ER,vacuole': ' -> ER and vacuole',
		'ER,cell periphery,bud': ' -> ER,cell periphery and bud',
		'ambiguous,cytoplasm,cell periphery,bud': ' -> cytoplasm and cell periphery and bud',
		'punctate composite,Golgi,early Golgi': ' -> punctate composite and Golgi and early Golgi',
		'cytoplasm,nuclear periphery,punctate composite': ' -> cytoplasm and nuclear periphery and punctate composite',
		'nucleolus,microtubule': ' -> nucleolus and microtubule',
		'ambiguous,punctate composite': ' -> punctate composite',
		'ER,cell periphery,vacuole,bud': ' -> ER and cell periphery and vacuole,bud',
		'punctate composite,spindle pole': ' -> punctate composite and spindle pole',
		'nucleus,spindle pole': ' -> nucleus and spindle pole',
		'ambiguous,bud neck,cytoplasm,vacuole,bud': ' -> bud neck and cytoplasm and vacuole,bud',
		'cytoplasm,cell periphery,bud': ' -> cytoplasm and cell periphery and bud',
		'cytoplasm,early Golgi': ' -> cytoplasm and early Golgi',
		'punctate composite,actin': ' -> punctate composite and actin',
		'bud neck,nucleus': ' -> bud neck and nucleus',
		'ER,nucleus': ' -> ER and nucleus',
		'cytoplasm,punctate composite,spindle pole, microtubule': 'cytoplasm and punctate composite and spindle pole and microtubule',
		'microtubule': ' -> microtubule',
		'ambiguous,Golgi,early Golgi': ' -> Golgi and early Golgi',
		'punctate composite,endosome,early Golgi,late Golgi': ' -> punctate composite and endosome and early Golgi and late Golgi',
		'nucleus,microtubule': ' -> nucleus and microtubule',
		'ambiguous,bud neck,cytoplasm,nucleus,bud': ' -> bud neck and cytoplasm and nucleus and bud',
		'vacuole,Golgi': ' -> vacuole and Golgi',
		'ambiguous,Golgi': ' -> Golgi',
		'ambiguous,bud neck,cell periphery,punctate composite,late Golgi,bud': 'bud neck and cell periphery and punctate composite and late Golgi and bud', 'late Golgi,bud': 'late Golgi and bud',
		'ER,ambiguous': ' -> ER,ambiguous',
		'bud neck,cell periphery,vacuole': ' -> bud neck and cell periphery and vacuole',
		'ambiguous,punctate composite,bud': ' -> punctate composite and bud',
		'ambiguous,nuclear periphery': 'nuclear periphery'}

In [None]:
#%%

Den_ycd_map_dict = {'cytoplasm,punctate': 'cytoplasm and punctate',
'cytoplasm': 'cytoplasm',
'nothing': 'nothing',
'nothing,nucleus': 'nucleus',
'bud,vacuole,punctate': 'bud and vacuole and punctate',
'cytoplasm,bud,punctate': 'cytoplasm and bud and punctate',
'cytoplasm,nucleus': 'cytoplasm and nucleus',
'nucleus': 'nucleus',
'nothing,cytoplasm': 'cytoplasm',
'nothing,cytoplasm,nucleus': 'cytoplasm and nucleus',
'cytoplasm,bud': 'cytoplasm and bud',
'punctate,actin/spindle': 'punctate and actin/spindle',
'cytoplasm,ER,punctate': 'cytoplasm and ER and punctate',
'nucleolus': 'nucleolus',
'nothing,cytoplasm,bud,punctate': 'cytoplasm and bud and punctate',
'nothing,bud': 'bud',
'cytoplasm,nucleus,bud,punctate': 'cytoplasm and nucleus and bud and punctate',
'cytoplasm,nucleus,unclassified': 'cytoplasm and nucleus',
'mitochondrion': 'mitochondrion',
'cytoplasm,unclassified': 'cytoplasm',
'actin/spindle': 'actin/spindle',
'cytoplasm,cell periphery,punctate': 'cytoplasm and cell periphery and punctate',
'nothing,cytoplasm,punctate': 'cytoplasm and punctate',
'cytoplasm,nucleus,punctate,actin/spindle': 'cytoplasm and nucleus and punctate and actin/spindle',
'cytoplasm,bud,punctate,actin/spindle': 'cytoplasm and bud and punctate and actin/spindle',
'nothing,cytoplasm,nuclear periphery': 'cytoplasm and nuclear periphery',
'nothing,cytoplasm,punctate,actin/spindle,unclassified': 'cytoplasm and punctate and actin/spindle',
'nothing,punctate': 'punctate',
'cytoplasm,nucleus,nucleolus,punctate': 'cytoplasm and nucleus and nucleolus and punctate',
'punctate': 'punctate',
'bud': 'bud',
'nothing,cytoplasm,unclassified': 'cytoplasm',
'mitochondrion,punctate,unclassified': 'mitochondrion and punctate',
'cytoplasm,mitochondrion': 'cytoplasm and mitochondrion',
'nucleus,punctate': 'nucleus and punctate',
'bud,unclassified': 'bud',
'cytoplasm,ER': 'cytoplasm and ER',
'cytoplasm,nucleus,vacuole': 'cytoplasm and nucleus and vacuole',
'vacuole': 'vacuole',
'cytoplasm,nucleus,nuclear periphery': 'cytoplasm and nucleus and nuclear periphery',
'cytoplasm,nuclear periphery,vacuole': 'cytoplasm and nuclear periphery and vacuole',
'nuclear periphery': 'nuclear periphery',
'structured,punctate': 'structured and punctate',
'cell periphery,vacuole,punctate,unclassified': 'cell periphery and vacuole and punctate',
'cell periphery': 'cell periphery',
'cytoplasm,vacuole': 'cytoplasm and vacuole',
'cell periphery,unclassified': 'cell periphery',
'nucleolus,structured,punctate': 'nucleolus and structured and punctate',
'cell periphery,ER,punctate': 'cell periphery and ER and punctate',
'cytoplasm,cell periphery': 'cytoplasm and cell periphery',
'nuclear periphery,punctate': 'nuclear periphery and punctate',
'nuclear periphery,cell periphery,ER': 'nuclear periphery and cell periphery and ER',
'cytoplasm,nucleus,bud': 'cytoplasm and nucleus and bud',
'nothing,mitochondrion': 'mitochondrion',
'ER': 'ER',
'nuclear periphery,cell periphery,ER,punctate,unclassified': 'nuclear periphery and cell periphery and ER and punctate',
'cell periphery,punctate,unclassified': 'cell periphery and punctate',
'cytoplasm,nucleus,nucleolus': 'cytoplasm and nucleus and nucleolus',
'nuclear periphery,ER': 'nuclear periphery and ER',
'cytoplasm,cell periphery,structured,punctate': 'cytoplasm and cell periphery and structured and punctate',
'cytoplasm,mitochondrion,ER': 'cytoplasm and mitochondrion and ER',
'nuclear periphery,ER,punctate': 'nuclear periphery and ER and punctate',
'cytoplasm,nuclear periphery,ER': 'cytoplasm and nuclear periphery and ER',
'cell periphery,ER,vacuole': 'cell periphery and ER and vacuole',
'nuclear periphery,vacuole': 'nuclear periphery and vacuole',
'nucleus,nucleolus': 'nucleus and nucleolus',
'cytoplasm,nucleus,cell periphery': 'cytoplasm and nucleus and cell periphery',
'cytoplasm,nuclear periphery': 'cytoplasm and nuclear periphery',
'nothing,cytoplasm,mitochondrion': 'cytoplasm and mitochondrion',
'cytoplasm,nucleolus': 'cytoplasm and nucleolus',
'cytoplasm,structured': 'cytoplasm and structured',
'nothing,cytoplasm,structured': 'cytoplasm and structured',
'cell periphery,bud': 'cell periphery and bud',
'cell periphery,ER': 'cell periphery and ER',
'bud,punctate': 'bud and punctate',
'ER,punctate': 'ER and punctate',
'mitochondrion,structured': 'mitochondrion and structured',
'nucleus,cell periphery,vacuole': 'nucleus and cell periphery and vacuole',
'nucleolus,nuclear periphery,punctate': 'nucleolus and nuclear periphery and punctate',
'cytoplasm,vacuole,actin/spindle': 'cytoplasm and vacuole and actin/spindle',
'ER,unclassified': 'ER',
'cytoplasm,mitochondrion,unclassified': 'cytoplasm and mitochondrion',
'cytoplasm,cell periphery,vacuole': 'cytoplasm and cell periphery and vacuole',
'nucleolus,cell periphery,vacuole,punctate': 'nucleolus and cell periphery and vacuole and punctate',
'cytoplasm,nucleus,cell periphery,punctate,unclassified': 'cytoplasm and nucleus and cell periphery and punctate',
'cytoplasm,nuclear periphery,vacuole,punctate': 'cytoplasm and nuclear periphery and vacuole and punctate',
'nuclear periphery,cell periphery,vacuole,unclassified': 'nuclear periphery and cell periphery and vacuole',
'nucleus,nuclear periphery,vacuole': 'nucleus and nuclear periphery and vacuole',
'vacuole,punctate,unclassified': 'vacuole and punctate',
'cytoplasm,nuclear periphery,punctate': 'cytoplasm and nuclear periphery and punctate',
'cytoplasm,nucleus,nuclear periphery,punctate': 'cytoplasm and nucleus and nuclear periphery and punctate',
'cell periphery,vacuole,punctate': 'cell periphery and vacuole and punctate',
'nucleus,vacuole,punctate': 'nucleus and vacuole and punctate',
'mitochondrion,punctate': 'mitochondrion and punctate',
'nucleus,cell periphery,punctate,unclassified': 'nucleus and cell periphery and punctate',
'ER,vacuole': 'ER and vacuole',
'unclassified': 'unclassified',
'nucleus,nuclear periphery': 'nucleus and nuclear periphery',
'cytoplasm,vacuole,punctate,actin/spindle': 'cytoplasm and vacuole and punctate and actin/spindle',
'cytoplasm,vacuole,punctate': 'cytoplasm and vacuole and punctate',
'mitochondrion,structured,unclassified': 'mitochondrion and structured',
'nucleolus,nuclear periphery': 'nucleolus and nuclear periphery',
'cytoplasm,nucleus,vacuole,punctate': 'cytoplasm and nucleus and vacuole and punctate',
'cytoplasm,cell periphery,mitochondrion,punctate': 'cytoplasm and cell periphery and mitochondrion and punctate',
'nuclear periphery,cell periphery,punctate,unclassified': 'nuclear periphery and cell periphery and punctate',
'cytoplasm,mitochondrion,punctate': 'cytoplasm and mitochondrion and punctate',
'cytoplasm,nuclear periphery,ER,unclassified': 'cytoplasm and nuclear periphery and ER',
'vacuole,punctate,actin/spindle': 'vacuole and punctate and actin/spindle',
'cytoplasm,cell periphery,vacuole,punctate': 'cytoplasm and cell periphery and vacuole and punctate',
'mitochondrion,unclassified': 'mitochondrion',
'nucleus,vacuole,unclassified': 'nucleus and vacuole',
'cytoplasm,nucleus,punctate,unclassified': 'cytoplasm and nucleus and punctate',
'cytoplasm,nucleus,nuclear periphery,unclassified': 'cytoplasm and nucleus and nuclear periphery',
'nuclear periphery,cell periphery,ER,unclassified': 'nuclear periphery and cell periphery and ER',
'nucleus,nucleolus,punctate': 'nucleus and nucleolus and punctate',
'cytoplasm,nucleus,nuclear periphery,ER,unclassified': 'cytoplasm and nucleus and nuclear periphery and ER',
'cytoplasm,mitochondrion,structured': 'cytoplasm and mitochondrion and structured',
'structured': 'structured',
'nucleus,unclassified': 'nucleus',
'cytoplasm,nucleus,cell periphery,vacuole': 'cytoplasm and nucleus and cell periphery and vacuole',
'cell periphery,bud,punctate': 'cell periphery and bud and punctate',
'cytoplasm,punctate,actin/spindle': 'cytoplasm and punctate and actin/spindle',
'nucleus,nuclear periphery,mitochondrion,structured': 'nucleus and nuclear periphery and mitochondrion and structured',
'nothing,cytoplasm,cell periphery': 'cytoplasm and cell periphery',
'cytoplasm,bud,ER,punctate': 'cytoplasm and bud and ER and punctate',
'vacuole,unclassified': 'vacuole',
'vacuole,punctate': 'vacuole and punctate',
'nuclear periphery,ER,unclassified': 'nuclear periphery and ER',
'structured,ER,punctate,unclassified': 'structured and ER and punctate',
'ER,punctate,actin/spindle': 'ER and punctate and actin/spindle',
'cytoplasm,punctate,unclassified': 'cytoplasm and punctate',
'nucleus,nuclear periphery,unclassified': 'nucleus and nuclear periphery',
'cell periphery,bud,ER,punctate': 'cell periphery and bud and ER and punctate',
'nuclear periphery,mitochondrion,unclassified': 'nuclear periphery and mitochondrion',
'nucleolus,structured': 'nucleolus and structured',
'cell periphery,punctate': 'cell periphery and punctate',
'nuclear periphery,cell periphery,vacuole,punctate': 'nuclear periphery and cell periphery and vacuole and punctate',
'nucleolus,punctate': 'nucleolus and punctate',
'nuclear periphery,vacuole,punctate': 'nuclear periphery and vacuole and punctate',
'cell periphery,vacuole': 'cell periphery and vacuole',
'nuclear periphery,structured,punctate': 'nuclear periphery and structured and punctate',
'nucleolus,vacuole,punctate': 'nucleolus and vacuole and punctate',
'cytoplasm,nucleus,nucleolus,mitochondrion,unclassified': 'cytoplasm and nucleus and nucleolus and mitochondrion',
'cytoplasm,mitochondrion,ER,punctate,unclassified': 'cytoplasm and mitochondrion and ER and punctate',
'nucleus,punctate,unclassified': 'nucleus and punctate',
'punctate,unclassified': 'punctate',
'cytoplasm,nucleus,nuclear periphery,structured': 'cytoplasm and nucleus and nuclear periphery and structured',
'cytoplasm,nuclear periphery,unclassified': 'cytoplasm and nuclear periphery',
'nucleus,nuclear periphery,punctate': 'nucleus and nuclear periphery and punctate',
'cytoplasm,structured,bud,punctate': 'cytoplasm and structured and bud and punctate',
'cytoplasm,nucleus,punctate': 'cytoplasm and nucleus and punctate',
'cell periphery,vacuole,unclassified': 'cell periphery and vacuole',
'nucleolus,nuclear periphery,cell periphery,ER,punctate,unclassified': 'nucleolus and nuclear periphery and cell periphery and ER and punctate',
'nucleus,cell periphery,vacuole,punctate': 'nucleus and cell periphery and vacuole and punctate',
'nucleolus,nuclear periphery,cell periphery,vacuole,punctate': 'nucleolus and nuclear periphery and cell periphery and vacuole and punctate'}

Brandon_dict = {'Cytoplasm': 'Cytoplasm',
'Nucleolus Irregular': 'Nucleolus Irregular',
'Cytoplasmic foci': 'Cytoplasmic foci',
'Nucleus': 'Nucleus',
'Vacuole': 'Vacuole',
'Bud Neck': 'Bud Neck',
'ER': 'ER',
'Nuclear Foci': 'Nuclear Foci',
'Cytoplasm irreg.': 'Cytoplasm irreg.',
'PM (Punctate)': 'Plasma Membrane',
'Nucleus P': 'Nucleus Periphery',
'Nucleolus': 'Nucleolus',
'Nucleus Irregular': 'Nucleus Irregular',
'Vacuole Foci': 'Vacuole Foci',
'ER Foci': 'ER Foci',
'Cytoplasmic Foci': 'Cytoplasmic Foci',
'Nuclear / Cyto Foci': 'Nuclear and Cyto Foci',
'Nuclear foci': 'Nuclear foci',
'Nuclear Periphery': 'Nuclear Periphery',
'ER foci': 'ER foci',
'Mitchondria': 'Mitchondria'}

Huh_dict = {
'cytoplasm,nucleus': 'cytoplasm,nucleus',
'endosome':'endosome',
'cytoplasm':'cytoplasm',
'ER':'ER',
'mitochondrion':'mitochondrion',
'nucleus':'nucleus',
'early Golgi,late Golgi':'Golgi',
'ambiguous,bud neck,cytoplasm,cell periphery,bud':'bud neck,cytoplasm,cell periphery,bud',
'spindle pole':'spindle pole',
'ambiguous,bud neck,cell periphery,punctate composite':'bud neck,cell periphery,punctate composite',
'late Golgi,bud': 'Golgi',
'punctate composite':'punctate composite',
'cytoplasm,mitochondrion':'cytoplasm,mitochondrion',
'vacuolar membrane':'vacuolar membrane',
'nuclear periphery':'nuclear periphery',
'ambiguous,spindle pole':'spindle pole',
'ambiguous':'ambiguous',
'vacuole':'vacuole',
'cytoplasm,nucleolus':'cytoplasm,nucleolus',
'cytoplasm,actin':'cytoplasm,actin',
'late Golgi':'Golgi',
'punctate composite,endosome':'punctate composite,endosome',
'nucleolus':'nucleolus',
'nucleolus,nucleus':'nucleolus,nucleus',
'cell periphery':'cell periphery',
'microtubule':'microtubule',
'bud neck,cell periphery':'bud neck,cell periphery',
'cell periphery,bud':'cell periphery,bud',
'bud neck,cytoplasm':'bud neck,cytoplasm',
'bud neck,cytoplasm,nucleus':'bud neck,cytoplasm,nucleus',
'cytoplasm,vacuole':'cytoplasm,vacuole',
'bud neck,cytoplasm,cell periphery,bud':'bud neck,cytoplasm,cell periphery,bud',
'early Golgi':'Golgi',
'ambiguous,bud neck,cytoplasm,bud':'bud neck,cytoplasm,bud',
'cell periphery,vacuole':'cell periphery,vacuole',
'Golgi':'Golgi',
'ER,vacuole':'ER,vacuole',
'lipid particle':'lipid particle',
'bud neck,cytoplasm,cell periphery':'bud neck,cytoplasm,cell periphery',
'Golgi,early Golgi':'Golgi',
'ambiguous,bud neck,cell periphery,bud':'bud neck,cell periphery,bud',
'nucleus,spindle pole':'nucleus,spindle pole',
'bud neck,cell periphery,bud':'bud neck,cell periphery,bud',
'ambiguous,cytoplasm,cell periphery,bud':'cytoplasm,cell periphery,bud',
'nucleus,microtubule':'nucleus,microtubule',
'punctate composite,early Golgi':'punctate composite,Golgi',
'actin':'actin',
'cytoplasm,nucleolus,nucleus':'cytoplasm,nucleolus,nucleus',
'bud neck':'bud neck',
'spindle pole,microtubule':'spindle pole,microtubule',
'cytoplasm,punctate composite':'cytoplasm,punctate composite',
'bud neck,cell periphery,punctate composite':'bud neck,cell periphery,punctate composite',
'peroxisome':'peroxisome',
'cytoplasm,mitochondrion,nucleus':'cytoplasm,mitochondrion,nucleus',
'ambiguous,nuclear periphery':'nuclear periphery',
'mitochondrion,nucleus':'mitochondrion,nucleus',
'punctate composite,early Golgi,late Golgi':'punctate composite, Golgi',
'ER to Golgi':'ER,Golgi',
'ambiguous,cytoplasm':'cytoplasm',
'ambiguous,endosome':'endosome',
'vacuole,vacuolar membrane':'vacuole,vacuolar membrane',
'ambiguous,cell periphery,bud':'cell periphery,bud',
'nuclear periphery,nucleus':'nuclear periphery,nucleus',
'cytoplasm,nucleus,spindle pole':'cytoplasm,nucleus,spindle pole',
'ambiguous,cytoplasm,nucleus':'cytoplasm,nucleus',
'ambiguous,mitochondrion':'mitochondrion',
'ER,cell periphery':'ER,cell periphery',
'ambiguous,bud neck,bud':'bud neck,bud',
'ambiguous,cytoplasm,bud':'cytoplasm,bud',
'cytoplasm,nuclear periphery,nucleus':'cytoplasm,nuclear periphery,nucleus',
'ambiguous,punctate composite':'punctate composite',
'ER,cytoplasm':'ER,cytoplasm',
'cytoplasm,late Golgi':'cytoplasm,Golgi',
'punctate composite,Golgi,early Golgi':'punctate composite,Golgi,Golgi',
'cytoplasm,endosome,lipid particle':'cytoplasm,endosome',
'ambiguous,cytoplasm,punctate composite':'cytoplasm,punctate composite',
'punctate composite,vacuolar membrane,lipid particle':'punctate composite,vacuolar membrane',
'cytoplasm,nucleus,punctate composite':'cytoplasm,nucleus,punctate composite',
'cytoplasm,early Golgi':'cytoplasm,Golgi',
'ambiguous,Golgi':'Golgi',
'ambiguous,Golgi,early Golgi':'Golgi,Golgi',
'cytoplasm,nuclear periphery':'cytoplasm,nuclear periphery',
'bud neck,cytoplasm,cell periphery,punctate composite':'bud neck,cytoplasm,cell periphery,punctate composite,bud',
'cytoplasm,cell periphery':'cytoplasm,cell periphery',
'bud neck,cytoplasm,mitochondrion,cell periphery':'bud neck,cytoplasm,mitochondrion,cell periphery',
'mitochondrion,punctate composite':'mitochondrion,punctate composite',
'ambiguous,vacuolar membrane':'vacuolar membrane',
'nucleus,spindle pole,microtubule':'nucleus,spindle pole,microtubule',
'ER,cell periphery,vacuole,bud':'ER,cell periphery,vacuole,bud',
'punctate composite,late Golgi':'punctate composite,Golgi',
'cytoplasm,bud':'cytoplasm,bud',
'ambiguous,late Golgi':'Golgi',
'vacuole,endosome':'vacuole,endosome',
'ambiguous,nucleus':'nucleus',
'punctate composite,Golgi':'punctate composite,Golgi',
'ambiguous,bud neck,cytoplasm,vacuole,bud':'bud neck,cytoplasm,vacuole,bud',
'cytoplasm,spindle pole':'cytoplasm,spindle pole',
'ambiguous,bud neck,cell periphery,vacuole,bud':'bud neck,cell periphery,vacuole,bud',
'cytoplasm,Golgi,early Golgi':'cytoplasm,Golgi,Golgi',
'vacuolar membrane,endosome':'vacuolar membrane,endosome',
'ER,ambiguous,bud':'ER,bud',
'cytoplasm,punctate composite,endosome':'cytoplasm,punctate composite,endosome',
'ER,cytoplasm,nucleus':'ER,cytoplasm,nucleus',
'punctate composite,endosome,early Golgi,late Golgi':'punctate composite,endosome,Golgi,Golgi',
'ER,cell periphery,bud':'ER,cell periphery,bud',
'cytoplasm,endosome':'cytoplasm,endosome',
'ER,nucleus':'ER,nucleus',
'cytoplasm,nucleus,bud':'cytoplasm,nucleus,bud',
'punctate composite,actin':'punctate composite,actin',
'cytoplasm,nuclear periphery,punctate composite':'cytoplasm,nuclear periphery,punctate composite',
'bud neck,cytoplasm,bud':'bud neck,cytoplasm,bud',
'nucleolus,microtubule':'nucleolus,microtubule',
'punctate composite,spindle pole':'punctate composite,spindle pole',
'cytoplasm,punctate composite,spindle pole,microtubule':'cytoplasm,punctate composite,spindle pole,microtubule',
'vacuole,Golgi':'vacuole,Golgi',
'bud neck,nucleus':'bud neck,nucleus',
'ER,ambiguous,cytoplasm':'ER,cytoplasm',
'endosome,lipid particle':'endosome',
'ambiguous,punctate composite,bud':'punctate composite,bud',
'ER,mitochondrion,nuclear periphery':'ER,mitochondrion,nuclear periphery',
'punctate composite,lipid particle':'punctate composite',
'ER,ambiguous':'ER',
'ambiguous,bud neck,cytoplasm,nucleus,bud':'bud neck,cytoplasm,nucleus,bud',
'bud neck,cell periphery,vacuole':'bud neck,cell periphery,vacuole',
'cytoplasm,cell periphery,bud':'cytoplasm,cell periphery,bud'}

In [None]:
#%%

class dataset_desc:
	def __init__(self, merge_on, information) -> None:
		self.merge_on = merge_on
		self.information = information
		self.func_match = f"{self}_f"

class microcope_info():
	def __init__(self) -> None:
		self.pixel_ratio_microns = 0.1081

# class GO_term:
# 	def __init__(self, go_list) -> None:
# 		self.listed = str.split(go_list, ", ")


def f_GO_map(micro_map): #! This was left out because decided to display based on a network rather than colored scatter
	go = pd.read_excel("C:\\Users\\pcnba\\Grant Brown's Lab Dropbox\\Peter Bartlett\\Peter Bartlett Data\\Code\\Data_copies\\Information_files\\Localization_merging\\GO_Proteins.xlsx")
	terms = go['TERM']

	# def add_GOs(prot:str, go_group_prot:str, go_group_name:str, go_group_list:list): #! This is not in use because deicided to use a network graph to visualize interaction enrichment rather than as scatter with variable grouped GO color
	#. This below code has not been micro_maped
	# 	if prot in go_group_list:
	# 		go_group_prot + "," + go_group_name
	# 	else:
	# 		pass
	# 	return(go_group_prot)

	# micro_map['GO_group_collected'] = ''
	# for r in terms:
	# 	go_matches = go.loc[r, 'ANNOTATED_GENES']
	# 	micro_map['GO_group_collected'] = micro_map.apply(lambda x: add_GOs(x['Protein'], x['GO_group_collected'], r, go_matches), axis = 1)
	return(go) #. , micro_map)

def sgd_map_f():
	# sgd = pd.read_csv("results_best.csv")
	sgd = pd.read_csv("results.tsv", sep = '\t')
	# sgd.rename(columns={'input':'Gene_Standard_Name'}, inplace=True)
	# sgd.rename(columns={'length':'Gene_Length'}, inplace=True)
	sgd.columns = sgd.columns.str.replace(" > ", "_")
	sgd.columns = sgd.columns.str.replace(" ", "_")
	# sgd = sgd.loc[:, ["Gene_Systematic_Name", "Gene_Standard_Name", "Gene_Name", "Gene_Length", "Gene_Phenotype_Summary", "Gene_Length"]]
	sgd['Gene_Standard_Name'] = sgd['Gene_Standard_Name'].fillna(sgd['Gene_Systematic_Name'])
	sgd = sgd.set_index("Gene_Systematic_Name")
	#* global info_sgd
	#* info_sgd = dataset_desc('Standard_Name','information')
	return(sgd)

def tkach_f():
	tkach = pd.read_excel("Tcak_protein_localization.xlsx", sheet_name='Calls')
	tkach.columns = tkach.columns.str.replace(" ", "_")
	tkach['Standard_Name'] = tkach['Standard_Name'].fillna(tkach['Systematic_ORF'])
	tkach = tkach.set_index("Systematic_ORF")

	tkach['EndLOC_Rescreen_MMS_Tcak'] = tkach['EndLOC_Rescreen_MMS_Tcak'].map(Tkach_dictionary).fillna(tkach['EndLOC_Rescreen_MMS_Tcak'])
	tkach['EndLOC_Rescreen_HU_Tcak'] = tkach['EndLOC_Rescreen_HU_Tcak'].map(Tkach_dictionary).fillna(tkach['EndLOC_Rescreen_HU_Tcak'])
	#* global info_tkach
	#* info_tkach = dataset_desc('Standard_Name','localization')
	return(tkach)

def microfluidics_map_f():
	microfluidics_map = pd.read_excel("MicrofluidicsMap_wCol_USE.xlsx", sheet_name="ProteinLocations")
	microfluidics_map.dropna(subset='Protein', inplace=True)
	microfluidics_map.columns = microfluidics_map.columns.str.replace(' ', '_')
	microfluidics_map['Protein'] = microfluidics_map['Protein'].str.upper()
	microfluidics_map = microfluidics_map.set_index('Protein')
	#* global info_microfluidcs
	#* info_microfluidcs = dataset_desc('Standard_Name/Mix','map')
	return(microfluidics_map)

def denervaud_ycd_f():
	denervaud_ycd = pd.read_excel("Den_data_bestgood.xlsx", sheet_name='Sheet1')
	# denervaud_ycd.rename(columns={'denervaud_ycd_Call':'Call'}, inplace=True)
	denervaud_ycd.drop(denervaud_ycd.columns[denervaud_ycd.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	# denervaud_ycd = denervaud_ycd.add_suffix("_denervaud_ycd")
	# denervaud_ycd['Standard_Name_denervaud_ycd'] = denervaud_ycd['Standard_Name_denervaud_ycd'].str.upper()
	denervaud_ycd.fillna('unclassified', inplace = True)

	denervaud_ycd['initial_localization'] = denervaud_ycd['initial_localization'].map(Den_ycd_map_dict).fillna(denervaud_ycd['initial_localization'])
	denervaud_ycd['end_localization'] = denervaud_ycd['end_localization'].map(Den_ycd_map_dict).fillna(denervaud_ycd['end_localization'])
	denervaud_ycd.sort_values(by = "movieTag", inplace = True) #* Put in order so that the best movie is first before taking the first instance of an ORF label
	denervaud_ycd['geneName'] = denervaud_ycd['geneName'].replace({'-': np.nan})
	denervaud_ycd['geneName'] = denervaud_ycd['geneName'].fillna(denervaud_ycd['yORF'])
	denervaud_ycd = denervaud_ycd.groupby('yORF').aggregate(lambda x: x.iloc[0])
	denervaud_ycd = denervaud_ycd.drop(columns=['geneName', 'exp_cond', 'movieTag'])
	return(denervaud_ycd)

# def denervaud_f():
# 	denervaud = pd.read_excel("Denervaud Calls.xlsx", sheet_name='Sheet1')
# 	denervaud.columns = denervaud.columns.str.replace(' ', '_')
# 	denervaud.rename(columns={'Denervaud_Call':'Call'}, inplace=True)
# 	denervaud.drop(denervaud.columns[denervaud.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
# 	denervaud = denervaud.add_suffix("_Denervaud")
# 	denervaud['Standard_Name_Denervaud'] = denervaud['Standard_Name_Denervaud'].str.upper()

# 	denervaud['Call_Denervaud'] = denervaud['Call_Denervaud'].map(Denervaud_dictionary).fillna(denervaud['Call_Denervaud'])
# 	denervaud = denervaud.set_index('Standard_Name_Denervaud')
# 	#* global info_Denervaud
# 	#* info_Denervaud = dataset_desc('Standard_Name','Localization')
# 	return(denervaud)

def Ho_loc_pen_f():
	Ho_loc = pd.read_excel("Loc_Ho_SuppT5.xlsx", sheet_name="20210809_HUMMS_penetrance")
	Ho_loc.drop(Ho_loc.columns[Ho_loc.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	Ho_loc.drop(Ho_loc.columns[Ho_loc.columns.str.contains('HU',case = False)],axis = 1, inplace = True)
	Ho_loc.columns = Ho_loc.columns.str.replace(" ", "_")
	Ho_loc = Ho_loc.add_suffix("_Ho")
	Ho_loc = Ho_loc.set_index('Gene_Ho')
	#* global info_Ho
	#* info_Ho = dataset_desc('Standard Name','LocPen')
	return(Ho_loc)

def Mazumder_f():
	mazumder = pd.read_excel("Mazumder_ver2.xlsx", sheet_name="Mod_dest")
	# mazumder.columns = mazumder.columns.str.replace(" ", "_")
	mazumder = mazumder.add_suffix("_Mazumder")
	mazumder['Localization_Mazumder'] = mazumder['Localization_Mazumder'].map(Mazumder_dictionary).fillna(mazumder['Localization_Mazumder'])
	mazumder['CommName_Mazumder'] = mazumder['CommName_Mazumder'].fillna(mazumder['ORF_Mazumder'])
	mazumder = mazumder.set_index('ORF_Mazumder')
	#* global info_Mazumder
	#* info_Mazumder = dataset_desc('ORF','Localization')
	return(mazumder)

def Huh_f():
	Huh = pd.read_csv('Huh2003.txt', sep = '\t').set_index('yORF')
	Huh['localization summary'] = Huh['localization summary'].map(Huh_dict).fillna(Huh['localization summary'])
	return(Huh)

In [None]:
#%%

class dataset_desc:
	def __init__(self, merge_on, information) -> None:
		self.merge_on = merge_on
		self.information = information
		self.func_match = f"{self}_f"

class microcope_info():
	def __init__(self) -> None:
		self.pixel_ratio_microns = 0.1081

# class GO_term:
# 	def __init__(self, go_list) -> None:
# 		self.listed = str.split(go_list, ", ")


def f_GO_map(micro_map): #! This was left out because decided to display based on a network rather than colored scatter
	go = pd.read_excel("C:\\Users\\pcnba\\Grant Brown's Lab Dropbox\\Peter Bartlett\\Peter Bartlett Data\\Code\\Data_copies\\Information_files\\Localization_merging\\GO_Proteins.xlsx")
	terms = go['TERM']

	# def add_GOs(prot:str, go_group_prot:str, go_group_name:str, go_group_list:list): #! This is not in use because deicided to use a network graph to visualize interaction enrichment rather than as scatter with variable grouped GO color
	#. This below code has not been micro_maped
	# 	if prot in go_group_list:
	# 		go_group_prot + "," + go_group_name
	# 	else:
	# 		pass
	# 	return(go_group_prot)

	# micro_map['GO_group_collected'] = ''
	# for r in terms:
	# 	go_matches = go.loc[r, 'ANNOTATED_GENES']
	# 	micro_map['GO_group_collected'] = micro_map.apply(lambda x: add_GOs(x['Protein'], x['GO_group_collected'], r, go_matches), axis = 1)
	return(go) #. , micro_map)

def sgd_map_f():
	# sgd = pd.read_csv("results_best.csv")
	sgd = pd.read_csv("results.tsv", sep = '\t')
	# sgd.rename(columns={'input':'Gene_Standard_Name'}, inplace=True)
	# sgd.rename(columns={'length':'Gene_Length'}, inplace=True)
	sgd.columns = sgd.columns.str.replace(" > ", "_")
	sgd.columns = sgd.columns.str.replace(" ", "_")
	# sgd = sgd.loc[:, ["Gene_Systematic_Name", "Gene_Standard_Name", "Gene_Name", "Gene_Length", "Gene_Phenotype_Summary", "Gene_Length"]]
	sgd['Gene_Standard_Name'] = sgd['Gene_Standard_Name'].fillna(sgd['Gene_Systematic_Name'])
	sgd = sgd.set_index("Gene_Systematic_Name")
	#* global info_sgd
	#* info_sgd = dataset_desc('Standard_Name','information')
	return(sgd)

def tkach_f():
	tkach = pd.read_excel("Tcak_protein_localization.xlsx", sheet_name='Calls')
	tkach.columns = tkach.columns.str.replace(" ", "_")
	tkach['Standard_Name'] = tkach['Standard_Name'].fillna(tkach['Systematic_ORF'])
	tkach = tkach.set_index("Systematic_ORF")

	tkach['EndLOC_Rescreen_MMS_Tcak'] = tkach['EndLOC_Rescreen_MMS_Tcak'].map(Tkach_dictionary).fillna(tkach['EndLOC_Rescreen_MMS_Tcak'])
	tkach['EndLOC_Rescreen_HU_Tcak'] = tkach['EndLOC_Rescreen_HU_Tcak'].map(Tkach_dictionary).fillna(tkach['EndLOC_Rescreen_HU_Tcak'])
	#* global info_tkach
	#* info_tkach = dataset_desc('Standard_Name','localization')
	return(tkach)

def microfluidics_map_f():
	microfluidics_map = pd.read_excel("MicrofluidicsMap_wCol_USE.xlsx", sheet_name="ProteinLocations")
	microfluidics_map.dropna(subset='Protein', inplace=True)
	microfluidics_map.columns = microfluidics_map.columns.str.replace(' ', '_')
	microfluidics_map['Protein'] = microfluidics_map['Protein'].str.upper()
	microfluidics_map = microfluidics_map.set_index('Protein')
	#* global info_microfluidcs
	#* info_microfluidcs = dataset_desc('Standard_Name/Mix','map')
	return(microfluidics_map)

def denervaud_ycd_f():
	denervaud_ycd = pd.read_excel("Den_data_bestgood.xlsx", sheet_name='Sheet1')
	# denervaud_ycd.rename(columns={'denervaud_ycd_Call':'Call'}, inplace=True)
	denervaud_ycd.drop(denervaud_ycd.columns[denervaud_ycd.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	# denervaud_ycd = denervaud_ycd.add_suffix("_denervaud_ycd")
	# denervaud_ycd['Standard_Name_denervaud_ycd'] = denervaud_ycd['Standard_Name_denervaud_ycd'].str.upper()
	denervaud_ycd.fillna('unclassified', inplace = True)

	denervaud_ycd['initial_localization'] = denervaud_ycd['initial_localization'].map(Den_ycd_map_dict).fillna(denervaud_ycd['initial_localization'])
	denervaud_ycd['end_localization'] = denervaud_ycd['end_localization'].map(Den_ycd_map_dict).fillna(denervaud_ycd['end_localization'])
	denervaud_ycd.sort_values(by = "movieTag", inplace = True) #* Put in order so that the best movie is first before taking the first instance of an ORF label
	denervaud_ycd['geneName'] = denervaud_ycd['geneName'].replace({'-': np.nan})
	denervaud_ycd['geneName'] = denervaud_ycd['geneName'].fillna(denervaud_ycd['yORF'])
	denervaud_ycd = denervaud_ycd.groupby('yORF').aggregate(lambda x: x.iloc[0])
	denervaud_ycd = denervaud_ycd.drop(columns=['geneName', 'exp_cond', 'movieTag'])
	return(denervaud_ycd)

# def denervaud_f():
# 	denervaud = pd.read_excel("Denervaud Calls.xlsx", sheet_name='Sheet1')
# 	denervaud.columns = denervaud.columns.str.replace(' ', '_')
# 	denervaud.rename(columns={'Denervaud_Call':'Call'}, inplace=True)
# 	denervaud.drop(denervaud.columns[denervaud.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
# 	denervaud = denervaud.add_suffix("_Denervaud")
# 	denervaud['Standard_Name_Denervaud'] = denervaud['Standard_Name_Denervaud'].str.upper()

# 	denervaud['Call_Denervaud'] = denervaud['Call_Denervaud'].map(Denervaud_dictionary).fillna(denervaud['Call_Denervaud'])
# 	denervaud = denervaud.set_index('Standard_Name_Denervaud')
# 	#* global info_Denervaud
# 	#* info_Denervaud = dataset_desc('Standard_Name','Localization')
# 	return(denervaud)

def Ho_loc_pen_f():
	Ho_loc = pd.read_excel("Loc_Ho_SuppT5.xlsx", sheet_name="20210809_HUMMS_penetrance")
	Ho_loc.drop(Ho_loc.columns[Ho_loc.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	Ho_loc.drop(Ho_loc.columns[Ho_loc.columns.str.contains('HU',case = False)],axis = 1, inplace = True)
	Ho_loc.columns = Ho_loc.columns.str.replace(" ", "_")
	Ho_loc = Ho_loc.add_suffix("_Ho")
	Ho_loc = Ho_loc.set_index('Gene_Ho')
	#* global info_Ho
	#* info_Ho = dataset_desc('Standard Name','LocPen')
	return(Ho_loc)

def Mazumder_f():
	mazumder = pd.read_excel("Mazumder_ver2.xlsx", sheet_name="Mod_dest")
	# mazumder.columns = mazumder.columns.str.replace(" ", "_")
	mazumder = mazumder.add_suffix("_Mazumder")
	mazumder['Localization_Mazumder'] = mazumder['Localization_Mazumder'].map(Mazumder_dictionary).fillna(mazumder['Localization_Mazumder'])
	mazumder['CommName_Mazumder'] = mazumder['CommName_Mazumder'].fillna(mazumder['ORF_Mazumder'])
	mazumder = mazumder.set_index('ORF_Mazumder')
	#* global info_Mazumder
	#* info_Mazumder = dataset_desc('ORF','Localization')
	return(mazumder)

def Huh_f():
	Huh = pd.read_csv('Huh2003.txt', sep = '\t').set_index('yORF')
	Huh['localization summary'] = Huh['localization summary'].map(Huh_dict).fillna(Huh['localization summary'])
	return(Huh)

In [None]:
#%%

if __name__ == "__main__":
	# Global_Variables = glv.global_manager()
	Global_Variables = {
	# 	"analyze": "F:/Microfluidics/Missing_Analyze2",
		"microfluidics_results": "F:/Microfluidics/RES_N_ULTS",
		"information_path": "C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging",
		"post_path": "D:/ALL_FINAL"} # * ,
	# 	"subset": False,
	# 	'subset_by': 'range',
	# 	'subset_collection': '',
	# 	"cpu_se": 16,
	# 	"timepoint_gap": 7.5,
	# 	"percentiles": [95, 99],
	# 	"multiplex": True}
	os.chdir(Global_Variables['information_path'])
	# files = input("What are the files to be micro_map based on protein? [Comma deliminate]").split(', ')
	# for f in files: #TODO: Add multi-read_functionality
		# pd.read_csv(f).set_index()

	# micro_map = microfluidics_map_f()
	# # .drop(columns=['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map'])
	# map_drop_columns = ['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map']
	# micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	# micro_map.drop(columns=[col for col in micro_map if col in map_drop_columns], inplace=True)

	sgd = sgd_map_f()
	# micro_map = pd.merge(sgd, micro_map, left_index=True, right_index=True, how = 'left')
	#. Decided that will just use the list of proteins from SGD to merge with the other datasets

	#< tkach = tkach_f()
	#< # denervaud = denervaud_f()
	#< mazumder = Mazumder_f()
	#< Den_ycd_map_df = denervaud_ycd_f()
	#< Brandons_map = pd.read_excel("Brandons_Paper.xlsx", sheet_name="Sheet1").set_index('ORF')
	#< Brandons_map = Brandons_map.drop(columns=['Protein']).rename(columns={'Subcellular Compartment Re-localization': 'Dest_Call'}).add_suffix('_Brandons')
	#< Huh = Huh_f()

	tkach = pd.read_excel("C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging/Tkach_refined.xls", sheet_name='Localization scoring')
	tkach = tkach.rename(columns={'MMS localization change class': 'MMS_localization_class', 'HU Localication change class': 'HU_localization_class'})
	micro_map = sgd.merge(tkach, left_on = "Gene_Standard_Name", right_on = "Standard Name", how = 'left')
	micro_map['MMS_HU_merged_class'] = micro_map['MMS_localization_class'].fillna(micro_map['HU_localization_class'])



	#< loqate = pd.read_excel('proteomesummarylamicro_mapversion.xlsx', sheet_name='Sheet1', usecols=['ORF', 'control Localization']).set_index('ORF').replace('below threshold', np.nan)

	#< micro_map = sgd.merge(Den_ycd_map_df, left_index= True, right_index= True, how = 'left')
	#< micro_map = micro_map.merge(tkach, left_index=True, right_index = True, how= "left")

	#< #Either should work below
	#< micro_map = micro_map.merge(mazumder, left_index = True, right_index = True, how = "left")
	#< # micro_map = micro_map.merge(mazumder, left_on = 'Gene_Standard_Name', right_on = 'CommName_Mazumder', how = "left")

	#< micro_map = pd.merge(micro_map, Brandons_map, right_index=True, left_index=True, how= 'left')
	#< #Artifact of removed micorfluidics map
	#< # micro_map = micro_map.sort_values(by = ['Date', 'Run_Number', 'MapID_(Col_Range)'])

	#< micro_map = micro_map.merge(Huh, left_index=True, right_index=True, how='left')
	#< micro_map = micro_map.merge(loqate, left_index=True, right_index=True, how='left')

In [None]:
#%%

if __name__ == "__main__":
	# Global_Variables = glv.global_manager()
	Global_Variables = {
	# 	"analyze": "F:/Microfluidics/Missing_Analyze2",
		"microfluidics_results": "F:/Microfluidics/RES_N_ULTS",
		"information_path": "C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging",
		"post_path": "D:/ALL_FINAL"} # * ,
	# 	"subset": False,
	# 	'subset_by': 'range',
	# 	'subset_collection': '',
	# 	"cpu_se": 16,
	# 	"timepoint_gap": 7.5,
	# 	"percentiles": [95, 99],
	# 	"multiplex": True}
	os.chdir(Global_Variables['information_path'])
	# files = input("What are the files to be micro_map based on protein? [Comma deliminate]").split(', ')
	# for f in files: #TODO: Add multi-read_functionality
		# pd.read_csv(f).set_index()

	# micro_map = microfluidics_map_f()
	# # .drop(columns=['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map'])
	# map_drop_columns = ['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map']
	# micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	# micro_map.drop(columns=[col for col in micro_map if col in map_drop_columns], inplace=True)

	sgd = sgd_map_f()
	# micro_map = pd.merge(sgd, micro_map, left_index=True, right_index=True, how = 'left')
	#. Decided that will just use the list of proteins from SGD to merge with the other datasets

	tkach = tkach_f()
	# denervaud = denervaud_f()
	mazumder = Mazumder_f()
	Den_ycd_map_df = denervaud_ycd_f()
	Brandons_map = pd.read_excel("Brandons_Paper.xlsx", sheet_name="Sheet1").set_index('ORF')
	Brandons_map = Brandons_map.drop(columns=['Protein']).rename(columns={'Subcellular Compartment Re-localization': 'Dest_Call'}).add_suffix('_Brandons')
	Huh = Huh_f()

	tkach = pd.read_excel("C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging/Tkach_refined.xls", sheet_name='Localization scoring')
	tkach = tkach.rename(columns={'MMS localization change class': 'MMS_localization_class', 'HU Localication change class': 'HU_localization_class'})
	micro_map = sgd.merge(tkach, left_on = "Gene_Standard_Name", right_on = "Standard Name", how = 'left')
	micro_map['MMS_HU_merged_class'] = micro_map['MMS_localization_class'].fillna(micro_map['HU_localization_class'])



	# loqate = pd.read_excel('proteomesummarylamicro_mapversion.xlsx', sheet_name='Sheet1', usecols=['ORF', 'control Localization']).set_index('ORF').replace('below threshold', np.nan)

	micro_map = sgd.merge(Den_ycd_map_df, left_index= True, right_index= True, how = 'left')
	micro_map = micro_map.merge(tkach, left_index=True, right_index = True, how= "left")

	#Either should work below
	micro_map = micro_map.merge(mazumder, left_index = True, right_index = True, how = "left")
	# micro_map = micro_map.merge(mazumder, left_on = 'Gene_Standard_Name', right_on = 'CommName_Mazumder', how = "left")

	micro_map = pd.merge(micro_map, Brandons_map, right_index=True, left_index=True, how= 'left')
	#Artifact of removed micorfluidics map
	# micro_map = micro_map.sort_values(by = ['Date', 'Run_Number', 'MapID_(Col_Range)'])

	micro_map = micro_map.merge(Huh, left_index=True, right_index=True, how='left')
	micro_map = micro_map.merge(loqate, left_index=True, right_index=True, how='left')

NameError: name 'loqate' is not defined

In [None]:
#%%

if __name__ == "__main__":
	# Global_Variables = glv.global_manager()
	Global_Variables = {
	# 	"analyze": "F:/Microfluidics/Missing_Analyze2",
		"microfluidics_results": "F:/Microfluidics/RES_N_ULTS",
		"information_path": "C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging",
		"post_path": "D:/ALL_FINAL"} # * ,
	# 	"subset": False,
	# 	'subset_by': 'range',
	# 	'subset_collection': '',
	# 	"cpu_se": 16,
	# 	"timepoint_gap": 7.5,
	# 	"percentiles": [95, 99],
	# 	"multiplex": True}
	os.chdir(Global_Variables['information_path'])
	# files = input("What are the files to be micro_map based on protein? [Comma deliminate]").split(', ')
	# for f in files: #TODO: Add multi-read_functionality
		# pd.read_csv(f).set_index()

	# micro_map = microfluidics_map_f()
	# # .drop(columns=['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map'])
	# map_drop_columns = ['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map']
	# micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
	# micro_map.drop(columns=[col for col in micro_map if col in map_drop_columns], inplace=True)

	sgd = sgd_map_f()
	# micro_map = pd.merge(sgd, micro_map, left_index=True, right_index=True, how = 'left')
	#. Decided that will just use the list of proteins from SGD to merge with the other datasets

	tkach = tkach_f()
	# denervaud = denervaud_f()
	mazumder = Mazumder_f()
	Den_ycd_map_df = denervaud_ycd_f()
	Brandons_map = pd.read_excel("Brandons_Paper.xlsx", sheet_name="Sheet1").set_index('ORF')
	Brandons_map = Brandons_map.drop(columns=['Protein']).rename(columns={'Subcellular Compartment Re-localization': 'Dest_Call'}).add_suffix('_Brandons')
	Huh = Huh_f()

	tkach = pd.read_excel("C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging/Tkach_refined.xls", sheet_name='Localization scoring')
	tkach = tkach.rename(columns={'MMS localization change class': 'MMS_localization_class', 'HU Localication change class': 'HU_localization_class'})
	micro_map = sgd.merge(tkach, left_on = "Gene_Standard_Name", right_on = "Standard Name", how = 'left')
	micro_map['MMS_HU_merged_class'] = micro_map['MMS_localization_class'].fillna(micro_map['HU_localization_class'])



	loqate = pd.read_excel('proteomesummarylamicro_mapversion.xlsx', sheet_name='Sheet1', usecols=['ORF', 'control Localization']).set_index('ORF').replace('below threshold', np.nan)

	micro_map = sgd.merge(Den_ycd_map_df, left_index= True, right_index= True, how = 'left')
	micro_map = micro_map.merge(tkach, left_index=True, right_index = True, how= "left")

	#Either should work below
	micro_map = micro_map.merge(mazumder, left_index = True, right_index = True, how = "left")
	# micro_map = micro_map.merge(mazumder, left_on = 'Gene_Standard_Name', right_on = 'CommName_Mazumder', how = "left")

	micro_map = pd.merge(micro_map, Brandons_map, right_index=True, left_index=True, how= 'left')
	#Artifact of removed micorfluidics map
	# micro_map = micro_map.sort_values(by = ['Date', 'Run_Number', 'MapID_(Col_Range)'])

	micro_map = micro_map.merge(Huh, left_index=True, right_index=True, how='left')
	micro_map = micro_map.merge(loqate, left_index=True, right_index=True, how='left')

FileNotFoundError: [Errno 2] No such file or directory: 'proteomesummarylamicro_mapversion.xlsx'

In [None]:
micro_map = microfluidics_map_f()

In [None]:
micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)

In [None]:
map_drop_columns = ['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map']
micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
micro_map.drop(columns=[col for col in micro_map if col in map_drop_columns], inplace=True)

In [None]:
sgd = sgd_map_f()

In [None]:
tkach = tkach_f()

In [None]:
mazumder = Mazumder_f()

In [None]:
Den_ycd_map_df = denervaud_ycd_f()
Brandons_map = pd.read_excel("Brandons_Paper.xlsx", sheet_name="Sheet1").set_index('ORF')
Brandons_map = Brandons_map.drop(columns=['Protein']).rename(columns={'Subcellular Compartment Re-localization': 'Dest_Call'}).add_suffix('_Brandons')
Huh = Huh_f()

In [None]:
micro_map = sgd.merge(Den_ycd_map_df, left_index= True, right_index= True, how = 'left')
micro_map = micro_map.merge(tkach, left_index=True, right_index = True, how= "left")

In [None]:
micro_map = micro_map.merge(mazumder, left_on = 'Gene_Standard_Name', right_on = 'CommName_Mazumder', how = "left")

In [None]:
micro_map = pd.merge(micro_map, Brandons_map, right_index=True, left_index=True, how= 'left')
#< #Artifact of removed micorfluidics map
micro_map = micro_map.sort_values(by = ['Date', 'Run_Number', 'MapID_(Col_Range)'])

KeyError: 'Date'

In [None]:
micro_map

Unnamed: 0,Gene_Standard_Name,Gene_Name,intensity_change,localization_change,initial_localization,end_localization,Standard_Name,EndLOC_Rescreen_MMS_Tcak,EndLOC_Rescreen_HU_Tcak,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,Dest_Call_Brandons
0,CDC24,Cell Division Cycle,Constant,Yes,cytoplasm and nucleus,cytoplasm,CDC24,-> cytoplasm,-> cytoplasm,,,,,
1,RFA1,Replication Factor A,Up,Yes,nucleus,nucleus and nuclear periphery and punctate,,,,,,,,
2,YAR009C,,,,,,YAR009C,,-> cyto foci,,,,,
3,BUD14,BUD site selection,,,,,BUD14,-> nucleus,-> nucleus,,,,,
4,CDC15,Cell Division Cycle,Up,Yes,cytoplasm,mitochondrion,CDC15,-> cytoplasm,-> cytoplasm,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,SEC8,SECretory,Down,Yes,cytoplasm and bud,cytoplasm,SEC8,,,,,,,
316,OPY2,Overproduction-induced Pheromone-resistant Yeast,Constant,Yes,cytoplasm,cytoplasm and punctate,OPY2,-> vacuole,-> vacuole,,,,,
317,CTR1,Copper TRansport,,,,,CTR1,-> plasma membrane,,,,,,
318,SCD6,Suppressor of Clathrin Deficiency,Up,Yes,cytoplasm,cytoplasm and punctate,SCD6,,,,,,,


In [None]:
micro_map = micro_map.merge(Huh, left_index=True, right_index=True, how='left')

In [None]:
micro_map

Unnamed: 0,Gene_Standard_Name,Gene_Name,intensity_change,localization_change,initial_localization,end_localization,Standard_Name,EndLOC_Rescreen_MMS_Tcak,EndLOC_Rescreen_HU_Tcak,CommName_Mazumder,...,late Golgi,peroxisome,actin,nucleolus,cytoplasm,ER to Golgi,early Golgi,lipid particle,nucleus,bud
0,CDC24,Cell Division Cycle,Constant,Yes,cytoplasm and nucleus,cytoplasm,CDC24,-> cytoplasm,-> cytoplasm,,...,,,,,,,,,,
1,RFA1,Replication Factor A,Up,Yes,nucleus,nucleus and nuclear periphery and punctate,,,,,...,,,,,,,,,,
2,YAR009C,,,,,,YAR009C,,-> cyto foci,,...,,,,,,,,,,
3,BUD14,BUD site selection,,,,,BUD14,-> nucleus,-> nucleus,,...,,,,,,,,,,
4,CDC15,Cell Division Cycle,Up,Yes,cytoplasm,mitochondrion,CDC15,-> cytoplasm,-> cytoplasm,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,SEC8,SECretory,Down,Yes,cytoplasm and bud,cytoplasm,SEC8,,,,...,,,,,,,,,,
316,OPY2,Overproduction-induced Pheromone-resistant Yeast,Constant,Yes,cytoplasm,cytoplasm and punctate,OPY2,-> vacuole,-> vacuole,,...,,,,,,,,,,
317,CTR1,Copper TRansport,,,,,CTR1,-> plasma membrane,,,...,,,,,,,,,,
318,SCD6,Suppressor of Clathrin Deficiency,Up,Yes,cytoplasm,cytoplasm and punctate,SCD6,,,,...,,,,,,,,,,


In [None]:
micro_map = microfluidics_map_f()
# # .drop(columns=['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map'])
map_drop_columns = ['Predicted_localization_Change', 'Notes', 'Current_Stage', 'Location', 'Fullmicro_map']
micro_map.drop(micro_map.columns[micro_map.columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
micro_map.drop(columns=[col for col in micro_map if col in map_drop_columns], inplace=True)

sgd = sgd_map_f()
# micro_map = pd.merge(sgd, micro_map, left_index=True, right_index=True, how = 'left')
#. Decided that will just use the list of proteins from SGD to merge with the other datasets

tkach = tkach_f()
# denervaud = denervaud_f()
mazumder = Mazumder_f()
Den_ycd_map_df = denervaud_ycd_f()
Brandons_map = pd.read_excel("Brandons_Paper.xlsx", sheet_name="Sheet1").set_index('ORF')
Brandons_map = Brandons_map.drop(columns=['Protein']).rename(columns={'Subcellular Compartment Re-localization': 'Dest_Call'}).add_suffix('_Brandons')
Huh = Huh_f()

tkach = pd.read_excel("C:/Users/pcnba/Grant Brown's Lab Dropbox/Peter Bartlett/Peter Bartlett Data/Code/Data_copies/Information_files/Localization_merging/Tkach_refined.xls", sheet_name='Localization scoring')
tkach = tkach.rename(columns={'MMS localization change class': 'MMS_localization_class', 'HU Localication change class': 'HU_localization_class'})
micro_map = sgd.merge(tkach, left_on = "Gene_Standard_Name", right_on = "Standard Name", how = 'left')
micro_map['MMS_HU_merged_class'] = micro_map['MMS_localization_class'].fillna(micro_map['HU_localization_class'])



#< loqate = pd.read_excel('proteomesummarylamicro_mapversion.xlsx', sheet_name='Sheet1', usecols=['ORF', 'control Localization']).set_index('ORF').replace('below threshold', np.nan)

micro_map = sgd.merge(Den_ycd_map_df, left_index= True, right_index= True, how = 'left')
micro_map = micro_map.merge(tkach, left_index=True, right_index = True, how= "left")

#< #Either should work below
#< micro_map = micro_map.merge(mazumder, left_index = True, right_index = True, how = "left")
micro_map = micro_map.merge(mazumder, left_on = 'Gene_Standard_Name', right_on = 'CommName_Mazumder', how = "left")

micro_map = pd.merge(micro_map, Brandons_map, right_index=True, left_index=True, how= 'left')
#< #Artifact of removed micorfluidics map
micro_map = micro_map.sort_values(by = ['Date', 'Run_Number', 'MapID_(Col_Range)'])

KeyError: 'Date'

In [None]:
micro_map

Unnamed: 0,Gene_Standard_Name,Gene_Name,intensity_change,localization_change,initial_localization,end_localization,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,Dest_Call_Brandons
0,CDC24,Cell Division Cycle,Constant,Yes,cytoplasm and nucleus,cytoplasm,,,,,,,,,
1,RFA1,Replication Factor A,Up,Yes,nucleus,nucleus and nuclear periphery and punctate,,,,,,,,,
2,YAR009C,,,,,,,,,,,,,,
3,BUD14,BUD site selection,,,,,,,,,,,,,
4,CDC15,Cell Division Cycle,Up,Yes,cytoplasm,mitochondrion,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,SEC8,SECretory,Down,Yes,cytoplasm and bud,cytoplasm,,,,,,,,,
316,OPY2,Overproduction-induced Pheromone-resistant Yeast,Constant,Yes,cytoplasm,cytoplasm and punctate,,,,,,,,,
317,CTR1,Copper TRansport,,,,,,,,,,,,,
318,SCD6,Suppressor of Clathrin Deficiency,Up,Yes,cytoplasm,cytoplasm and punctate,,,,,,,,,


In [None]:
tkach

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
249,YNR061C,,,cyto
250,YMR056C,AAC1,,cyto
251,YHR133C,NSG1,,nuc periph
252,YLR220W,CCC1,,cyto; nucleus


In [None]:
den = denervaud_ycd_f()

In [None]:
tkach

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
249,YNR061C,,,cyto
250,YMR056C,AAC1,,cyto
251,YHR133C,NSG1,,nuc periph
252,YLR220W,CCC1,,cyto; nucleus


In [None]:
tkach.filter(~(isna(MMS_localization_class)))

NameError: name 'isna' is not defined

In [None]:
tkach.dropna(subset="MMS_localization_class")

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
184,YBR205W,KTR3,other,
185,YOR101W,RAS1,nucleus,
186,YDR519W,FPR2,other,
187,YML018C,,other,


In [None]:
tkach = tkach.dropna(subset="MMS_localization_class")

In [None]:
tkach

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
184,YBR205W,KTR3,other,
185,YOR101W,RAS1,nucleus,
186,YDR519W,FPR2,other,
187,YML018C,,other,


In [None]:
maz =Mazumder_f()

In [None]:
max

<function max>

In [None]:
maz

Unnamed: 0_level_0,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
ORF_Mazumder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YNL312W,RFA2,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR224C,HTB1,-> nucleus,nucleus,"Histone H2B, core histone protein required for..."
YPR074C,TKL1,-> cytoplasm and nucleus,nucleus,"Transketolase, similar to Tkl2p; catalyzes con..."
YJL173C,RFA3,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR432W,NPL3,-> nucleus,nucleus,"RNA-binding protein that promotes elongation, ..."
...,...,...,...,...
YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
YOR230W,WTM1,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR117W,RPT5,-> nucleus,cytoplasm,One of six ATPases of the 19S regulatory parti...


In [None]:
tkach

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
184,YBR205W,KTR3,other,
185,YOR101W,RAS1,nucleus,
186,YDR519W,FPR2,other,
187,YML018C,,other,


In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_index = True)

In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_index = True, how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
0.0,YKL010C,UFD4,nucleus,nucleus,,,,
1.0,YPL242C,IQG1,from budneck,from budneck,,,,
2.0,YOR127W,RGA1,from budneck,from budneck,,,,
3.0,YLR187W,SKG3,from budneck,from budneck,,,,
4.0,YNL242W,ATG2,cyto foci,cyto foci,,,,
...,...,...,...,...,...,...,...,...
,YNL141W,,,,AAH1,-> cytoplasm and nucleus,cytoplasm,"Adenine deaminase (adenine aminohydrolase), co..."
,YLR335W,,,,NUP2,-> nuclear periphery,cytoplasm,Nucleoporin involved in nucleocytoplasmic tran...
,YLR003C,,,,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
,YOR229W,,,,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...


In [None]:
maz

Unnamed: 0_level_0,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
ORF_Mazumder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YNL312W,RFA2,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR224C,HTB1,-> nucleus,nucleus,"Histone H2B, core histone protein required for..."
YPR074C,TKL1,-> cytoplasm and nucleus,nucleus,"Transketolase, similar to Tkl2p; catalyzes con..."
YJL173C,RFA3,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR432W,NPL3,-> nucleus,nucleus,"RNA-binding protein that promotes elongation, ..."
...,...,...,...,...
YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
YOR230W,WTM1,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR117W,RPT5,-> nucleus,cytoplasm,One of six ATPases of the 19S regulatory parti...


In [None]:
maz.reset_index(inplace=True)

In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_on = 'ORF_Mazumder')

In [None]:
mms_merged

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,ORF_Mazumder,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
0,YLR258W,GSY2,other,other,YLR258W,GSY2,-> cytoplasm and nucleus and punctate composite,cytoplasm,"Glycogen synthase, similar to Gsy1p; expressio..."
1,YIL101C,XBP1,nucleus,nucleus,YIL101C,XBP1,-> nucleus,nucleus,Transcriptional repressor that binds to promot...
2,YIL101C,XBP1,nucleus,nucleus,YIL101C,XBP1,-> nucleus,nucleus,Transcriptional repressor that binds to promot...
3,YER088C,DOT6,nucleus,nucleus,YER088C,DOT6,-> cytoplasm and nucleus,nucleus,Protein involved in rRNA and ribosome biogenes...
4,YGR211W,ZPR1,nucleus,nucleus,YGR211W,ZPR1,-> cytoplasm and nucleus,nucleus,"Essential protein with two zinc fingers, prese..."
5,YML085C,TUB1,nuclear foci,nuclear foci,YML085C,TUB1,"cytoplasm,punctate composite,spindle pole,micr...",nucleus,Alpha-tubulin; associates with beta-tubulin (T...
6,YGR180C,RNR4,cyto,cyto,YGR180C,RNR4,-> cytoplasm and nucleus,cytoplasm,"Ribonucleotide-diphosphate reductase (RNR), sm..."
7,YGR180C,RNR4,cyto,cyto,YGR180C,RNR4,-> cytoplasm and nucleus,cytoplasm,"Ribonucleotide-diphosphate reductase (RNR), sm..."
8,YNL250W,RAD50,nuclear foci,,YNL250W,RAD50,-> cytoplasm and nucleus,nucleus,"Subunit of MRX complex, with Mre11p and Xrs2p,..."
9,YHL030W,ECM29,cyto,,YHL030W,ECM29,-> cytoplasm and nucleus,nucleus,Scaffold protein that assists in association o...


In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_on = 'ORF_Mazumder', how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,ORF_Mazumder,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
0,YKL010C,UFD4,nucleus,nucleus,,,,,
1,YPL242C,IQG1,from budneck,from budneck,,,,,
2,YOR127W,RGA1,from budneck,from budneck,,,,,
3,YLR187W,SKG3,from budneck,from budneck,,,,,
4,YNL242W,ATG2,cyto foci,cyto foci,,,,,
...,...,...,...,...,...,...,...,...,...
369,,,,,YNL141W,AAH1,-> cytoplasm and nucleus,cytoplasm,"Adenine deaminase (adenine aminohydrolase), co..."
370,,,,,YLR335W,NUP2,-> nuclear periphery,cytoplasm,Nucleoporin involved in nucleocytoplasmic tran...
371,,,,,YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
372,,,,,YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...


In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_on = 'ORF_Mazumder', how = 'cross')

MergeError: Can not pass on, right_on, left_on or set right_index=True or left_index=True

In [None]:
mms_merged = pd.merge(tkach, maz, left_on = 'Systematic ORF', right_on = 'ORF_Mazumder', how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,ORF_Mazumder,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
0,YKL010C,UFD4,nucleus,nucleus,,,,,
1,YPL242C,IQG1,from budneck,from budneck,,,,,
2,YOR127W,RGA1,from budneck,from budneck,,,,,
3,YLR187W,SKG3,from budneck,from budneck,,,,,
4,YNL242W,ATG2,cyto foci,cyto foci,,,,,
...,...,...,...,...,...,...,...,...,...
369,,,,,YNL141W,AAH1,-> cytoplasm and nucleus,cytoplasm,"Adenine deaminase (adenine aminohydrolase), co..."
370,,,,,YLR335W,NUP2,-> nuclear periphery,cytoplasm,Nucleoporin involved in nucleocytoplasmic tran...
371,,,,,YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
372,,,,,YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...


In [None]:
tkach

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class
0,YKL010C,UFD4,nucleus,nucleus
1,YPL242C,IQG1,from budneck,from budneck
2,YOR127W,RGA1,from budneck,from budneck
3,YLR187W,SKG3,from budneck,from budneck
4,YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...,...
184,YBR205W,KTR3,other,
185,YOR101W,RAS1,nucleus,
186,YDR519W,FPR2,other,
187,YML018C,,other,


In [None]:
maz = Mazumder_f()

In [None]:
mms_merged = pd.merge(tkach, maz, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Systematic ORF,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
0,YKL010C,UFD4,nucleus,nucleus,,,,
1,YPL242C,IQG1,from budneck,from budneck,,,,
2,YOR127W,RGA1,from budneck,from budneck,,,,
3,YLR187W,SKG3,from budneck,from budneck,,,,
4,YNL242W,ATG2,cyto foci,cyto foci,,,,
...,...,...,...,...,...,...,...,...
YPR074C,,,,,TKL1,-> cytoplasm and nucleus,nucleus,"Transketolase, similar to Tkl2p; catalyzes con..."
YPR119W,,,,,CLB2,-> nucleus,nucleus,B-type cyclin involved in cell cycle progressi...
YPR180W,,,,,AOS1,-> nucleus,nucleus,Subunit of a heterodimeric nuclear SUMO activa...
YPR190C,,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82


In [None]:
tkach.set_index('Systematic ORF', inplace = True)

In [None]:
tkach

Unnamed: 0_level_0,Standard Name,MMS_localization_class,HU_localization_class
Systematic ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YKL010C,UFD4,nucleus,nucleus
YPL242C,IQG1,from budneck,from budneck
YOR127W,RGA1,from budneck,from budneck
YLR187W,SKG3,from budneck,from budneck
YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...
YBR205W,KTR3,other,
YOR101W,RAS1,nucleus,
YDR519W,FPR2,other,
YML018C,,other,


In [None]:
maz


Unnamed: 0_level_0,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
ORF_Mazumder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YNL312W,RFA2,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR224C,HTB1,-> nucleus,nucleus,"Histone H2B, core histone protein required for..."
YPR074C,TKL1,-> cytoplasm and nucleus,nucleus,"Transketolase, similar to Tkl2p; catalyzes con..."
YJL173C,RFA3,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR432W,NPL3,-> nucleus,nucleus,"RNA-binding protein that promotes elongation, ..."
...,...,...,...,...
YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
YOR230W,WTM1,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR117W,RPT5,-> nucleus,cytoplasm,One of six ATPases of the 19S regulatory parti...


In [None]:
mms_merged = pd.merge(tkach, maz, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...
YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...
YAL041W,CDC24,cyto,,,,,
YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an..."
YBL023C,MCM2,nucleus,nucleus,,,,
...,...,...,...,...,...,...,...
YPR119W,,,,CLB2,-> nucleus,nucleus,B-type cyclin involved in cell cycle progressi...
YPR124W,CTR1,other,,,,,
YPR180W,,,,AOS1,-> nucleus,nucleus,Subunit of a heterodimeric nuclear SUMO activa...
YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82


In [None]:
den


Unnamed: 0_level_0,intensity_change,localization_change,initial_localization,end_localization
yORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YAL007C,Up,Yes,cytoplasm and ER and punctate,cytoplasm and bud and ER and punctate
YAL041W,Constant,Yes,cytoplasm and nucleus,cytoplasm
YAL047C,Up,Yes,cytoplasm and punctate,punctate
YAL054C,Constant,Yes,cytoplasm,unclassified
YAL055W,Up,Yes,nothing,punctate
...,...,...,...,...
YPR149W,Up,Yes,cell periphery and punctate,cytoplasm and nucleus and cell periphery and p...
YPR174C,Up,Yes,nothing,nuclear periphery
YPR184W,Up,Yes,cytoplasm and punctate,cytoplasm
YPR187W,Up,Yes,nucleus,cytoplasm and nucleus


In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,intensity_change,localization_change,initial_localization,end_localization
YAL007C,,,,,,,,Up,Yes,cytoplasm and ER and punctate,cytoplasm and bud and ER and punctate
YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,
YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,
YAL041W,CDC24,cyto,,,,,,Constant,Yes,cytoplasm and nucleus,cytoplasm
YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,
...,...,...,...,...,...,...,...,...,...,...,...
YPR184W,,,,,,,,Up,Yes,cytoplasm and punctate,cytoplasm
YPR187W,,,,,,,,Up,Yes,nucleus,cytoplasm and nucleus
YPR188C,,,,,,,,Up,Yes,nothing,bud
YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,


In [None]:
mms_merged.to_csv('mms_merged.csv')

In [None]:
mms_merged.to_parquet('mms_merged.parquet')

In [None]:
copy_merged = mms_merged.reset_index(drop = False)

In [None]:
copy_merged

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,intensity_change,localization_change,initial_localization,end_localization
0,YAL007C,,,,,,,,Up,Yes,cytoplasm and ER and punctate,cytoplasm and bud and ER and punctate
1,YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,
2,YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,
3,YAL041W,CDC24,cyto,,,,,,Constant,Yes,cytoplasm and nucleus,cytoplasm
4,YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
772,YPR184W,,,,,,,,Up,Yes,cytoplasm and punctate,cytoplasm
773,YPR187W,,,,,,,,Up,Yes,nucleus,cytoplasm and nucleus
774,YPR188C,,,,,,,,Up,Yes,nothing,bud
775,YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,


In [None]:
protein_list = [
	'AAC1', 'ACE2', 'AFT1', 'AGR2', 'AIM20', 'AIM4', 'AIP1', 'AME1', 'APC1', 'APC4', 'APC5', 'APJ1', 'ARO4', 'ARP4', 'ARP6', 'ASE1', 'ATC1', 'ATG16', 'ATG18', 'ATG29', 'ATG3', 'AVL9', 'BCH1', 'BIR1', 'BMH1', 'BMH2', 'BTN2', 'BUD4', 'CAF8', 'CAP1', 'CAP2', 'CAT8', 'CBK1', 'CBP2', 'CCC1', 'CDC14', 'CDC15', 'CDC20', 'CDC24', 'CDC27', 'CDC40', 'CDC48', 'CDC6', 'CGR1', 'CHK1', 'CHS7', 'CLB3', 'CMS1', 'CRM11', 'CSM1', 'CTR1', 'CTR86', 'CYK3', 'DBF4', 'DCP1', 'DCP2', 'DDC1', 'DDC2', 'DHH1', 'DIP5', 'DMA2', 'DOA1', 'DOT6', 'DPB11', 'DSE1', 'DSE3', 'DSF2', 'DUS3', 'ECM29', 'ECM3', 'ECO1', 'EDC1', 'EDC2', 'EDC3', 'ENT1', 'ESP2', 'EXO1', 'EXO70', 'FAA1', 'FAA4', 'FAR1', 'FEN2', 'FGV2', 'FIG4', 'FIN1', 'FLR1', 'FPR2', 'FUI1', 'GCD8', 'GIC1', 'GLC3', 'GLN1', 'GSY1', 'GSY2', 'GTB1', 'GYL1', 'GYP5', 'HAA1', 'HAC1', 'HFA1', 'HGH1', 'HHF1', 'HMG1', 'HMG2', 'HNT3', 'HOS2', 'HSL7', 'HSP104', 'HSP26', 'HSP42', 'HTA2', 'INO80', 'IPL1', 'IQG1', 'ITR1', 'IWR1', 'IZH4', 'KTR1', 'KTR3', 'LAG1', 'LAP4', 'LCD1', 'LIF1', 'LOC1', 'LRS4', 'LSM1', 'LSM12', 'LSM2', 'LSM3d0217', 'LSM4', 'LSM7', 'LST8', 'MCM2', 'MGS1', 'MKT1', 'MMS21', 'MOB1', 'MRC1', 'MRE11', 'MRS8', 'MRT4', 'MSB1', 'MSB3', 'MSD1', 'MSH3', 'MSN1', 'MSN2d0222r2', 'MTC5', 'MTR10', 'NAM7', 'NEJ1', 'NMD4', 'NOB1', 'NOP13', 'NOP56', 'NOP58', 'NPL4', 'NSG1', 'NSP1', 'OPY2', 'PAT1', 'PBP1', 'PBP2', 'PBP4', 'PDR3', 'PEX21', 'PEX29', 'PHO81', 'PIL1', 'PKP2', 'PNC1', 'POL30', 'PPH21', 'PPH22d0214', 'PPH3', 'PPN1', 'PRE3', 'PRS5', 'PSO2', 'PSY1', 'PXL1', 'QCR6', 'RAD24', 'RAD5', 'RAD50', 'RAD51', 'RAD52d0220r1', 'RAD53', 'RAD54', 'RAD55', 'RAD57', 'RAD9', 'RAS1', 'RBD2', 'RDH54', 'REV1', 'RFA1d0213', 'RFA2', 'RFC2', 'RFC3', 'RFC4', 'RGA1', 'RIM1', 'RME1', 'RMI1', 'RMT2', 'RNR1d0216r1', 'RNR4', 'RPC10', 'RPL15B', 'RPL40A', 'RPN4', 'RPS18A', 'RQC2', 'RRB1', 'RRD1', 'RRP17d0210', 'RRP5', 'RSF2', 'RTR2d0215r2', 'RTS1', 'RTT107', 'RVB1', 'SAC6', 'SAE2', 'SCD6', 'SCH9', 'SEC11', 'SEC3', 'SFH5', 'SGS1', 'SGT2', 'SIP5', 'SIZ2', 'SKG3', 'SLD2', 'SLD3', 'SLX4', 'SLX8', 'SNT2', 'SPT21', 'SQS1', 'SRP68', 'SRS2', 'STB2', 'STB4', 'SUB2', 'SUT1', 'SVL3', 'TDR3', 'TIS11', 'TOF2', 'TOP3', 'TOS4', 'TRM112', 'TSA1', 'TSC13', 'TSR1', 'TSR3', 'TUB1', 'UBC9', 'UFD4', 'ULP1', 'ULP2', 'ULS1', 'VPH1', 'VPS1', 'XBP1', 'XRS2d0215', 'YAP1', 'YAR009C', 'YBR197C', 'YBR259W', 'YDL111C', 'YDL129W', 'YDL156W', 'YDR089W', 'YDR115W', 'YDR132C', 'YDR170W-A', 'YDR348C', 'YER064C', 'YGR042W', 'YGR122W', 'YGR151C', 'YHR182W', 'YIL108W', 'YJR056C', 'YKL060W', 'YKU70', 'YKU80', 'YLR108C', 'YLR126C', 'YLR297W', 'YLR363W-A', 'YML011C', 'YML108W', 'YMR031C', 'YMR061C', 'YMR160W', 'YMR291W', 'YOF1', 'YOR342C', 'YOX1', 'YPR174C', 'YTA8', 'ZIP2', 'ZPR1'
]

In [None]:
copy_merged.loc[copy_merged['index'].isin(protein_list)]

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,intensity_change,localization_change,initial_localization,end_localization
53,YBR197C,,nucleus,,,,,,,,,
100,YDL129W,,nucleus,nucleus,,,,,,,,
105,YDL156W,,nuclear foci,,,,,,,,,
130,YDR089W,,cyto,cyto,,,,,,,,
136,YDR115W,,other,other,,,,,,,,
142,YDR132C,,nucleus,,,,,,Constant,Yes,cytoplasm,nuclear periphery and ER
149,YDR170W-A,,cyto,,,,,,Up,Yes,cytoplasm and punctate,cytoplasm and punctate
176,YDR348C,,cyto,,,,,,,,,
229,YER064C,,nucleus,cyto,,,,,,,,
301,YGR042W,,nuclear foci,,,,,,,,,


In [None]:
copy_merged

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,intensity_change,localization_change,initial_localization,end_localization
0,YAL007C,,,,,,,,Up,Yes,cytoplasm and ER and punctate,cytoplasm and bud and ER and punctate
1,YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,
2,YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,
3,YAL041W,CDC24,cyto,,,,,,Constant,Yes,cytoplasm and nucleus,cytoplasm
4,YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
772,YPR184W,,,,,,,,Up,Yes,cytoplasm and punctate,cytoplasm
773,YPR187W,,,,,,,,Up,Yes,nucleus,cytoplasm and nucleus
774,YPR188C,,,,,,,,Up,Yes,nothing,bud
775,YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,


In [None]:
tkach

Unnamed: 0_level_0,Standard Name,MMS_localization_class,HU_localization_class
Systematic ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YKL010C,UFD4,nucleus,nucleus
YPL242C,IQG1,from budneck,from budneck
YOR127W,RGA1,from budneck,from budneck
YLR187W,SKG3,from budneck,from budneck
YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...
YBR205W,KTR3,other,
YOR101W,RAS1,nucleus,
YDR519W,FPR2,other,
YML018C,,other,


In [None]:
den

Unnamed: 0_level_0,intensity_change,localization_change,initial_localization,end_localization
yORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YAL007C,Up,Yes,cytoplasm and ER and punctate,cytoplasm and bud and ER and punctate
YAL041W,Constant,Yes,cytoplasm and nucleus,cytoplasm
YAL047C,Up,Yes,cytoplasm and punctate,punctate
YAL054C,Constant,Yes,cytoplasm,unclassified
YAL055W,Up,Yes,nothing,punctate
...,...,...,...,...
YPR149W,Up,Yes,cell periphery and punctate,cytoplasm and nucleus and cell periphery and p...
YPR174C,Up,Yes,nothing,nuclear periphery
YPR184W,Up,Yes,cytoplasm and punctate,cytoplasm
YPR187W,Up,Yes,nucleus,cytoplasm and nucleus


In [None]:
maz

Unnamed: 0_level_0,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
ORF_Mazumder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
YNL312W,RFA2,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR224C,HTB1,-> nucleus,nucleus,"Histone H2B, core histone protein required for..."
YPR074C,TKL1,-> cytoplasm and nucleus,nucleus,"Transketolase, similar to Tkl2p; catalyzes con..."
YJL173C,RFA3,-> nucleus,nucleus,Subunit of heterotrimeric Replication Protein ...
YDR432W,NPL3,-> nucleus,nucleus,"RNA-binding protein that promotes elongation, ..."
...,...,...,...,...
YLR003C,YLR003C,-> nucleus,cytoplasm,Subunit of U3-containing 90S preribosome proce...
YOR230W,WTM1,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR229W,WTM2,-> nucleus,cytoplasm,Transcriptional modulator involved in regulati...
YOR117W,RPT5,-> nucleus,cytoplasm,One of six ATPases of the 19S regulatory parti...


In [None]:
denervaud_ycd = pd.read_excel("Den_data_bestgood.xlsx", sheet_name='Sheet1')

In [None]:
denervaud_ycd

Unnamed: 0,geneName,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization
0,GDB1,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm
1,DNA2,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate"
2,-,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate
3,CRM1,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate"
4,CAF120,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified"
...,...,...,...,...,...,...,...,...
695,ISW1,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
696,ISW1,YBR245C,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",nucleus
697,RNR1,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm
698,RNR1,YER070W,MMS_main,Good,Up,Yes,"cytoplasm,unclassified",cytoplasm


In [None]:
denervaud_ycd.groupby(geneName)

NameError: name 'geneName' is not defined

In [None]:
denervaud_ycd.groupby('geneName').transform([1])

TypeError: unhashable type: 'list'

In [None]:
denervaud_ycd.groupby('geneName').transform(.loc[1])

SyntaxError: invalid syntax (<ipython-input-73-54173946c644>, line 1)

In [None]:
denervaud_ycd.groupby('geneName').transform(lambda x: x.loc[0])

KeyError: 0

In [None]:
denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])

Unnamed: 0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization
0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm
1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate"
2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate
3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate"
4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified"
...,...,...,...,...,...,...,...
695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm
698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])

In [None]:
denervaud_ycd

Unnamed: 0,geneName,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization
0,GDB1,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm
1,DNA2,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate"
2,-,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate
3,CRM1,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate"
4,CAF120,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified"
...,...,...,...,...,...,...,...,...
695,ISW1,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
696,ISW1,YBR245C,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",nucleus
697,RNR1,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm
698,RNR1,YER070W,MMS_main,Good,Up,Yes,"cytoplasm,unclassified",cytoplasm


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).unstack()

In [None]:
den

yORF              0        YPR184W
                  1        YHR164C
                  2        YDR128W
                  3        YGR218W
                  4        YNL278W
                           ...    
end_localization  695      nucleus
                  696      nucleus
                  697    cytoplasm
                  698    cytoplasm
                  699    cytoplasm
Length: 4900, dtype: object

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])

In [None]:
den

Unnamed: 0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization
0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm
1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate"
2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate
3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate"
4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified"
...,...,...,...,...,...,...,...
695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm
698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).reset_index()

In [None]:
den

Unnamed: 0,index,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization
0,0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm
1,1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate"
2,2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate
3,3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate"
4,4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified"
...,...,...,...,...,...,...,...,...
695,695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
696,696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus
697,697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm
698,698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).ungroup()

AttributeError: 'DataFrame' object has no attribute 'ungroup'

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).unstack(level = 1)

In [None]:
den

yORF              0        YPR184W
                  1        YHR164C
                  2        YDR128W
                  3        YGR218W
                  4        YNL278W
                           ...    
end_localization  695      nucleus
                  696      nucleus
                  697    cytoplasm
                  698    cytoplasm
                  699    cytoplasm
Length: 4900, dtype: object

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).unstack(level = -1)

In [None]:
den

yORF              0        YPR184W
                  1        YHR164C
                  2        YDR128W
                  3        YGR218W
                  4        YNL278W
                           ...    
end_localization  695      nucleus
                  696      nucleus
                  697    cytoplasm
                  698    cytoplasm
                  699    cytoplasm
Length: 4900, dtype: object

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).unstack(level = 0)

In [None]:
den

yORF              0        YPR184W
                  1        YHR164C
                  2        YDR128W
                  3        YGR218W
                  4        YNL278W
                           ...    
end_localization  695      nucleus
                  696      nucleus
                  697    cytoplasm
                  698    cytoplasm
                  699    cytoplasm
Length: 4900, dtype: object

In [None]:
denervaud_ycd['Name_copy'] = denervaud_ycd['geneName']

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0]).unstack(level = 0)

In [None]:
den

yORF       0      YPR184W
           1      YHR164C
           2      YDR128W
           3      YGR218W
           4      YNL278W
                   ...   
Name_copy  695       ISW1
           696       ISW1
           697       RNR1
           698       RNR1
           699       RNR1
Length: 5600, dtype: object

In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])

In [None]:
den

Unnamed: 0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",DNA2
2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate",CRM1
4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified",CAF120
...,...,...,...,...,...,...,...,...
695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1
698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1


In [None]:
den.set_index('yORF')

Unnamed: 0_level_0,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
yORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",DNA2
YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate",CRM1
YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified",CAF120
...,...,...,...,...,...,...,...
YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1
YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1


In [None]:
mms_merged = pd.merge(tkach, maz, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

TypeError: '<' not supported between instances of 'str' and 'int'

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder
YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...
YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...
YAL041W,CDC24,cyto,,,,,
YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an..."
YBL023C,MCM2,nucleus,nucleus,,,,
...,...,...,...,...,...,...,...
YPR119W,,,,CLB2,-> nucleus,nucleus,B-type cyclin involved in cell cycle progressi...
YPR124W,CTR1,other,,,,,
YPR180W,,,,AOS1,-> nucleus,nucleus,Subunit of a heterodimeric nuclear SUMO activa...
YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82


In [None]:
tkach

Unnamed: 0_level_0,Standard Name,MMS_localization_class,HU_localization_class
Systematic ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YKL010C,UFD4,nucleus,nucleus
YPL242C,IQG1,from budneck,from budneck
YOR127W,RGA1,from budneck,from budneck
YLR187W,SKG3,from budneck,from budneck
YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...
YBR205W,KTR3,other,
YOR101W,RAS1,nucleus,
YDR519W,FPR2,other,
YML018C,,other,


In [None]:
tkach.drop_na(subset = "MMS_localization_class", inplace = True)

AttributeError: 'DataFrame' object has no attribute 'drop_na'

In [None]:
tkach.drop_na(subset = "MMS_localization_class")

AttributeError: 'DataFrame' object has no attribute 'drop_na'

In [None]:
tkach.dropna(subset = "MMS_localization_class")

Unnamed: 0_level_0,Standard Name,MMS_localization_class,HU_localization_class
Systematic ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YKL010C,UFD4,nucleus,nucleus
YPL242C,IQG1,from budneck,from budneck
YOR127W,RGA1,from budneck,from budneck
YLR187W,SKG3,from budneck,from budneck
YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...
YBR205W,KTR3,other,
YOR101W,RAS1,nucleus,
YDR519W,FPR2,other,
YML018C,,other,


In [None]:
tkach = tkach.dropna(subset = "MMS_localization_class")

In [None]:
tkach

Unnamed: 0_level_0,Standard Name,MMS_localization_class,HU_localization_class
Systematic ORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YKL010C,UFD4,nucleus,nucleus
YPL242C,IQG1,from budneck,from budneck
YOR127W,RGA1,from budneck,from budneck
YLR187W,SKG3,from budneck,from budneck
YNL242W,ATG2,cyto foci,cyto foci
...,...,...,...
YBR205W,KTR3,other,
YOR101W,RAS1,nucleus,
YDR519W,FPR2,other,
YML018C,,other,


In [None]:
mms_merged = pd.merge(tkach, maz, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

TypeError: '<' not supported between instances of 'str' and 'int'

In [None]:
den

Unnamed: 0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",DNA2
2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate",CRM1
4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified",CAF120
...,...,...,...,...,...,...,...,...
695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1
698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])
den.set_index('yORF')

Unnamed: 0_level_0,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
yORF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",DNA2
YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate",CRM1
YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified",CAF120
...,...,...,...,...,...,...,...
YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1
YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1


In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

TypeError: '<' not supported between instances of 'str' and 'int'

In [None]:
den.set_index('yORF', inplace = True)

In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged.to_csv('mms_merged.csv')

In [None]:
mms_merged.to_parquet('mms_merged.parquet')

ArrowTypeError: ("Expected bytes, got a 'int' object", 'Conversion failed for column Name_copy with type object')

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
YAL007C,,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR184W,,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YPR187W,,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
YPR188C,,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,


In [None]:
mms_merged['Standard Name'] = mms_merged['Standard Name'].fillna(mms_merged['CommName_Mazumder'])

In [None]:
mms_merged['Standard Name'] = mms_merged['Standard Name'].fillna(mms_merged['Name_copy'])

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
YAL007C,ERP2,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
YAL016W,TPD3,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
YAL033W,POP5,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR184W,GDB1,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YPR187W,RPO26,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
YPR188C,MLC2,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
YPR190C,RPC82,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,


In [None]:
mms_merged.to_csv('mms_merged_wnames.csv')

In [None]:
mms_merged.to_parquet('mms_merged_wnames.parquet')

ArrowTypeError: ("Expected bytes, got a 'int' object", 'Conversion failed for column Standard Name with type object')

In [None]:
copy_merged.loc[copy_merged['index'].isin(protein_list)]

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,intensity_change,localization_change,initial_localization,end_localization
53,YBR197C,,nucleus,,,,,,,,,
100,YDL129W,,nucleus,nucleus,,,,,,,,
105,YDL156W,,nuclear foci,,,,,,,,,
130,YDR089W,,cyto,cyto,,,,,,,,
136,YDR115W,,other,other,,,,,,,,
142,YDR132C,,nucleus,,,,,,Constant,Yes,cytoplasm,nuclear periphery and ER
149,YDR170W-A,,cyto,,,,,,Up,Yes,cytoplasm and punctate,cytoplasm and punctate
176,YDR348C,,cyto,,,,,,,,,
229,YER064C,,nucleus,cyto,,,,,,,,
301,YGR042W,,nuclear foci,,,,,,,,,


In [None]:
den = denervaud_ycd.groupby('geneName').transform(lambda x: x.iloc[0])

In [None]:
den

Unnamed: 0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
0,YPR184W,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
1,YHR164C,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",DNA2
2,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
3,YGR218W,MMS_main,Good,Up,Yes,"nothing,nucleus","nucleus,punctate",CRM1
4,YNL278W,MMS_main,Good,Constant,Yes,"bud,vacuole,punctate","vacuole,unclassified",CAF120
...,...,...,...,...,...,...,...,...
695,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
696,YBR245C,MMS_main,Best,Up,Yes,"cytoplasm,nucleus",nucleus,ISW1
697,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1
698,YER070W,MMS_main,Best,Up,Yes,"cytoplasm,unclassified",cytoplasm,RNR1


In [None]:
den = denervaud_ycd.groupby('geneName').agg(lambda x: x.iloc[0])

In [None]:
den

Unnamed: 0_level_0,yORF,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
geneName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
37012,YPL100W,MMS_main,Good,Constant,Yes,"nothing,cytoplasm","nuclear periphery,ER,unclassified",37012
-,YDR128W,MMS_main,Good,Constant,Yes,nothing,punctate,-
ACC1,YNR016C,MMS_main,Good,Down,Yes,"cytoplasm,cell periphery,punctate","nothing,cytoplasm,cell periphery",ACC1
ACE2,YLR131C,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,ACE2
ACH1,YBL015W,MMS_main,Best,Up,Yes,"cytoplasm,mitochondrion",mitochondrion,ACH1
...,...,...,...,...,...,...,...,...
YKU70,YMR284W,MMS_main,Good,Up,Yes,cytoplasm,unclassified,YKU70
YMR31,YFR049W,MMS_main,Best,Up,Yes,punctate,"mitochondrion,punctate",YMR31
YRR1,YOR162C,MMS_main,Good,Up,Yes,cytoplasm,unclassified,YRR1
YTA6,YPL074W,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,mitochondrion,unclassified",YTA6


In [None]:
den.set_index('yORF', inplace = True)

In [None]:
mms_merged = pd.merge(tkach, maz, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged = pd.merge(mms_merged, den, left_index = True, right_index = True, how = 'outer')

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
YAL007C,,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
YAL016W,,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
YAL033W,,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
YAL043C,,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR184W,,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YPR187W,,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
YPR188C,,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
YPR190C,,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,


In [None]:
mms_merged['Standard Name'] = mms_merged['Standard Name'].fillna(mms_merged['CommName_Mazumder'])

In [None]:
mms_merged['Standard Name'] = mms_merged['Standard Name'].fillna(mms_merged['Name_copy'])

In [None]:
mms_merged.to_parquet('mms_merged_wnames.parquet')

ArrowTypeError: ("Expected bytes, got a 'int' object", 'Conversion failed for column Standard Name with type object')

In [None]:
mms_merged.to_csv('mms_merged_wnames.csv')

In [None]:
mms_merged

Unnamed: 0,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
YAL007C,ERP2,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
YAL016W,TPD3,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
YAL033W,POP5,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
YAL043C,PTA1,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YPR184W,GDB1,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
YPR187W,RPO26,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
YPR188C,MLC2,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
YPR190C,RPC82,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,


In [None]:
mms_merged.reset_index(inplace = True, drop = False)

In [None]:
mms_merged['Standard Name'] = mms_merged['Standard Name'].fillna(mms_merged['index'])

In [None]:
mms

NameError: name 'mms' is not defined

In [None]:
mms_merged.to_csv('mms_merged_wnames.csv')

In [None]:
mms_merged

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
0,YAL007C,ERP2,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
1,YAL016W,TPD3,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
2,YAL033W,POP5,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
3,YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
4,YAL043C,PTA1,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,YPR184W,GDB1,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
682,YPR187W,RPO26,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
683,YPR188C,MLC2,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
684,YPR190C,RPC82,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,


In [None]:
mms_merged.loc[mms_merged['Standard Name'].isin(protein_list)]

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
3,YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
9,YAR019C,CDC15,,,,,,,MMS_main,Good,Up,Yes,"nothing,cytoplasm","mitochondrion,unclassified",CDC15
12,YBL023C,MCM2,nucleus,nucleus,,,,,MMS_main,Good,Constant,Yes,"cytoplasm,nucleus",cytoplasm,MCM2
16,YBL026W,LSM2,cyto foci,cyto foci,,,,,,,,,,,
17,YBL042C,FUI1,other,other,,,,,MMS_main,Best,Constant,Yes,cell periphery,cytoplasm,FUI1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661,YPL265W,DIP5,other,other,,,,,MMS_main,Good,Constant,Yes,cell periphery,"cell periphery,vacuole",DIP5
667,YPR035W,GLN1,cyto foci,,,,,,MMS_main,Best,Constant,Yes,cytoplasm,"cytoplasm,punctate",GLN1
675,YPR075C,OPY2,other,,,,,,MMS_main,Good,Constant,Yes,cytoplasm,"cytoplasm,punctate",OPY2
677,YPR124W,CTR1,other,,,,,,,,,,,,


In [None]:
mms_merged.loc[mms_merged['Standard Name'].isin(protein_list)].to_csv('library_prots_moveMMS.csv')

In [None]:
mms_merged

Unnamed: 0,index,Standard Name,MMS_localization_class,HU_localization_class,CommName_Mazumder,Localization_Mazumder,Dest_Mazumder,Function_Mazumder,exp_cond,movieTag,intensity_change,localization_change,initial_localization,end_localization,Name_copy
0,YAL007C,ERP2,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,ER,punctate","cytoplasm,bud,ER,punctate",ERP2
1,YAL016W,TPD3,,,TPD3,-> cytoplasm and nucleus,nucleus,Regulatory subunit A of the heterotrimeric pro...,,,,,,,
2,YAL033W,POP5,,,POP5,-> cytoplasm and nucleus,nucleus,Subunit of both RNase MRP and nuclear RNase P;...,,,,,,,
3,YAL041W,CDC24,cyto,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,nucleus",cytoplasm,CDC24
4,YAL043C,PTA1,,,PTA1,-> nucleus,nucleus,"Subunit of holo-CPF, a multiprotein complex an...",,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,YPR184W,GDB1,,,,,,,MMS_main,Good,Up,Yes,"cytoplasm,punctate",cytoplasm,GDB1
682,YPR187W,RPO26,,,,,,,MMS_main,Best,Up,Yes,nucleus,"cytoplasm,nucleus",RPO26
683,YPR188C,MLC2,,,,,,,MMS_main,Good,Up,Yes,nothing,bud,MLC2
684,YPR190C,RPC82,,,RPC82,-> cytoplasm and nucleus,nucleus,RNA polymerase III subunit C82,,,,,,,
