# Sec Recon Analyses

Here we perform several analyses to assess the quality of SecRecon.

In [1]:
import datetime
import pickle

import re
import pandas as pd
import numpy as np
from google_sheet import GoogleSheet
from collections import defaultdict

import matplotlib.pyplot as plt
import plotly.express as px
from venn import venn
from upsetplot import UpSet
import matplotlib.pyplot as plt
from itertools import product

# Warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [10]:
##### ----- Generate datasets from Google Sheet ----- #####

# Path to the credentials file handed to the GoogleSheet helper
KEY_FILE_PATH = 'credentials.json'

# Google Sheet ID of the SecRecon spreadsheet
Sec_Recon_SPREADSHEET_ID = '1DaAdZlvMYDqb7g31I5dw-ZCZH52Xj_W3FnQMFUzqmiQ'

# Handle used for every read from the spreadsheet
sec_recon_gsheet_file = GoogleSheet(Sec_Recon_SPREADSHEET_ID, KEY_FILE_PATH)

# Names of the two sheets this notebook pulls in
sec_genes_sheet = 'SecRecon'
ontology_sheet = 'Ontology'

# Read both sheets in the same order as before: genes first, then ontology
sec_genes, ontology = (
    sec_recon_gsheet_file.read_google_sheet(sheet_name)
    for sheet_name in (sec_genes_sheet, ontology_sheet)
)

### Network Analysis

In this section, we use the networks generated in the `Network_visualization` notebook to visualize experimental data from a CHO High vs Low dataset.

In [4]:
# Load the 'Proteome DE proteins' sheet of the supplementary workbook
cho_vs_plasma_prot = pd.read_excel(
    io='Data/cho_vs_plasma/1-s2.0-S1096717624000521-mmc3.xlsx',
    sheet_name='Proteome DE proteins',
)

In [20]:
# Map Mouse genes to Human genes

# Lookup table built from the SecRecon sheet: mouse gene symbol -> human gene symbol
dict_mouse_human = dict(zip(sec_genes['MOUSE GENE SYMBOL'], sec_genes['GENE SYMBOL']))

# Map with the lookup built above. The previous code mapped with `gene_dict`,
# which is not defined at this point on a fresh kernel and, once loaded, holds
# per-gene annotation dicts rather than mouse -> human symbols.
# Mouse symbols missing from the lookup become NaN in 'Human_Genes'.
cho_vs_plasma_prot['Human_Genes'] = cho_vs_plasma_prot['Mmus_Genes'].map(dict_mouse_human)

In [25]:
# Show only the rows that received a human gene symbol (NaN entries removed)
cho_vs_plasma_prot['Human_Genes'].dropna()

0       UBE2J2
6        VAMP3
8        UBE4B
11       PLOD1
15        DDI2
         ...  
1470      DPM3
1479     STX12
1504     FKBP8
1526     VAMP2
1538     CALM1
Name: Human_Genes, Length: 194, dtype: object

In [8]:
# Restore the pickled gene dictionary from 'gene_dict.pkl'.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('gene_dict.pkl', 'rb') as f:
    gene_dict = pickle.loads(f.read())

In [9]:
# Display the loaded dictionary. Per the output below it is keyed by gene symbol,
# each value holding lists under 'systems', 'subsystems', 'processes',
# 'subprocesses', 'subcellular_localization' and 'protein_complex'.
gene_dict

{'A3GALT2': {'systems': ['Post-translational modifications'],
  'subsystems': ['Glycosylation'],
  'processes': ['Glycosphingolipid'],
  'subprocesses': ['Isoglobo series'],
  'subcellular_localization': ['Golgi'],
  'protein_complex': ['']},
 'A4GALT': {'systems': ['Post-translational modifications'],
  'subsystems': ['Glycosylation'],
  'processes': ['Glycosphingolipid'],
  'subprocesses': ['Globo series'],
  'subcellular_localization': ['Golgi', 'Plasma Membrane'],
  'protein_complex': ['']},
 'A4GNT': {'systems': ['Post-translational modifications'],
  'subsystems': ['Glycosylation'],
  'processes': ['O-glycan'],
  'subprocesses': ['O-glycan mucin'],
  'subcellular_localization': ['Golgi'],
  'protein_complex': ['']},
 'ABL1': {'systems': ['Post-translational modifications',
   'Proteostasis',
   'Vesicle trafficking'],
  'subsystems': ['Post-Golgi',
   'ER stress response/UPR',
   'Apoptosis',
   'Cytoskeletal remodeling',
   'Phosphorylation'],
  'processes': ['Kinase', 'Endocyto