In [1]:
import pandas as pd
import sys

import os

workdir = os.getcwd()
os.environ['MPLCONFIGDIR'] = f'{workdir}/.cache'
os.environ['WANDB_CACHE_DIR'] = f'{workdir}/.cache'
import cyclic_gan as gan

import wandb

workdir = os.getcwd()
sys.path.append(workdir)
os.environ['WANDB_NOTEBOOK_NAME'] = "10_run_cyclicGAN_integration.ipynb"
wandb.login()

%reload_ext autoreload
%autoreload 2

[34m[1mwandb[0m: Currently logged in as: [33mraminass[0m. Use [1m`wandb login --relogin`[0m to force relogin


# U2OS Data

In [10]:
# Size tag embedded in the file name (presumably the embedding dimension - TODO confirm).
emb_dim = 1024

# Disabled alternative input (hek293):
#   f'{workdir}/3_outputs_format_image_emd/hek293_img_emb.tsv'
img_path = f'{workdir}/3_outputs_format_image_emd/HPA_U2OS_emd_{emb_dim}.zip'  # U2OS

# Tab-separated table; the first column becomes the row index.
img_features = pd.read_csv(img_path, sep='\t', index_col=0)

In [11]:

# Disabled alternative PPI embeddings, kept for reference:
#   BioPlex3:   /home/bnet/raminasser/mlm/outputs/Dec18_13-27-48_n-501_feat.tsv
#   BioPlex2.0: /home/bnet/raminasser/CLIP-MUSE/5_outputs_run_clip_integration/Nov02_09-12-56_n-501_feat.tsv

# Size tag embedded in the file name (presumably the embedding dimension - TODO confirm).
ppi_emb_dim = 1024
ppi_path = f'{workdir}/2_outputs_run_node2vec_format_apms_emd/bioplex_U2OS_emd_{ppi_emb_dim}.tsv'

# Tab-separated table; the first column becomes the row index.
ppi_features = pd.read_csv(ppi_path, sep='\t', index_col=0)

In [6]:
# Labels present in the index of both tables.
# The original `list(set(a) & set(b))` yields an arbitrary order that can differ
# between kernel sessions (string hashing is randomised per process), which makes
# the row order of the frames selected with it non-reproducible. Here the labels
# keep the order of their first appearance in `img_features`, so the selection is
# deterministic while containing exactly the same labels.
ppi_ids = set(ppi_features.index.values)
overlapping_proteins = [
    protein
    for protein in dict.fromkeys(img_features.index.values)  # de-duplicates, keeps order
    if protein in ppi_ids
]
print(len(overlapping_proteins))

5250


In [7]:
# Restrict both tables to the shared labels, selected in the same order for each.
img_features, ppi_features = (
    table.loc[overlapping_proteins] for table in (img_features, ppi_features)
)

# BioPlex 3.0

In [2]:
# Input tables for this section (tab-separated, first column used as the index).
img_first_path = f'{workdir}/3_outputs_format_image_emd/hek293_img_emb_first.tsv'
ppi_node2vec_path = f'{workdir}/2_outputs_run_node2vec_format_apms_emd/bioplex3.tsv'

img_first = pd.read_csv(img_first_path, sep='\t', index_col=0)
ppi_node2vec = pd.read_csv(ppi_node2vec_path, sep='\t', index_col=0)

# Disabled alternatives, kept for reference:
# img_mean = pd.read_table(f'{workdir}/3_outputs_format_image_emd/hek293_img_emb_mean.tsv', sep='\t', index_col=0)
# ppi_bert = pd.read_table(f'/home/bnet/raminasser/mlm/outputs/Dec18_13-27-48_n-501_feat.tsv', sep='\t', index_col=0)

In [3]:
# Labels present in the index of both tables.
# The original `list(set(a) & set(b))` yields an arbitrary order that can differ
# between kernel sessions (string hashing is randomised per process), which makes
# the row order of the frames selected with it non-reproducible. Here the labels
# keep the order of their first appearance in `img_first`, so the selection is
# deterministic while containing exactly the same labels.
ppi_ids = set(ppi_node2vec.index.values)
overlapping_proteins = [
    protein
    for protein in dict.fromkeys(img_first.index.values)  # de-duplicates, keeps order
    if protein in ppi_ids
]
print(len(overlapping_proteins))

876


In [4]:
# Restrict both tables to the shared labels, selected in the same order for each.
img_first, ppi_node2vec = (
    table.loc[overlapping_proteins] for table in (img_first, ppi_node2vec)
)
# Disabled alongside their loaders:
# img_mean = img_mean.loc[overlapping_proteins]
# ppi_bert = ppi_bert.loc[overlapping_proteins]

# ModelTrain

## 4 networks

In [None]:
# Finish whatever run is currently active before opening a new one.
wandb.finish()

# Hyperparameters and run metadata tracked by wandb for this run.
run_config = {
    # --- Model hyperparameters ---
    "batch_size": 16,
    "dropout": 0.0,  # alternative noted by the author: 0.25
    "latent_dim": 128,  # output dim of embedding
    "hidden_dim": 512,
    # --- Training configuration ---
    "isTrain": True,
    "continue_train": False,
    "verbose": True,
    # number of epochs with the initial learning rate
    "n_epochs": 100,
    "epoch_count": 1,
    # number of epochs to linearly decay learning rate to zero
    "n_epochs_decay": 100,
    "gpu_ids": [1],
    "num_threads": 4,
    # --- Initializers ---
    # one of: normal, xavier, kaiming, orthogonal
    "init_type": "normal",
    # scaling factor for normal, xavier and orthogonal
    "init_gain": 0.02,
    # --- Loss weights ---
    "lambda_G": 1.0,
    "lambda_latent_cycle": 10.0,
    "lambda_short_cycle": 10.0,
    # --- Gradient penalty (https://arxiv.org/pdf/1704.00028) ---
    # gradient penalty lambda
    "lambda_gp": 0.1,
    # gradient penalty constant
    "gp_constant": 0.0,
    # type of the gradient penalty: [real | fake | mixed]
    "gp_type": "real",
    # the size of image buffer that stores previously generated images
    "pool_size": 8,
    # the type of GAN objective: [vanilla | lsgan | wgangp]; vanilla GAN loss is
    # the cross-entropy objective used in the original GAN paper
    "gan_mode": "lsgan",
    # --- Adam optimizer ---
    "beta1": 0.5,  # momentum term of adam
    "lr": 0.0002,  # initial learning rate for adam
    # learning rate policy: [linear | step | plateau | cosine]
    "lr_policy": "linear",
    # --- Data ---
    "workdir": f"{workdir}/10_outputs_run_gan",
    "input_files": [
        "HEK293_interactions/apms_ppi/ppi_edgelist_emb_1024.tsv",
        "HEK293_interactions/plms_ppi/ppi_edgelist_emb_1024.tsv",
        "HEK293_interactions/secms_ppi/ppi_edgelist_emb_1024.tsv",
        "HEK293_interactions/xlms_ppi/ppi_edgelist_emb_1024.tsv",
    ],
    "modalities": ["apms", "plms", "secms", "xlms"],
}

# No run name is passed (the `name=f"U2OS"` option is disabled), so wandb assigns
# a random one, like sunshine-lollypop-10.
wandb.init(
    project="Cyclic-GAN",  # the project where this run will be logged
    config=run_config,
)

## Proteomics

In [None]:
# Finish whatever run is currently active before opening a new one.
wandb.finish()

# Hyperparameters and run metadata tracked by wandb for this run.
run_config = {
    # --- Model hyperparameters ---
    "batch_size": 16,
    "dropout": 0.0,  # alternative noted by the author: 0.25
    "latent_dim": 128,  # output dim of embedding
    "hidden_dim": 512,
    # --- Training configuration ---
    "isTrain": True,
    "continue_train": False,
    "verbose": True,
    # number of epochs with the initial learning rate
    "n_epochs": 100,
    "epoch_count": 1,
    # number of epochs to linearly decay learning rate to zero
    "n_epochs_decay": 100,
    "gpu_ids": [0],
    "num_threads": 4,
    # --- Initializers ---
    # one of: normal, xavier, kaiming, orthogonal
    "init_type": "xavier",
    # scaling factor for normal, xavier and orthogonal
    "init_gain": 0.02,
    # --- Loss weights ---
    "lambda_G": 1.0,
    "lambda_latent_cycle": 10.0,
    "lambda_short_cycle": 10.0,
    # --- Gradient penalty (https://arxiv.org/pdf/1704.00028) ---
    # gradient penalty lambda
    "lambda_gp": 0.1,
    # gradient penalty constant
    "gp_constant": 0.0,
    # type of the gradient penalty: [real | fake | mixed]
    "gp_type": "real",
    # the size of image buffer that stores previously generated images
    "pool_size": 50,
    # the type of GAN objective: [vanilla | lsgan | wgangp]; vanilla GAN loss is
    # the cross-entropy objective used in the original GAN paper
    "gan_mode": "lsgan",
    # --- Adam optimizer ---
    "beta1": 0.5,  # momentum term of adam
    "lr": 0.0002,  # initial learning rate for adam
    # learning rate policy: [linear | step | plateau | cosine]
    "lr_policy": "linear",
    # --- Data ---
    "workdir": f"{workdir}/10_outputs_run_gan",
    # NOTE(review): the BioPlex 3.0 loading cell reads "hek293_img_emb_first.tsv",
    # while this list names "hek293_img_emb_first_image.tsv" - confirm which is intended.
    "input_files": [
        "2_outputs_run_node2vec_format_apms_emd/bioplex3.tsv",
        "3_outputs_format_image_emd/hek293_img_emb_first_image.tsv",
        "0_inputs/seq_emb.tsv",
    ],
    "modalities": ["bioplex", "hpa_images", "sequence"],
    "aligned": True,
}

# No run name is passed (the `name=f"U2OS"` option is disabled), so wandb assigns
# a random one, like sunshine-lollypop-10.
wandb.init(
    project="Cyclic-GAN",  # the project where this run will be logged
    config=run_config,
)

In [None]:
# Train using the configuration of the currently active wandb run, then close the run.
# If training raises (or the cell is interrupted), the run is still closed - with a
# non-zero exit code so it is not reported as successful - and the error is re-raised.
# Previously a failure skipped `wandb.finish()` and left the run open.
try:
    model = gan.clip_fit_predict(
        config=wandb.config,
        wandb=wandb,
    )
except BaseException:  # includes KeyboardInterrupt from interrupting the kernel
    wandb.finish(exit_code=1)
    raise
else:
    wandb.finish()