In [3]:
import pandas as pd
import os

# 1. Define the path to the parent directory

In [4]:
# Anchor every data/results path at the parent of the current working directory
# (os.pardir is the platform's "one level up" path component).
data_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# 2. Choose the phenotype

In [None]:
# Phenotype to process. The value is interpolated into the input file name
# (X_matrix_{phenotype}.pkl) and into the output file names
# (X_matrix_restricted_{phenotype}.csv / .pkl).
phenotype = "YPD_doublingtime"
# Alternative phenotype: uncomment the next line to process it instead.
#phenotype = "YPDCUSO410MM_40h"

# 3. Load the data

In [None]:
# Build the input paths relative to data_dir. Each path component is passed
# separately so os.path.join inserts the platform's own separator, and only
# the name that actually interpolates `phenotype` is an f-string.
x_matrix_path = os.path.join(data_dir, "data", f"X_matrix_{phenotype}.pkl")
important_proteins_path = os.path.join(data_dir, "results", "combined_importance_ranking.csv")

# Load the X matrix (pickled DataFrame) and the combined importance ranking (CSV).
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only load
# pickle files that come from a trusted source.
X_matrix = pd.read_pickle(x_matrix_path)
important_proteins = pd.read_csv(important_proteins_path)

# 4. Modify the column names of the X_matrix

In [None]:
# Take the current column labels of X_matrix; each one is passed through
# transform_protein_name below to build the renamed column list.
columns = X_matrix.columns

# Function to add the suffix _CNV if the name is not in the Protein_ID_index_AA_AA format
def transform_protein_name(protein_name):
    """Append ``_CNV`` to names that contain no underscore.

    A name without any ``_`` (e.g. ``"YHL048C"``) is treated as a CNV feature
    and gets the ``_CNV`` suffix. Any name that already contains an underscore
    (e.g. the ``Protein_ID_index_AA_AA`` mutation format) is returned unchanged.
    """
    has_underscore = "_" in protein_name
    return protein_name if has_underscore else f"{protein_name}_CNV"

# Run every column label through transform_protein_name, preserving order
new_columns = list(map(transform_protein_name, columns))

# Replace the X_matrix column labels with the transformed ones
X_matrix.columns = new_columns

# 5. Create the restricted X_matrix (mutations from important proteins)

In [None]:
# Extract the important feature IDs as unique, non-empty strings.
# Missing values are dropped because a NaN would make the substring test below
# raise TypeError, and empty strings are skipped because "" is a substring of
# every column name (it would silently select the whole matrix).
important_features = [
    feature
    for feature in important_proteins['Feature_ID'].dropna().astype(str).unique().tolist()
    if feature
]

# Keep 'Yeast_ID' plus every column whose name contains one of the important
# feature IDs as a substring. 'Yeast_ID' is excluded from the scan so it can
# never be selected twice.
# NOTE(review): substring matching also selects columns of any ID that merely
# contains an important ID (e.g. "YAL064W" would match "YAL064W-B_...");
# confirm this is the intended behavior.
filtered_columns = ['Yeast_ID'] + [
    col
    for col in X_matrix.columns
    if col != 'Yeast_ID' and any(protein in col for protein in important_features)
]

# Create a new restricted DataFrame containing only the selected columns
X_matrix_restricted = X_matrix[filtered_columns]

# Save the restricted matrix next to the input data, as both CSV and pickle.
# Path components are passed separately so os.path.join picks the separator.
X_matrix_restricted.to_csv(os.path.join(data_dir, "data", f"X_matrix_restricted_{phenotype}.csv"), index=False)
X_matrix_restricted.to_pickle(os.path.join(data_dir, "data", f"X_matrix_restricted_{phenotype}.pkl"))
