In [4]:
import pandas as pd

def read_file(file_path):
    """Reads a tab-delimited file and returns a DataFrame."""
    return pd.read_csv(file_path, sep='\t')

def handle_missing_values(df):
    """Replaces NaN values with 0 in the DataFrame."""
    return df.fillna(0)

def cap_values(df, threshold=2000):
    """Caps values greater than the threshold to the threshold value."""
    return df.applymap(lambda x: threshold if isinstance(x, (int, float)) and x > threshold else x)

def select_context_columns(df):
    """Selects columns with names containing 'Context_'."""
    return df.filter(like="Context_", axis=1)

def save_file(df, output_path):
    """Saves the DataFrame to a tab-delimited file."""
    df.to_csv(output_path, sep='\t', index=False)

# Main function to process the file
def process_gene_expression(file_path, output_path):
    df = read_file(file_path)
    df = handle_missing_values(df)
    df = cap_values(df)
    selected_columns = select_context_columns(df)
    save_file(selected_columns, output_path)

# File paths
input_path = '/Users/mohanavenkataphaneendrareddyalla/Downloads/GSE152632_TRAPedFCNF_25Genes_WithHead.txt'
output_path = '/Users/mohanavenkataphaneendrareddyalla/Desktop/Adv_pro_molecular_bio/Homework_9/new_GSE152632_TRAPedFCNF_25Genes_WithHead_Selected.txt'

# Execute the process
process_gene_expression(input_path, output_path)
print(f"Processed file saved to {output_path}")


Processed file saved to /Users/mohanavenkataphaneendrareddyalla/Desktop/Adv_pro_molecular_bio/Homework_9/new_GSE152632_TRAPedFCNF_25Genes_WithHead_Selected.txt


  return df.applymap(lambda x: threshold if isinstance(x, (int, float)) and x > threshold else x)


In [7]:
# SHORT CODE

In [5]:
import pandas as pd

# Read the file
df = pd.read_csv('/Users/mohanavenkataphaneendrareddyalla/Downloads/GSE152632_TRAPedFCNF_25Genes_WithHead.txt', sep='\t')

df.fillna(0, inplace=True)

# Convert all columns to numeric, coercing errors to NaN, then fill NaNs with 0
df = df.apply(pd.to_numeric, errors='coerce').fillna(0)
# Cap values greater than 2000 to 2000
df[df > 2000] = 2000
selected_df = df.filter(like='Context_')
# Save the selected columns to a new file
selected_df.to_csv('/Users/mohanavenkataphaneendrareddyalla/Downloads/GSE152632_TRAPedFCNF_25Genes_WithHead_Selected.txt', sep='\t', index=False)
