In [12]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

# Define the Exponential Decay Function
def exp_decay(time, N_0, k):
    return N_0 * np.exp(-k * time)

# Function to Calculate Half-Life
def calculate_half_life(replicate_data, time_points):
    replicate_data = pd.to_numeric(replicate_data, errors='coerce')
    replicate_data[replicate_data <= 0] = np.nan  # Avoid log(0) issues
    
    if replicate_data.isna().all():
        return np.nan

    min_positive = replicate_data[replicate_data > 0].min()
    replicate_data.fillna(min_positive, inplace=True)

    log_data = np.log(replicate_data)

    try:
        params, _ = curve_fit(exp_decay, time_points, log_data, bounds=(0, np.inf))
        N_0, k = params
        return np.log(2) / k if k > 0 else np.nan
    except:
        return np.nan

# Load the Dataset
data = pd.read_csv('DecayTimecourse.txt', delimiter='\t')

# Save and reload for consistency
data.to_csv('DecayTimecourse.csv', index=False)
data = pd.read_csv("DecayTimecourse.csv")

# Initialize list to store half-life calculations
half_lives = []
time_points = np.array([0, 5, 10, 15, 20, 30, 40, 50, 60])

# Iterate through each row to calculate half-lives
for index, row in data.iterrows():
    transcript_id = row['Time course #']
    
    if transcript_id == 'YORF':  # Skip invalid entries
        continue
    
    replicate_half_lives = []
    
    for replicate_idx in range(3):  # Assuming three replicates
        start_col = replicate_idx * 9 + 2
        replicate_data = row.iloc[start_col:start_col+9]
        half_life = calculate_half_life(replicate_data, time_points)

        if not np.isnan(half_life):
            replicate_half_lives.append(half_life)

    if replicate_half_lives:
        avg_half_life = np.mean(replicate_half_lives)
        half_lives.append({'Transcript': transcript_id, 'Half_Life': avg_half_life})

# Convert the list into a DataFrame and Save to CSV
half_lives_df = pd.DataFrame(half_lives)
output_file = 'Half_Lives_Calculated.csv'
half_lives_df.to_csv(output_file, index=False)

print(f"\nHalf-Lives Calculated and saved to: {output_file}")

# Reload the Saved Half-Life Data
half_lives_df = pd.read_csv(output_file)

# Ensure no NaN values in the dataset before filtering
half_lives_df = half_lives_df.dropna(subset=['Half_Life'])

# Sort the DataFrame by Half-Life in Descending Order
half_lives_df = half_lives_df.sort_values(by='Half_Life', ascending=False)

num_genes_10_percent = round(len(half_lives_df) * 0.1) 

# Select the top and bottom 10% transcripts dynamically
top_10_percent_transcripts = half_lives_df.iloc[:num_genes_10_percent]
bottom_10_percent_transcripts = half_lives_df.iloc[-num_genes_10_percent:]

# Print the genes in the required format
print("\nCalculated Half-Lives:\n")
for index, row in half_lives_df.iterrows():
    print(f"Transcript: {row['Transcript']}, Half-Life: {row['Half_Life']}")

# Count the Number of Genes in the Top and Bottom 10%
num_high_half_life_genes = top_10_percent_transcripts.shape[0]
num_low_half_life_genes = bottom_10_percent_transcripts.shape[0]

print(f"\nNumber of genes in the top 10%: {num_high_half_life_genes}")
print(f"Number of genes in the bottom 10%: {num_low_half_life_genes}")

# Save High and Low Half-Life Genes to Files
high_life_path = 'high_half_life_genes.txt'
low_life_path = 'low_half_life_genes.txt'

with open(high_life_path, 'w') as file:
    for _, row in top_10_percent_transcripts.iterrows():
        file.write(f"Transcript: {row['Transcript']}, Half-Life: {row['Half_Life']}\n")

with open(low_life_path, 'w') as file:
    for _, row in bottom_10_percent_transcripts.iterrows():
        file.write(f"Transcript: {row['Transcript']}, Half-Life: {row['Half_Life']}\n")

print(f"\nHigh half-life genes saved to {high_life_path}")
print(f"Low half-life genes saved to {low_life_path}")



Half-Lives Calculated and saved to: Half_Lives_Calculated.csv

Calculated Half-Lives:

Transcript: YLR081W, Half-Life: 3110103303606925.5
Transcript: YPL174C, Half-Life: 715756749991251.6
Transcript: YBL098W, Half-Life: 16019930866603.436
Transcript: YOR344C, Half-Life: 8623709739924.602
Transcript: YNL019C, Half-Life: 2000265710822.6929
Transcript: YOR255W, Half-Life: 223095500470.6136
Transcript: YJL222W, Half-Life: 26558130.216979124
Transcript: Q0050, Half-Life: 25574025.73878494
Transcript: YMR186W, Half-Life: 23963647.635841858
Transcript: YPL135W, Half-Life: 20049059.65725837
Transcript: YLR120C, Half-Life: 17699765.02346982
Transcript: YJL174W, Half-Life: 14564826.280615635
Transcript: YMR174C, Half-Life: 13187689.94055578
Transcript: YDL194W, Half-Life: 13012611.974133365
Transcript: YKL046C, Half-Life: 11573613.189517876
Transcript: YLR393W, Half-Life: 9692415.551938236
Transcript: YGL129C, Half-Life: 9446821.331156954
Transcript: YDL240W, Half-Life: 7796635.290962242
Transc