In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

# Read the tab-separated decay time-course table into a DataFrame
file_path = "DecayTimecourse.txt"  # relative path; change it if the file lives elsewhere
df = pd.read_csv(file_path, sep="\t")

# Single-parameter exponential decay model used as the curve_fit target
def exp_decay(t, tau):
    """Evaluate y(t) = exp(-t / tau), a decay that equals 1 at t = 0.

    Parameters
    ----------
    t : scalar or numpy.ndarray
        Time value(s) at which to evaluate the model.
    tau : float
        Decay time constant, in the same units as ``t``.

    Returns
    -------
    Same shape as ``t``: ``exp(-t / tau)`` evaluated element-wise.
    """
    scaled_time = -t / tau
    return np.exp(scaled_time)

# Sampling times for one replicate time course. These are hard-coded here,
# not parsed from the file header.
time_points = np.array([0, 5, 10, 15, 20, 30, 40, 50, 60])

# The first data row of df is treated as the real column header
header_labels = df.iloc[0].tolist()

# Everything after that row is data; renumber the rows from 0
df_cleaned = df.iloc[1:].reset_index(drop=True)
df_cleaned.columns = header_labels

# Discard columns with no values at all, then give the ID column a clearer name
df_cleaned = (
    df_cleaned
    .dropna(axis=1, how="all")
    .rename(columns={"YORF": "Gene"})
)

# Coerce every column after the first to numbers (unparseable entries -> NaN)
# and write them back in place. NOTE: the column dtypes may remain object after
# this assignment, so do not rely on them being float.
numeric_values = df_cleaned.iloc[:, 1:].apply(pd.to_numeric, errors="coerce")
df_cleaned.iloc[:, 1:] = numeric_values

# Fit y = exp(-t / tau) to every replicate of every gene and record the gene's
# mean half-life. curve_fit estimates the time constant tau (the time for the
# signal to fall to 1/e); the half-life is tau * ln(2), so that conversion is
# applied to each fit before averaging.
ln2 = np.log(2)
n_replicates = 3  # the value columns hold three consecutive replicate time courses

# Each replicate's NaN mask is used to index time_points, so every replicate
# must contain exactly one value per time point. Fail early with a clear
# message instead of an obscure boolean-index error inside the loop.
n_value_columns = df_cleaned.shape[1] - 1  # every column except the first ("Gene")
n_expected_columns = n_replicates * len(time_points)
if n_value_columns != n_expected_columns:
    raise ValueError(
        f"Expected {n_expected_columns} measurement columns "
        f"({n_replicates} replicates x {len(time_points)} time points), "
        f"found {n_value_columns}"
    )

# Dictionary to store calculated half-lives (gene ID -> mean over replicates)
half_lives = {}

# Iterate over each gene to calculate half-life
for _, row in df_cleaned.iterrows():
    gene_id = row["Gene"]

    # One row per replicate, one column per time point
    expression_levels = row.iloc[1:].values.reshape(n_replicates, -1)

    gene_half_lives = []

    for replicate in expression_levels:
        # Row values arrive as a generic object array; coerce to float (bad -> NaN)
        replicate = pd.to_numeric(replicate, errors="coerce")
        valid_mask = ~np.isnan(replicate)

        if valid_mask.sum() <= 2:  # Need at least 3 points to fit
            continue

        t_valid = time_points[valid_mask]
        y_valid = replicate[valid_mask]

        # Fit exponential decay
        try:
            popt, _ = curve_fit(exp_decay, t_valid, y_valid, p0=[10], maxfev=10000)
        except (RuntimeError, ValueError):
            # RuntimeError: the least-squares fit did not converge.
            # ValueError: the inputs were rejected (e.g. non-finite values).
            # Either way this replicate gives no estimate; skip it.
            continue

        tau = popt[0]
        # A non-finite or non-positive time constant does not describe a decay,
        # so it has no half-life and must not be averaged into the gene mean.
        if not np.isfinite(tau) or tau <= 0:
            continue

        gene_half_lives.append(tau * ln2)

    # Compute mean half-life across the replicates that produced a usable fit
    if gene_half_lives:
        half_lives[gene_id] = np.mean(gene_half_lives)

# Turn the gene -> half-life mapping into a two-column table, largest value first
half_life_df = pd.DataFrame(
    list(half_lives.items()), columns=["Gene", "Half_Life"]
).sort_values(by="Half_Life", ascending=False)

# Number of rows in a 10% slice (fraction truncated towards zero)
decile_size = int(len(half_life_df) * 0.1)
top_10_percent = half_life_df.head(decile_size)
bottom_10_percent = half_life_df.tail(decile_size)

# Write the full ranking and both slices to CSV, omitting the index column
for frame, csv_name in (
    (half_life_df, "calculated_half_lives.csv"),
    (top_10_percent, "top_10_percent_half_lives.csv"),
    (bottom_10_percent, "bottom_10_percent_half_lives.csv"),
):
    frame.to_csv(csv_name, index=False)

# Show the first rows of the ranking, then each 10% slice under its heading
print("Calculated Half-Life Data:")
print(half_life_df.head())

for heading, frame in (
    ("\nTop 10% Half-Life Genes:", top_10_percent),
    ("\nBottom 10% Half-Life Genes:", bottom_10_percent),
):
    print(heading)
    print(frame)


  popt, _ = curve_fit(exp_decay, t_valid, y_valid, p0=[10], maxfev=10000)


Calculated Half-Life Data:
         Gene     Half_Life
229   YOR347C  2.515127e+10
2416  YDL223C  1.811149e+10
3979  YBL098W  1.405650e+10
3155  YDL023C  1.372263e+10
6108  YDR535C  1.341090e+10

Top 10% Half-Life Genes:
         Gene     Half_Life
229   YOR347C  2.515127e+10
2416  YDL223C  1.811149e+10
3979  YBL098W  1.405650e+10
3155  YDL023C  1.372263e+10
6108  YDR535C  1.341090e+10
...       ...           ...
3994  YDL057W  9.355139e+01
3554  YLR112W  9.327800e+01
4328  YER175C  9.326555e+01
5661  YOL164W  9.309609e+01
5769  YKL055C  9.296442e+01

[615 rows x 2 columns]

Bottom 10% Half-Life Genes:
         Gene  Half_Life
3780  YGL097W  14.860722
1230  YNL112W  14.859477
990   YEL037C  14.855587
564   YLR093C  14.838200
4402  YOR026W  14.835974
...       ...        ...
6145  YEL075C   0.210259
6156  YGL260W   0.165824
6149  YHL049C   0.157656
6153  YPR202W   0.135142
6151  YHR218W   0.117626

[615 rows x 2 columns]


  return np.exp(-t / tau)


In [None]:
import pandas as pd

# Load the calculated half-life data
half_life_df = pd.read_csv("calculated_half_lives.csv")  # Update filename if needed

# head()/tail() below only select the largest/smallest half-lives when the rows
# are ordered by Half_Life, so sort explicitly rather than trusting the order
# in which the file happened to be written. mergesort is stable, so a file that
# is already sorted keeps exactly the same row order.
half_life_df = half_life_df.sort_values(
    by="Half_Life", ascending=False, kind="mergesort"
).reset_index(drop=True)

# Identify top 10% and bottom 10% genes (slice size truncated towards zero)
decile_size = int(len(half_life_df) * 0.1)
top_10_percent = half_life_df.head(decile_size)
bottom_10_percent = half_life_df.tail(decile_size)

# Extract gene names only
top_10_genes = top_10_percent["Gene"].tolist()
bottom_10_genes = bottom_10_percent["Gene"].tolist()

# Save gene lists as text files (one gene per line, required format for g:Profiler)
with open("top_10_percent_genes.txt", "w") as f:
    f.write("\n".join(top_10_genes))

with open("bottom_10_percent_genes.txt", "w") as f:
    f.write("\n".join(bottom_10_genes))

print("Top 10% genes saved to 'top_10_percent_genes.txt'")
print("Bottom 10% genes saved to 'bottom_10_percent_genes.txt'")

# Check first few genes for verification
print("\nSample Top 10% Genes:\n", top_10_genes[:10])
print("\nSample Bottom 10% Genes:\n", bottom_10_genes[:10])


Top 10% genes saved to 'top_10_percent_genes.txt'
Bottom 10% genes saved to 'bottom_10_percent_genes.txt'

Sample Top 10% Genes:
 ['YOR347C', 'YDL223C', 'YBL098W', 'YDL023C', 'YDR535C', 'YGR248W', 'YOL052C-A', 'YKR023W', 'YBL064C', 'YJL207C']

Sample Bottom 10% Genes:
 ['YGL097W', 'YNL112W', 'YEL037C', 'YLR093C', 'YOR026W', 'YJL115W', 'YML125C', 'YOR229W', 'YOR301W', 'YCL016C']
