In [4]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

# Load the chr1 feature table that every score below is derived from.
scores_csv = "data/green_monkey/all_structure_files/chr1/chr1_temporal_trendCategories_valuePattern_curveFeatures.csv"
df_scores = pd.read_csv(scores_csv)

# 1. Magnitude Score: total absolute change over the expression and
#    accessibility delta columns (missing values are skipped by the row sums).
expr_cols = ['expr_delta_12h', 'expr_delta_18h', 'expr_delta_24h']
acc_cols = ['acc_delta_12h_tss', 'acc_delta_18h_tss', 'acc_delta_24h_tss']

expr_magnitude = df_scores[expr_cols].abs().sum(axis=1)
acc_magnitude = df_scores[acc_cols].abs().sum(axis=1)
df_scores['magnitude_score'] = expr_magnitude + acc_magnitude

# 2. Agreement Score: proportion of matched directions between expr_dir and acc_tss_dir
def direction_agreement_score(expr_list, acc_list):
    """Return the fraction of positions where both direction lists agree.

    A position counts as a match only when the two labels are equal and the
    shared label is 'up' or 'down'; any other shared label is not counted.
    Positions are paired with ``zip``, so extra trailing entries in the
    longer list are never compared, but the denominator is always the
    length of the expression list.

    Parameters
    ----------
    expr_list, acc_list : sequence or str
        Direction labels, given either as a sequence or as the text of a
        Python list literal (presumably how the list columns come back
        from the CSV -- confirm against the file).

    Returns
    -------
    float
        Number of matches divided by ``len(expr)``; 0.0 when the
        expression list is empty.

    Raises
    ------
    ValueError, SyntaxError
        If a string argument is not a valid Python literal.
    """
    # Imported locally so the function stays self-contained in its cell.
    from ast import literal_eval

    # literal_eval only accepts literals, so text from the CSV can never be
    # executed as code the way it would be with eval().
    expr = literal_eval(expr_list) if isinstance(expr_list, str) else expr_list
    acc = literal_eval(acc_list) if isinstance(acc_list, str) else acc_list

    if len(expr) == 0:
        # No labels to compare; avoid dividing by zero.
        return 0.0

    matches = sum(1 for e, a in zip(expr, acc) if e == a and e in ('up', 'down'))
    return matches / len(expr)

# Agreement score per gene, pairing each row's expression directions with
# its TSS accessibility directions.
df_scores['agreement_score'] = [
    direction_agreement_score(expr_dirs, acc_dirs)
    for expr_dirs, acc_dirs in zip(df_scores['expr_dir'], df_scores['acc_tss_dir'])
]

# 3. Stability Score: the reciprocal of (1 + inflection count), so curves
#    with fewer inflections score closer to 1.
df_scores['stability_score'] = 1 / (df_scores['expr_num_inflections'] + 1)

# 4. Early Response Score: linear in peak time, where a peak at 12 maps to 1
#    and a peak at 24 maps to 0.
first_timepoint, last_timepoint = 12, 24
time_span = last_timepoint - first_timepoint
df_scores['early_response_score'] = 1 - (df_scores['expr_peak_time'] - first_timepoint) / time_span

# Rescale every component score to [0, 1] so the weights below are comparable.
score_cols = ['magnitude_score', 'agreement_score', 'stability_score', 'early_response_score']
scaler = MinMaxScaler().fit(df_scores[score_cols])
df_scores[score_cols] = scaler.transform(df_scores[score_cols])

# Final biomarker score: weighted sum of the component scores.
# The weights are positional and must stay in the same order as score_cols.
weights = [0.3, 0.3, 0.2, 0.2]
df_scores['final_score'] = df_scores[score_cols] @ weights

# Rank genes from highest to lowest final score and keep the first 20 rows
# of the reporting columns.
ranked = df_scores.sort_values(by='final_score', ascending=False)
top_candidates = ranked.reset_index(drop=True)
report_cols = ['gene_name', *score_cols, 'final_score']
top_output = top_candidates.head(20)[report_cols]



In [5]:
# Show the ranked candidate table assembled in the previous cell (at most 20 rows).
top_output

Unnamed: 0,gene_name,magnitude_score,agreement_score,stability_score,early_response_score,final_score
0,SLC39A13,0.569866,1.0,1.0,1.0,0.87096
1,EHD1,0.298485,1.0,1.0,1.0,0.789546
2,MRPL17,0.256179,1.0,1.0,1.0,0.776854
3,GALNT18,0.21284,1.0,1.0,1.0,0.763852
4,EIF3F,0.182036,1.0,1.0,1.0,0.754611
5,LAYN,0.16179,1.0,1.0,1.0,0.748537
6,PICALM,0.090131,1.0,1.0,1.0,0.727039
7,NAV2,0.081428,1.0,1.0,1.0,0.724428
8,SPCS2,0.078723,1.0,1.0,1.0,0.723617
9,LOC103237205,0.059682,1.0,1.0,1.0,0.717905
