In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [40]:
# Load the k=10 spectra table (tab-separated text).
# The path is assembled with os.path.join so it resolves on any OS rather than
# only where "\" is the path separator; on Windows the resulting string is
# identical to the previous hard-coded one.
gep_spectra_path = os.path.join(
    'analcancer_cNMF',
    's13_cNMF_r40_k6-20',
    's13_cNMF_r40_k6-20.gene_spectra_tpm.k_10.dt_0_5.txt',
)

# "Unnamed: 0" is presumably the file's unlabeled first column as named by
# read_csv; it is dropped so only the remaining value columns are kept and the
# rows keep the default 0-based index. The drop is chained (no inplace=True)
# so the frame is built in a single expression.
final_gep_matrix_k_10 = (
    pd.read_csv(gep_spectra_path, sep='\t')
    .drop(columns=["Unnamed: 0"])
)
final_gep_matrix_k_10.head()

Unnamed: 0,SAMD11,NOC2L,KLHL17,PLEKHN1,HES4,ISG15,AGRN,RNF223,C1orf159,TNFRSF18,...,ELANE,MCHR1,AC007244.1,CA14,UGT1A5,TMEM155,OPN4,GOLGA8R,CHRNE,ADAT3
0,0.0,136.078841,39.552889,50.055059,77.367455,42.118205,258.965139,0.0,20.126201,66.492474,...,0.0,0.0,6.246397,0.0,2.971674,0.0,0.0,3.149021,0.0,1.080112
1,0.0,74.710799,45.720096,62.81152,9.564015,152.032548,25.417925,0.0,20.018054,116.879526,...,0.0,0.079833,0.0,0.0,0.0,9.680502,1.807112,1.918156,2.325711,0.0
2,26.697751,47.348447,34.547292,13.8571,59.080427,111.255839,48.485248,0.0,8.378825,0.0,...,15.303319,0.0,6.56274,0.0,0.361891,0.0,4.690265,6.397728,2.510803,0.066508
3,0.0,135.213966,92.461813,813.548864,19.089314,0.0,0.0,0.0,29.39342,0.0,...,0.0,0.0,9.864929,0.094155,0.987116,0.0,0.0,2.127693,3.920848,5.677128
4,0.0,51.109952,19.009489,40.134916,13.627728,141.042017,102.072322,0.0,3.601396,2.357504,...,1.302154,0.0,6.624451,0.0,0.375956,0.0,0.07741,7.987623,0.847218,0.0


In [41]:
def top_x_percent(row, fraction=0.05):
    """Return the largest entries of ``row``, keeping the top ``fraction`` of them.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to rank (e.g. one row passed by ``DataFrame.apply``
        with ``axis=1``).
    fraction : float, default 0.05
        Share of the entries to keep, in the interval (0, 1]. The default
        keeps the top 5%, matching the previously hard-coded behaviour.

    Returns
    -------
    pandas.Series
        The ``ceil(fraction * len(row))`` largest values (at least one is
        requested), in descending order, indexed by their original labels.

    Raises
    ------
    ValueError
        If ``fraction`` is not in (0, 1].
    """
    if not 0 < fraction <= 1:
        raise ValueError(f"fraction must be in (0, 1], got {fraction!r}")

    # Round up so short rows still contribute; never request fewer than one.
    n_top = max(1, int(np.ceil(fraction * len(row))))
    return row.nlargest(n_top)

In [42]:
# Select the top entries of every row. Each row yields its own set of column
# labels, so the combined frame has NaN wherever a column was not selected for
# that row.
result = final_gep_matrix_k_10.apply(top_x_percent, axis=1)

# Long-form table with one line per (row, column, value) triple: expose the
# row index as a column, unpivot, discard the NaN fillers, and label the row
# identifier "Row".
result_df = (
    pd.melt(
        result.reset_index(),
        id_vars="index",
        var_name="Column",
        value_name="Value",
    )
    .dropna()
    .rename(columns={"index": "Row"})
)

# One group per original row.
grouped_gep = result_df.groupby("Row")

In [43]:
# Build the output directory with os.path.join so the separator is correct on
# every OS (a literal "\\" only acts as a separator on Windows).
output_folder = os.path.join("results", "s13_cNMF_r40_k6_normalized_k10")
os.makedirs(output_folder, exist_ok=True)

# Number of highest-valued entries drawn per figure. Kept in one place so the
# slice and the figure title cannot drift apart.
top_n_plotted = 75

for row_id, group in grouped_gep:
    # Highest values first, limited to the top `top_n_plotted` entries.
    top_entries = group.sort_values("Value", ascending=False).head(top_n_plotted)

    # Explicit figure/axes objects so each iteration draws on, saves, and
    # closes exactly the figure it created.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(top_entries["Column"], top_entries["Value"], marker="o", linestyle="-")
    # row_id is the 0-based row label; figures are numbered from 1.
    ax.set_title(f"GEP {row_id + 1} - Top {top_n_plotted} Values")
    ax.set_xlabel("Column")
    ax.set_ylabel("Value")
    # Vertical, small tick labels so the column names stay legible.
    ax.tick_params(axis="x", labelrotation=90, labelsize=6)
    fig.tight_layout()

    # Save the figure, then release it so figures do not accumulate in memory.
    filename = f"GEP_{row_id + 1}.png"
    filepath = os.path.join(output_folder, filename)
    fig.savefig(filepath, dpi=300)
    plt.close(fig)