### Extract lncRNA-protein interaction (LPI) data

In [1]:
import pandas as pd
species = 'human'

# Full interaction table for the chosen species.
lppi = pd.read_csv(f'../../annotate/{species}/unweighted_inter.csv')

# Keep only the rows whose first-column ID starts with 'l'
# (presumably the lncRNA-sourced edges -- TODO confirm the ID convention),
# then reduce to the two endpoint columns under their LPI names.
is_lnc_source = lppi.iloc[:, 0].str.startswith('l')
lpi = lppi.loc[is_lnc_source, ['source', 'target']].rename(
    columns={'source': 'lncRNA_id', 'target': 'protein_id'}
)

# NOTE: only `{species}_lpi.csv` for the species set above is written here.
# The enrichment cell further down reads `{species}_lpi.csv` for its own
# `species` value, so this cell has to be run once per species that is analysed.
lpi.to_csv(f'{species}_lpi.csv', index=False)

### Extract mouse essential proteins & human essential proteins

In [2]:
import pandas as pd

# Mouse: protein table (maps `protein` names to `protein_id`) and the
# gene-essentiality annotation table.
protein = pd.read_csv('../../data/LPPI/mouse/protein_updated.csv')
ess_protein = pd.read_csv('Mus musculus_genes.csv')

# Keep the genes whose essentiality label is 'E', restricted to the two
# columns that are used below.
ess_protein = ess_protein.loc[
    ess_protein['essentiality'] == 'E',
    ['gene', 'essentiality'],
]

# Proteins whose name matches one of the essential genes.
is_essential_gene = protein['protein'].isin(ess_protein['gene'])
ess_protein_lpi = protein[is_essential_gene]
ess_protein_id = ess_protein_lpi['protein_id']

# Single-column CSV (header `protein_id`) consumed by the enrichment cell.
ess_protein_id.to_csv('mouse_ess_protein_id.csv', index=False)

In [3]:
import pandas as pd

# Human: protein table (maps `protein` names to `protein_id`) and the
# tab-separated gene-essentiality annotation table.
protein = pd.read_csv('../../data/LPPI/human/protein_updated.csv')
ess_protein = pd.read_csv('CSEGs_CEGs.txt', sep='\t')

# Keep only the rows labelled 'CEGs' in the `essentiality` column, restricted
# to the two columns that are used below.
ess_protein = (
    ess_protein[['gene', 'essentiality']]
    .loc[lambda genes: genes['essentiality'] == 'CEGs']
)

# Proteins whose name matches one of the retained genes.
is_essential_gene = protein['protein'].isin(ess_protein['gene'])
ess_protein_lpi = protein[is_essential_gene]
ess_protein_id = ess_protein_lpi['protein_id']

# Single-column CSV (header `protein_id`) consumed by the enrichment cell.
ess_protein_id.to_csv('human_ess_protein_id.csv', index=False)

### Test the correlation between lncRNA essentiality and the essentiality of interacting proteins.

In [5]:
import pandas as pd
from scipy.stats import chi2_contingency
import os

species = 'mouse'

# Species-specific settings:
#   tissues          - tissue keys used in the essential-lncRNA file names
#   k                - top-k percentage that is part of those file names
#   tissue_name_map  - tissue label written to the report
if species == 'mouse':
    tissues = ['heart', 'lung', 'brain']
    k = 60
    tissue_name_map = {
        "heart": "Heart",
        "lung": "Lung",
        "brain": "Forebrain"
    }
else:
    tissues = ['heart', 'lung', 'stomach']
    k = 40
    tissue_name_map = {
        "heart": "Left ventricle",
        "lung": "Left lung",
        "stomach": "Stomach"
    }

# Load essential proteins (IDs are taken from the first column)
essential_proteins = pd.read_csv(f'{species}_ess_protein_id.csv')
essential_protein_set = set(essential_proteins.iloc[:, 0])

# Load lncRNA-protein interaction pairs
interactions = pd.read_csv(f'{species}_lpi.csv')

# Define all lncRNAs appearing in interactions (as the universe)
all_lncrnas = set(interactions['lncRNA_id'])

# lncRNAs that interact with at least one essential protein
lnc_with_essentialP = set(
    interactions.loc[
        interactions['protein_id'].isin(essential_protein_set), 'lncRNA_id'
    ]
)

# lncRNAs that do NOT interact with any essential protein
lnc_without_essentialP = all_lncrnas - lnc_with_essentialP

# "All lncRNA" display row. Both sets are subsets of the universe and do not
# depend on the tissue, so the counts are computed once instead of per tissue.
b_all = len(lnc_with_essentialP)
d_all = len(lnc_without_essentialP)

# Output file
output_file = f'{species}_lncrna_enrichment_chi2_result.txt'

# Mode 'w' truncates a report left over from a previous run, so the file is
# opened once for the whole loop instead of delete + re-open in append mode.
with open(output_file, 'w', encoding='utf-8') as f:
    for t in tissues:

        # Get full tissue name for writing
        full_name = tissue_name_map.get(t, t)

        # Load essential lncRNAs for this tissue (headerless file, IDs in column 0)
        ess_path = f'../ess_number/filtered/{species}/BC_top{k}pct_{species}_{t}_esslnc.csv'
        essential_lncrnas = pd.read_csv(ess_path, header=None)
        essential_lncrna_set = set(essential_lncrnas.iloc[:, 0])

        # Background for test: all lncRNAs excluding essential lncRNAs
        background_lncrna_set = all_lncrnas - essential_lncrna_set

        # 2x2 counts. Essential lncRNAs that never occur in the interaction
        # file fall into neither a_test nor c_test and are therefore ignored.
        a_test = len(essential_lncrna_set & lnc_with_essentialP)
        b_test = len(background_lncrna_set & lnc_with_essentialP)
        c_test = len(essential_lncrna_set & lnc_without_essentialP)
        d_test = len(background_lncrna_set & lnc_without_essentialP)

        contingency_table_test = [[a_test, b_test], [c_test, d_test]]

        # chi2_contingency raises ValueError when an expected frequency is
        # zero, which happens when a row or column of the table sums to zero.
        # Report NaN for that tissue instead of aborting the remaining ones.
        marginals = (
            a_test + b_test,  # interacting with essential proteins
            c_test + d_test,  # not interacting with essential proteins
            a_test + c_test,  # essential lncRNAs
            b_test + d_test,  # background lncRNAs
        )
        test_computable = 0 not in marginals
        if test_computable:
            # Default settings: Yates' continuity correction is applied to 2x2 tables.
            chi2, p_chi2, _, _ = chi2_contingency(contingency_table_test)
        else:
            chi2, p_chi2 = float('nan'), float('nan')

        prop_ess = a_test / (a_test + c_test) if (a_test + c_test) > 0 else 0
        prop_bg = b_test / (b_test + d_test) if (b_test + d_test) > 0 else 0
        enrichment = prop_ess / prop_bg if prop_bg > 0 else float('inf')

        # Display counts: Essential vs ALL lncRNAs (the test itself uses
        # Essential vs Background, see the proportions written below)
        a_all = a_test
        c_all = c_test

        f.write(f"\n=== Tissue: {full_name} ===\n")
        f.write("Contingency Table (Displayed counts: Essential vs All lncRNAs):\n")
        f.write("                 | Interact_with_essP | No_interact_with_essP\n")
        f.write("-----------------|-------------------|-----------------------\n")
        f.write(f"Essential lncRNA | {a_all:7d}            | {c_all:7d}\n")
        f.write(f"All lncRNA       | {b_all:7d}            | {d_all:7d}\n\n")

        f.write(f"Chi2 statistic: {chi2:.6g}\n")
        f.write(f"Chi-squared test p-value: {p_chi2:.6g}\n")
        if not test_computable:
            f.write("Note: test not computable (a row or column of the test table sums to zero).\n")
        f.write("\n")

        f.write("Proportion interacting with essential proteins (used in Chi-square test):\n")
        f.write(f"Essential:  {prop_ess:.3f}  ({a_test}/{a_test+c_test})\n")
        f.write(f"Background: {prop_bg:.3f}  ({b_test}/{b_test+d_test})\n")
        f.write(f"Enrichment (Essential / Background): {enrichment:.3f}\n")

print(f"\n✅ Results saved to {output_file}")



✅ Results saved to mouse_lncrna_enrichment_chi2_result.txt
