# Gene comparison between FPKM and DGE

In [1]:
import math
import pandas as pd

In [2]:
# Init
# Cutoff compared against the 'TPM' column when filtering the FPKM tables.
# math.log2(2) evaluates to 1.0, which matches the "TPM >= 1" wording used
# in the comparison headers printed below.
# NOTE(review): the name suggests a log-scale threshold; confirm whether the
# 'TPM' column is log-transformed or raw.
minLogTPM = math.log2(2)

In [3]:
# Set comparison function
def compare_a_and_b(lA, lB):
    """Summarise how two collections overlap, as set sizes.

    Both inputs are converted to sets first, so repeated items count once.

    Args:
        lA: iterable of hashable items (collection "a").
        lB: iterable of hashable items (collection "b").

    Returns:
        dict with the number of unique items in 'a', in 'b', in both
        ('a&b'), only in a ('a-b'), only in b ('b-a') and in either ('a|b').
    """
    set_a, set_b = set(lA), set(lB)
    shared = set_a & set_b
    only_a = set_a - set_b
    only_b = set_b - set_a
    either = set_a | set_b
    return {
        'a': len(set_a),
        'b': len(set_b),
        'a&b': len(shared),
        'a-b': len(only_a),
        'b-a': len(only_b),
        'a|b': len(either),
    }

## Homo sapiens

In [4]:
# Read the human FPKM and DGE gene tables, both indexed by 'id_gene'.
df_HS_FPKM = pd.read_csv(
    "../2-core_genes/results/HS-FPKM_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'FPKM', 'TPM', 'Spermatocytes', 'biotype'],
)
df_HS_DGE = pd.read_csv(
    "../2-core_genes/results/HS-DE_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'Cyte_vs_Gonia', 'Tid_vs_Cyte'],
)
# Keep only rows whose 'TPM' is at least minLogTPM and whose
# 'Spermatocytes' flag is True.
df_HS_FPKM = df_HS_FPKM.loc[
    (df_HS_FPKM['TPM'] >= minLogTPM) & (df_HS_FPKM['Spermatocytes'] == True), :
]
# Attach the DGE columns as meiotic entry/exit flags. The assignment aligns
# on the shared 'id_gene' index, so genes absent from the DGE table get NaN.
df_HS_FPKM['meiotic-entry'] = df_HS_DGE['Cyte_vs_Gonia']
df_HS_FPKM['meiotic-exit'] = df_HS_DGE['Tid_vs_Cyte']

In [5]:
print("Comparison: HS -> FPKM(TPM >= 1) with DGE()")
# a = gene ids in the filtered FPKM table, b = gene ids in the DGE table.
r = compare_a_and_b(df_HS_FPKM.index.tolist(), df_HS_DGE.index.tolist())
# Raw set sizes first; the percentages that follow are shares of the union.
print("FPKM: {:,d}".format(r['a']))
print("DGE: {:,d}".format(r['b']))
print("FPKM & DGE: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("FPKM - DGE: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("DGE - FPKM: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: HS -> FPKM(TPM >= 1) with DGE()
FPKM: 19,623
DGE: 16,527
FPKM & DGE: 13,674 (60.84%)
FPKM - DGE: 5,949 (26.47%)
DGE - FPKM: 2,853 (12.69%)


In [6]:
print("Comparison: HS -> FPKM(meiotic-entry) with FPKM(meiotic-exit)")
# a = genes flagged True for meiotic entry, b = genes flagged True for exit.
# Comparing with `== True` keeps only rows explicitly set to True.
r = compare_a_and_b(
    df_HS_FPKM[df_HS_FPKM['meiotic-entry'] == True].index.tolist(),
    df_HS_FPKM[df_HS_FPKM['meiotic-exit'] == True].index.tolist(),
)
# Raw set sizes first; the percentages that follow are shares of the union.
print("Entry: {:,d}".format(r['a']))
print("Exit: {:,d}".format(r['b']))
print("Entry & Exit: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("Entry - Exit: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("Exit - Entry: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: HS -> FPKM(meiotic-entry) with FPKM(meiotic-exit)
Entry: 13,412
Exit: 12,331
Entry & Exit: 12,069 (88.26%)
Entry - Exit: 1,343 (9.82%)
Exit - Entry: 262 (1.92%)


## Mus musculus

In [7]:
# Read the mouse FPKM and DGE gene tables, both indexed by 'id_gene'.
df_MM_FPKM = pd.read_csv(
    "../2-core_genes/results/MM-FPKM_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'FPKM', 'TPM', 'Spermatocytes', 'biotype'],
)
df_MM_DGE = pd.read_csv(
    "../2-core_genes/results/MM-DE_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'Cyte_vs_Gonia', 'Tid_vs_Cyte'],
)
# Keep only rows whose 'TPM' is at least minLogTPM and whose
# 'Spermatocytes' flag is True.
df_MM_FPKM = df_MM_FPKM.loc[
    (df_MM_FPKM['TPM'] >= minLogTPM) & (df_MM_FPKM['Spermatocytes'] == True), :
]
# Attach the DGE columns as meiotic entry/exit flags. The assignment aligns
# on the shared 'id_gene' index, so genes absent from the DGE table get NaN.
df_MM_FPKM['meiotic-entry'] = df_MM_DGE['Cyte_vs_Gonia']
df_MM_FPKM['meiotic-exit'] = df_MM_DGE['Tid_vs_Cyte']

In [8]:
print("Comparison: MM -> FPKM(TPM >= 1) with DGE()")
# a = gene ids in the filtered FPKM table, b = gene ids in the DGE table.
r = compare_a_and_b(df_MM_FPKM.index.tolist(), df_MM_DGE.index.tolist())
# Raw set sizes first; the percentages that follow are shares of the union.
print("FPKM: {:,d}".format(r['a']))
print("DGE: {:,d}".format(r['b']))
print("FPKM & DGE: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("FPKM - DGE: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("DGE - FPKM: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: MM -> FPKM(TPM >= 1) with DGE()
FPKM: 16,505
DGE: 14,958
FPKM & DGE: 13,287 (73.10%)
FPKM - DGE: 3,218 (17.70%)
DGE - FPKM: 1,671 (9.19%)


In [9]:
print("Comparison: MM -> FPKM(meiotic-entry) with FPKM(meiotic-exit)")
# a = genes flagged True for meiotic entry, b = genes flagged True for exit.
# Comparing with `== True` keeps only rows explicitly set to True.
r = compare_a_and_b(
    df_MM_FPKM[df_MM_FPKM['meiotic-entry'] == True].index.tolist(),
    df_MM_FPKM[df_MM_FPKM['meiotic-exit'] == True].index.tolist(),
)
# Raw set sizes first; the percentages that follow are shares of the union.
print("Entry: {:,d}".format(r['a']))
print("Exit: {:,d}".format(r['b']))
print("Entry & Exit: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("Entry - Exit: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("Exit - Entry: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: MM -> FPKM(meiotic-entry) with FPKM(meiotic-exit)
Entry: 10,417
Exit: 12,801
Entry & Exit: 9,931 (74.74%)
Entry - Exit: 486 (3.66%)
Exit - Entry: 2,870 (21.60%)


## Drosophila melanogaster

In [10]:
# Read the fly FPKM and DGE gene tables, both indexed by 'id_gene'.
df_DM_FPKM = pd.read_csv(
    "../2-core_genes/results/DM-FPKM_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'FPKM', 'TPM', 'Middle', 'biotype'],
)
df_DM_DGE = pd.read_csv(
    "../2-core_genes/results/DM-DE_genes.csv.gz",
    index_col='id_gene',
    usecols=['id_string', 'id_gene', 'gene', 'Middle_vs_Apical', 'Basal_vs_Middle'],
)
# Keep only rows whose 'TPM' is at least minLogTPM and whose 'Middle' flag
# is True.
df_DM_FPKM = df_DM_FPKM.loc[
    (df_DM_FPKM['TPM'] >= minLogTPM) & (df_DM_FPKM['Middle'] == True), :
]
# Attach the DGE columns as meiotic entry/exit flags. The assignment aligns
# on the shared 'id_gene' index, so genes absent from the DGE table get NaN.
df_DM_FPKM['meiotic-entry'] = df_DM_DGE['Middle_vs_Apical']
df_DM_FPKM['meiotic-exit'] = df_DM_DGE['Basal_vs_Middle']

In [11]:
print("Comparison: DM -> FPKM(TPM >= 1) with DGE()")
# a = gene ids in the filtered FPKM table, b = gene ids in the DGE table.
r = compare_a_and_b(df_DM_FPKM.index.tolist(), df_DM_DGE.index.tolist())
# Raw set sizes first; the percentages that follow are shares of the union.
print("FPKM: {:,d}".format(r['a']))
print("DGE: {:,d}".format(r['b']))
print("FPKM & DGE: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("FPKM - DGE: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("DGE - FPKM: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: DM -> FPKM(TPM >= 1) with DGE()
FPKM: 10,348
DGE: 11,349
FPKM & DGE: 9,637 (79.91%)
FPKM - DGE: 711 (5.90%)
DGE - FPKM: 1,712 (14.20%)


In [12]:
print("Comparison: DM -> FPKM(meiotic-entry) with FPKM(meiotic-exit)")
# a = genes flagged True for meiotic entry, b = genes flagged True for exit.
# Comparing with `== True` keeps only rows explicitly set to True.
r = compare_a_and_b(
    df_DM_FPKM[df_DM_FPKM['meiotic-entry'] == True].index.tolist(),
    df_DM_FPKM[df_DM_FPKM['meiotic-exit'] == True].index.tolist(),
)
# Raw set sizes first; the percentages that follow are shares of the union.
print("Entry: {:,d}".format(r['a']))
print("Exit: {:,d}".format(r['b']))
print("Entry & Exit: {:,d} ({:.2%})".format(r['a&b'], r['a&b'] / r['a|b']))
print("Entry - Exit: {:,d} ({:.2%})".format(r['a-b'], r['a-b'] / r['a|b']))
print("Exit - Entry: {:,d} ({:.2%})".format(r['b-a'], r['b-a'] / r['a|b']))

Comparison: DM -> FPKM(meiotic-entry) with FPKM(meiotic-exit)
Entry: 9,589
Exit: 9,469
Entry & Exit: 9,421 (97.76%)
Entry - Exit: 168 (1.74%)
Exit - Entry: 48 (0.50%)
