In [1]:
# Base imports
import os
import pickle
import re

# Compute imports
import numpy as np
import pandas as pd
import scipy
import scipy.cluster.hierarchy
from tqdm.notebook import tqdm, trange

# Plotting imports
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
from plotly import express as px
import matplotlib.patches as mpatches

# ML import
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error, median_absolute_error
from sklearn.metrics.pairwise import cosine_similarity

# Font handling for exported figures: fonttype 42 embeds TrueType fonts in
# PDF/PS output and 'none' keeps SVG text as text instead of paths.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'svg.fonttype': 'none',
    'font.sans-serif': 'Arial',
    'font.family': 'sans-serif',
})

# The seaborn style is applied between the two groups of settings, exactly as
# before; the colour overrides must stay after it so the style does not
# replace them with its own values.
sns.set_style('ticks')

# Pure black text, axis labels and tick marks.
matplotlib.rcParams.update({
    'text.color': '#000000',
    'axes.labelcolor': '#000000',
    'xtick.color': '#000000',
    'ytick.color': '#000000',
})

In [2]:
# Input file locations. All paths are relative, so they resolve against the
# working directory the notebook is run from.

# Strain-by-gene matrix (loaded below as the "P matrix"), genome metadata and
# eggNOG annotation table.
DF_GENES = '../../data/processed/cd-hit-results/sim80/Ebacter_strain_by_gene.pickle.gz'
ENRICHED_METADATA = '../../data/metadata/enriched_metadata.csv'
DF_EGGNOG = '../../data/processed/df_eggnog.csv'

# Pickled gene tables for complete genomes -- presumably the core / accessory /
# rare partitions of the pangenome (inferred from the file names; TODO confirm).
DF_CORE_COMPLETE = '../../data/processed/CAR_genomes/df_core_complete.pickle'
DF_ACC_COMPLETE = '../../data/processed/CAR_genomes/df_acc_complete.pickle'
DF_RARE_COMPLETE = '../../data/processed/CAR_genomes/df_rare_complete.pickle'

# NMF outputs (L and A matrices, binarized and raw) and Bakta gene annotations.
L_BINARIZED = '../../data/processed/nmf-outputs/L_binarized.csv'
A_BINARIZED = '../../data/processed/nmf-outputs/A_binarized.csv'
L_MATRIX = '../../data/processed/nmf-outputs/L.csv'
A_MATRIX = '../../data/processed/nmf-outputs/A.csv'
BAKTA_ANNOTATIONS = '../../data/processed/bakta_gene_annotations.csv'

In [3]:
# Load the Bakta gene annotation table; the first CSV column becomes the index.
bakta_annotations = pd.read_csv(BAKTA_ANNOTATIONS, index_col=0)

In [4]:
# Gene location tables. NOTE: unlike the other inputs, these two paths are bare
# file names, so the files must sit in the notebook's working directory.
gene_locs_acc = pd.read_csv('acc_gene_location.csv', index_col=0)
gene_locs = pd.read_csv('complete_gene_location.csv', index_col=0)

In [5]:
# Load the three pickled gene tables in the same order as before
# (rare, acc, core). read_pickle can execute arbitrary code, so these files
# must come from a trusted source.
df_rare, df_acc, df_core = (
    pd.read_pickle(pickle_path)
    for pickle_path in (DF_RARE_COMPLETE, DF_ACC_COMPLETE, DF_CORE_COMPLETE)
)

In [6]:
# Read every column as a Python object (string) so that identifiers such as
# genome_id (e.g. '158836.809') are not parsed as floats; genome_id is later
# used to select columns of the gene matrix.
metadata = pd.read_csv(ENRICHED_METADATA, index_col=0, dtype='object')

# Quick look at the table: its shape and first rows.
display( metadata.shape, metadata.head())

(2575, 31)

Unnamed: 0,genome_id,genome_name,taxon_id,genome_status,strain,completion_date,bioproject_accession,biosample_accession,assembly_accession,genbank_accessions,...,refseq_cds,isolation_source,collection_date,isolation_country,geographic_location,host_name,comments,additional_metadata,complete_mash_cluster,mlst
0,158836.1771,Enterobacter hormaechei FUJ80149,158836,WGS,FUJ80149,2022-06-04T00:00:00Z,PRJDB13516,SAMD00490860,,BRIC00000000,...,,not collected,2019-12-22,Japan,Japan,Homo sapiens,,,,78
1,158836.2297,Enterobacter hormaechei EC50,158836,WGS,EC50,2023-03-20T00:00:00Z,PRJNA944415,SAMN33748124,,JARJGB000000000,...,5227.0,,2015,China,China: Sichuan,Homo sapiens,,collected_by:Lizhang Liu,,200
2,61645.609,Enterobacter asburiae C210030,61645,WGS,C210030,1900-01-01T00:00:00Z,PRJNA837096,SAMN28189480,GCA_023753065.1,JAMGNC000000000,...,4727.0,,2021,China,China: Zhejiang,Homo sapiens,,sample_type:whole organism,,484
3,158836.809,Enterobacter hormaechei strain RHBSTW-00198,158836,Complete,RHBSTW-00198,2020-07-27T00:00:00Z,PRJNA605147,SAMN15148534,GCA_013744415.1,"CP056756,CP056757,CP056758,CP056759",...,4561.0,Freshwater sample from downstream of wastewate...,2017,United Kingdom,United Kingdom,,These isolates represent a dataset assembled b...,sample_type:culture;biomaterial_provider:Moder...,3.0,-1
4,158836.1952,Enterobacter hormaechei ECL66,158836,WGS,ECL66,2022-07-03T00:00:00Z,PRJNA846540,SAMN28906575,GCA_024128555.1,JAMYCZ000000000,...,4784.0,sputum,2019-10-14,China,China: Chongqing,Homo sapiens,,collected_by:Department of Microbiology,,177


In [7]:
# Load in (full) P matrix. read_pickle can execute arbitrary code, so the
# pickle must come from a trusted source.
df_genes = pd.read_pickle(DF_GENES)

# Keep only the metadata rows whose genome_status is 'Complete'
metadata_complete = metadata.loc[metadata['genome_status'] == 'Complete']

# Restrict the P matrix to those genomes, then:
#   - replace missing entries with 0
#   - convert the sparse columns to dense
#   - store as int8 to limit memory and speed up later computation
df_genes_complete = (
    df_genes[metadata_complete['genome_id']]
    .fillna(0)
    .sparse.to_dense()
    .astype('int8')
)

# Drop genes that do not occur in any of the selected genomes
inCompleteseqs = df_genes_complete.sum(axis=1) > 0
df_genes_complete = df_genes_complete.loc[inCompleteseqs]

df_genes_complete.shape

(67539, 473)

In [8]:
# Load in eggNOG annotations
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The saved output of this cell shows a warning
# pointing at this read_csv call -- presumably pandas' mixed-type DtypeWarning
# (TODO confirm) -- and whole-file inference is the documented way to avoid it.
df_eggnog = pd.read_csv(DF_EGGNOG, index_col=0, low_memory=False)

# Fill missing annotations with '-', the placeholder the table already uses
# for empty fields. Assigning the result (instead of inplace=True) keeps the
# cell a plain "read, then transform" pipeline.
df_eggnog = df_eggnog.fillna('-')

display(
    df_eggnog.shape,
    df_eggnog.head()
)

  df_eggnog = pd.read_csv(DF_EGGNOG, index_col=0)


(128358, 21)

Unnamed: 0_level_0,allele,seed_ortholog,evalue,score,eggNOG_OGs,max_annot_lvl,COG_category,Description,Preferred_name,GOs,...,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMs
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ebacter_C27603,Ebacter_C27603A0,716541.ECL_01176,0.0,618.0,"COG0667@1|root,COG0667@2|Bacteria,1MV2Y@1224|P...",2|Bacteria,C,Aldo/keto reductase family,yajO,"GO:0003674,GO:0003824,GO:0005575,GO:0005622,GO...",...,ko:K05882,-,-,-,-,"ko00000,ko01000",-,-,-,Aldo_ket_red
Ebacter_C6462,Ebacter_C6462A0,1045856.EcWSU1_00942,0.0,1217.0,"COG1154@1|root,COG1154@2|Bacteria,1MUSJ@1224|P...",2|Bacteria,H,Catalyzes the acyloin condensation reaction be...,dxs,"GO:0000287,GO:0003674,GO:0003824,GO:0005488,GO...",...,ko:K01662,"ko00730,ko00900,ko01100,ko01110,ko01130,map007...",M00096,R05636,RC00032,"ko00000,ko00001,ko00002,ko01000",-,-,"iEcSMS35_1347.EcSMS35_0456,iIT341.HP0354,iJN74...","DXP_synthase_N,Transket_pyr,Transketolase_C"
Ebacter_C31898,Ebacter_C31898A0,716541.ECL_01178,0.0,553.0,"COG0142@1|root,COG0142@2|Bacteria,1MWNG@1224|P...",2|Bacteria,H,Belongs to the FPP GGPP synthase family,ispA,"GO:0003674,GO:0003824,GO:0004161,GO:0004337,GO...",...,"ko:K00795,ko:K02523,ko:K13789","ko00900,ko01100,ko01110,ko01130,map00900,map01...","M00364,M00366","R01658,R02003,R02061,R09248",RC00279,"ko00000,ko00001,ko00002,ko01000,ko01006",-,-,"iPC815.YPO3176,iSFV_1184.SFV_0386",polyprenyl_synt
Ebacter_C44055,Ebacter_C44055A0,693444.D782_3430,0.0,306.0,"COG0363@1|root,COG0363@2|Bacteria,1R8UH@1224|P...",2|Bacteria,G,glucosamine-6-phosphate deaminase,nagB,"GO:0003674,GO:0003824,GO:0005575,GO:0005622,GO...",...,"ko:K01057,ko:K02080,ko:K02564","ko00030,ko00052,ko00520,ko01100,ko01110,ko0112...","M00004,M00006,M00008","R00765,R02035,R08365","RC00163,RC00537","ko00000,ko00001,ko00002,ko01000",-,-,"iB21_1397.B21_02959,iECB_1328.ECB_03008,iECD_1...",Glucosamine_iso
Ebacter_C14404,Ebacter_C14404A0,693444.D782_3429,0.0,768.0,"COG1486@1|root,COG1486@2|Bacteria,1NI6G@1224|P...",2|Bacteria,G,Catalyzes the fromation of N-acetyl-D-glucosam...,celF,-,...,"ko:K01222,ko:K01232","ko00010,ko00500,map00010,map00500",-,"R00837,R00838,R00839,R05133,R05134,R06113","RC00049,RC00171,RC00714","ko00000,ko00001,ko01000",-,"GH4,GT4",-,"Glyco_hydro_4,Glyco_hydro_4C"


In [9]:
# Load in A_binarized matrix
# As the rendered table shows, rows are phylons and columns are genome IDs;
# get_strains() below treats a value of 1 as "genome belongs to this phylon".
A_binarized = pd.read_csv(A_BINARIZED, index_col=0)
A_binarized

Unnamed: 0,158836.809,550.1141,1686399.7,1296536.241,550.3830,1812935.464,1333851.3,208224.404,158836.2021,158836.2356,...,158836.801,550.2813,158836.2201,550.2254,299766.198,1045856.3,299766.187,158836.1100,299766.116,158836.2022
hormaechei-steigerwaltii-1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
roggenkampii,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
hormaechei-hoffmannii-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
unchar-1,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
ludwigii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
hormaechei-xiangfangensis,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
asburiae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
kobei,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
cloacae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
hormaechei-oharae,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Load in L_binarized matrix
# As the rendered table shows, rows are gene clusters (Ebacter_C...) and
# columns are phylons; get_genes() below treats a value > 0 as "gene belongs
# to this phylon".
L_binarized = pd.read_csv(L_BINARIZED, index_col=0)
L_binarized

Unnamed: 0,hormaechei-steigerwaltii-1,roggenkampii,hormaechei-hoffmannii-1,unchar-1,ludwigii,hormaechei-xiangfangensis,asburiae,kobei,cloacae,hormaechei-oharae,hormaechei-steigerwaltii-2,hormaechei-steigerwaltii-3,unchar-2,hormaechei-hormaechei,unchar-3,bugandensis,cancerogenous,unchar-4,hormaechei-hoffmannii-2
Ebacter_C15853,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C51247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C100996,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C20249,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C95262,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ebacter_C76682,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C440,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebacter_C48006,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
Ebacter_C8929,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [11]:
# Ordering of all 19 phylons (the column labels of L_binarized).
phylon_order = [
    'hormaechei-xiangfangensis',
    'hormaechei-oharae',
    'hormaechei-steigerwaltii-2',
    'hormaechei-steigerwaltii-1',
    'hormaechei-steigerwaltii-3',
    'hormaechei-hormaechei',
    'hormaechei-hoffmannii-1',
    'hormaechei-hoffmannii-2',
    'unchar-1',
    'unchar-2',
    'unchar-3',
    'unchar-4',
    'roggenkampii',
    'asburiae',
    'kobei',
    'bugandensis',
    'cancerogenous',
    'ludwigii',
    'cloacae',
]

# Same ordering without the 'unchar-*' phylons. Derived from phylon_order so
# the two lists cannot drift apart.
characterized_order = [
    phylon for phylon in phylon_order if not phylon.startswith('unchar-')
]

# Analysis of motility genes of Steigerwaltii phylons

In [12]:
def get_strains(phylon, A_binarized = A_binarized):
    """Return the column labels of ``A_binarized`` whose entry for ``phylon`` is 1.

    Parameters
    ----------
    phylon : row label of ``A_binarized`` to look up.
    A_binarized : DataFrame indexed by phylon. The default is the module-level
        ``A_binarized`` as it existed when this function was defined; reloading
        that variable later does not change the default.

    Returns
    -------
    pandas.Index of the labels whose value in the ``phylon`` row equals 1.
    """
    membership_row = A_binarized.loc[phylon]
    is_member = membership_row == 1
    return membership_row.loc[is_member].index

def get_genes(phylon, L_binarized = L_binarized):
    """Return the row labels of ``L_binarized`` with a positive entry for ``phylon``.

    Parameters
    ----------
    phylon : column label of ``L_binarized``.
    L_binarized : DataFrame with one column per phylon. The default is the
        module-level ``L_binarized`` as it existed when this function was
        defined.

    Returns
    -------
    list of index labels, in index order, whose value in the ``phylon`` column
    is greater than 0.
    """
    # One vectorized comparison over the column instead of a scalar .loc
    # lookup per row; same labels in the same order.
    in_phylon = (L_binarized.loc[:, phylon] > 0).to_numpy()
    return L_binarized.index[in_phylon].tolist()

def get_shared_genes(phylons, L_binarized = L_binarized):
    """Return the genes found in every listed phylon and in no other phylon.

    Parameters
    ----------
    phylons : a single column label (str) or a list of column labels of
        ``L_binarized``.
    L_binarized : DataFrame with one row per gene and one column per phylon.
        The default is the module-level ``L_binarized`` as it existed when
        this function was defined.

    Returns
    -------
    list of index labels, in index order, for rows whose values over
    ``phylons`` sum to ``len(phylons)`` and whose values over all remaining
    columns sum to 0.
    """
    if isinstance(phylons, str):
        phylons = [phylons]
    phylons = list(phylons)

    # Row sums computed once for the whole frame. The previous per-gene loop
    # rebuilt L_binarized.drop(...) -- a full copy of the frame -- for every row.
    in_every_target = L_binarized.loc[:, phylons].sum(axis=1) == len(phylons)
    in_no_other = L_binarized.drop(columns=phylons).sum(axis=1) == 0

    exclusive = (in_every_target & in_no_other).to_numpy()
    return L_binarized.index[exclusive].tolist()

In [13]:
# The two steigerwaltii phylons whose exclusively shared genes are analysed below.
phylons = ['hormaechei-steigerwaltii-3', 'hormaechei-steigerwaltii-1']

In [14]:
# Genes present in both selected phylons and in no other phylon
shared_genes = get_shared_genes(phylons)

# Look up their eggNOG annotation rows once and reuse them for every filter
shared_annotations = df_eggnog.loc[shared_genes]

# Three ways a gene is flagged here: COG category containing 'N',
# or a description mentioning 'pilus' or 'pili'
isMotility1 = shared_annotations['COG_category'].str.contains('N')
isMotility2 = shared_annotations['Description'].str.contains('pilus')
isMotility3 = shared_annotations['Description'].str.contains('pili')

# Keep a row if any of the three flags is set, then report how many rows matched
any_motility_flag = isMotility1 | isMotility2 | isMotility3
steigerwaltii13_motility = shared_annotations.loc[any_motility_flag]
steigerwaltii13_motility.shape[0]

43

In [15]:
# Display the annotation rows selected by the motility filters above.
steigerwaltii13_motility

Unnamed: 0_level_0,allele,seed_ortholog,evalue,score,eggNOG_OGs,max_annot_lvl,COG_category,Description,Preferred_name,GOs,...,KEGG_ko,KEGG_Pathway,KEGG_Module,KEGG_Reaction,KEGG_rclass,BRITE,KEGG_TC,CAZy,BiGG_Reaction,PFAMs
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ebacter_C81012,Ebacter_C81012A0,95619.PM1_0219485,1e-06,53.1,"COG3190@1|root,COG3190@2|Bacteria,1N79Z@1224|P...",2|Bacteria,N,flagellar,fliO,-,...,ko:K02418,"ko02040,map02040",-,-,-,"ko00000,ko00001,ko02035,ko02044",3.A.6.2,-,-,FliO
Ebacter_C17706,Ebacter_C17706A0,716541.ECL_03274,0.0,524.0,"COG1749@1|root,COG1749@2|Bacteria,1MU5J@1224|P...",2|Bacteria,N,Flagellar basal body protein FlaE,flgE,-,...,ko:K02390,"ko02040,map02040",-,-,-,"ko00000,ko00001,ko02035",-,-,-,"FlaE,Flg_bb_rod,Flg_bbr_C"
Ebacter_C62234,Ebacter_C62234A0,716541.ECL_03296,0.0,223.0,"COG1580@1|root,COG1580@2|Bacteria,1PX2D@1224|P...",2|Bacteria,N,Controls the rotational direction of flagella ...,fliL,-,...,ko:K02415,-,-,-,-,"ko00000,ko02035",-,-,-,FliL
Ebacter_C74657,Ebacter_C74657A0,716541.ECL_03280,0.0,211.0,"COG3418@1|root,COG3418@2|Bacteria,1QR4K@1224|P...",2|Bacteria,N,FlgN protein,flgN,-,...,ko:K02399,"ko02040,map02040",-,-,-,"ko00000,ko00001,ko02035",-,-,-,FlgN
Ebacter_C26995,Ebacter_C26995A0,716541.ECL_03301,0.0,571.0,"COG1536@1|root,COG1536@2|Bacteria,1MV9X@1224|P...",2|Bacteria,N,"FliG is one of three proteins (FliG, FliN, Fli...",fliG,"GO:0001539,GO:0003674,GO:0005488,GO:0005515,GO...",...,ko:K02410,"ko02030,ko02040,map02030,map02040",-,-,-,"ko00000,ko00001,ko02035",-,-,-,"FliG_C,FliG_M,FliG_N"
Ebacter_C77936,Ebacter_C77936A0,716541.ECL_03311,0.0,166.0,"2DQI4@1|root,33700@2|Bacteria,1NAB2@1224|Prote...",2|Bacteria,N,Flagellar protein FlhE,-,-,...,ko:K03516,-,-,-,-,"ko00000,ko02035",-,-,-,FlhE
Ebacter_C45587,Ebacter_C45587A0,716541.ECL_03300,0.0,253.0,"COG1317@1|root,COG1317@2|Bacteria,1NMQE@1224|P...",2|Bacteria,N,Flagellar assembly protein FliH,fliH,-,...,ko:K02411,"ko02040,map02040",-,-,-,"ko00000,ko00001,ko02035,ko02044",3.A.6.2,-,-,FliH
Ebacter_C20772,Ebacter_C20772A47,716541.ECL_03270,1.4e-231,642.0,"COG1706@1|root,COG1706@2|Bacteria,1MVKW@1224|P...",2|Bacteria,N,Assembles around the rod to form the L-ring an...,flgI,-,...,ko:K02394,"ko02040,map02040",-,-,-,"ko00000,ko00001,ko02035",-,-,-,FlgI
Ebacter_C45044,Ebacter_C45044A0,701347.Entcl_0630,0.0,322.0,"COG3121@1|root,COG3121@2|Bacteria,1R3T9@1224|P...",2|Bacteria,NU,"Pili assembly chaperone PapD, C-terminal domain",elfD,"GO:0008150,GO:0009987,GO:0016043,GO:0030030,GO...",...,ko:K07346,-,-,-,-,"ko00000,ko02035,ko02044,ko03110",-,-,-,"PapD_C,PapD_N"
Ebacter_C87336,Ebacter_C87336A0,716541.ECL_03264,0.0,205.0,"2AX91@1|root,31P80@2|Bacteria,1RJCE@1224|Prote...",2|Bacteria,N,Functions in complex with FlhC as a master tra...,flhD,-,...,ko:K02403,"ko02020,ko02024,ko02026,ko02040,map02020,map02...",-,-,-,"ko00000,ko00001,ko02035",-,-,-,FlhD


In [16]:
# One row per characterized phylon, one column per gene.
X = L_binarized[characterized_order].T
# Ward linkage over the phylons. Each row of `links` records one merge; its
# first two entries are the ids of the clusters being joined (stored as floats).
links = scipy.cluster.hierarchy.linkage(X, method="ward")


# clusters[k] lists the leaf positions (row positions of X) inside cluster k.
# Ids 0..n_leaves-1 are the single phylons; merge i creates id n_leaves + i.
n_leaves = len(X.index)
clusters = {leaf: [leaf] for leaf in range(n_leaves)}

# Cast the child ids to int so the dictionary is keyed and read with ints
# throughout, rather than relying on 3.0 and 3 hashing to the same key.
for i, (left, right) in enumerate(links[:, :2].astype(int)):
    clusters[n_leaves + i] = clusters[left] + clusters[right]


def track_split(cluster, clusters, links):
    """Recursively expand a cluster id into a nested dict of its merges.

    Parameters
    ----------
    cluster : id of the cluster to expand.
    clusters : mapping from cluster id to the list of leaves it contains.
    links : linkage rows; the first two entries of row ``i`` are the ids of
        the two clusters joined to create cluster ``len(links) + 1 + i``.

    Returns
    -------
    ``cluster`` itself when it contains a single leaf, otherwise
    ``{cluster: {left_id: <subtree>, right_id: <subtree>}}``.
    """
    # A cluster with one member is a leaf: nothing left to split.
    if len(clusters[cluster]) == 1:
        return cluster

    # There is one more leaf than there are merges, and merged clusters are
    # numbered right after the leaves.
    n_leaves = len(links) + 1
    merge_row = links[cluster - n_leaves]

    children = {}
    for child in (int(merge_row[0]), int(merge_row[1])):
        children[child] = track_split(child, clusters, links)
    return {cluster: children}


# The largest cluster id is the last merge, i.e. the cluster containing every leaf.
split_tree = track_split(max(clusters), clusters, links)

In [17]:
def get_gene_sets(df_genes, splits, clusters, value):
    """Count the genes confined to the columns of one cluster.

    Parameters
    ----------
    df_genes : DataFrame with one row per gene; its columns are addressed by
        position and its entries are compared against 1 and 0.
    splits : not used by this function; kept so existing calls still work.
    clusters : mapping from cluster id to a list of column positions.
    value : id of the cluster to evaluate.

    Returns
    -------
    tuple ``(unique_genes, shared_genes)`` of counts:
      * unique_genes -- rows equal to 1 in every member column and 0 in every
        non-member column;
      * shared_genes -- rows equal to 1 in at least one member column and 0 in
        every non-member column.
    """
    presence = df_genes.values

    # Boolean mask over column positions: True for the cluster's members.
    is_member = np.zeros(presence.shape[1], dtype=bool)
    is_member[np.array(clusters[value])] = True

    in_member = presence[:, is_member] == 1
    # When the cluster spans every column there are no outside columns; the
    # reduction over an empty axis is True for every row, so no outside
    # constraint is applied in that case.
    absent_outside = (presence[:, ~is_member] == 0).all(axis=1)

    unique_genes = in_member.all(axis=1) & absent_outside
    shared_genes = in_member.any(axis=1) & absent_outside

    return (unique_genes.sum(), shared_genes.sum())


# Gene-by-phylon matrix for the characterized phylons, with the columns
# relabelled 0..n-1 so they line up with the leaf positions stored in `clusters`.
gene_P = L_binarized.loc[:, characterized_order]
gene_P.columns = list(range(gene_P.shape[1]))


# One row per cluster id, filled in below.
gene_values = pd.DataFrame(index = list(clusters), columns = ['unique_genes', 'shared_genes'])

for cluster in tqdm(list(clusters)):
    n_unique, n_shared = get_gene_sets(gene_P, split_tree, clusters, cluster)
    gene_values.loc[cluster] = (n_unique, n_shared)

  0%|          | 0/29 [00:00<?, ?it/s]

In [18]:
# Display the per-cluster gene counts (index = cluster id).
gene_values

Unnamed: 0,unique_genes,shared_genes
0,29,29
1,38,38
2,94,94
3,1,1
4,109,109
5,139,139
6,102,102
7,11,11
8,108,108
9,62,62
