In [1]:
import pandas as pd

In [45]:
# Load the ovary marker-gene table; the next cell reads its "group" and "gene" columns.
df = pd.read_csv(filepath_or_buffer="../data/genes_marker_ovary.csv")

In [46]:
# Collapse the marker table into one sorted, comma-separated gene string per
# selected cell type.
# NOTE: this rebinds `df` from the loaded DataFrame to a Series, so the loading
# cell has to be run again before this cell can be re-executed.
df = (
    df.loc[
        lambda markers: markers["group"].isin(
            [
                "fibroblast",
                "lymphocyte",
                "endothelial cell",
                "plasma cell",
                "fallopian tube secretory epithelial cell",
            ]
        )
    ]
    .groupby("group")["gene"]
    .apply(lambda genes: ", ".join(sorted(genes)))  # one joined string per cell type
    .sort_index()  # order rows by cell type
    .rename_axis("cell type")  # index label shown in the table header
    .rename("Top 20 genes")  # Series name becomes the column header
)

# Print the table as LaTeX using fixed-width paragraph columns
print(df.to_latex(column_format="p{3cm}p{10cm}"))

\begin{tabular}{p{3cm}p{10cm}}
\toprule
 & Top 20 genes \\
cell type &  \\
\midrule
endothelial cell & A2M, ADGRL4, APP, CD34, CD93, CDH5, CLEC14A, COL4A1, COL4A2, EGFL7, ENG, GNG11, HSPG2, IGFBP7, INSR, PECAM1, RAMP2, SPARCL1, SPTBN1, VWF \\
fallopian tube secretory epithelial cell & BCAM, CD24, CD9, CLDN3, DSP, ELF3, EPCAM, FOLR1, KRT18, KRT19, KRT7, KRT8, MAL2, MSLN, MUC1, RPL8, S100A13, SLPI, SPINT2, WFDC2 \\
fibroblast & AEBP1, C1R, C1S, CALD1, COL1A1, COL1A2, COL3A1, COL5A1, COL5A2, COL6A1, COL6A2, COL6A3, CTHRC1, DCN, LGALS1, LUM, MMP2, PCOLCE, RARRES2, SPARC \\
lymphocyte & ACAP1, BTG1, CCL5, CD2, CD3D, CD3E, CD3G, CD52, CD69, CORO1A, CXCR4, ETS1, EVL, GZMA, HCST, IL32, NKG7, PTPRC, TRAC, TSC22D3 \\
plasma cell & CD79A, DERL3, FCRL5, FKBP11, FKBP2, HERPUD1, IGHG1, IGHG3, IGHG4, IGKC, JCHAIN, MZB1, PIM2, PRDX4, SEC11C, SPCS3, SSR4, TENT5C, TNFRSF17, XBP1 \\
\bottomrule
\end{tabular}

