In [9]:
import ast
import os
import pandas as pd

# from docx import Document
# from docx.shared import Inches, RGBColor

from pylatexenc.latex2text import LatexNodes2Text

In [3]:
# --- Project configuration ---------------------------------------------------
product_name = 'mel_swan'
project_name = 'diygenomics'

# Folder name of the source paper, and the sub-directory (below its `mathpix`
# folder) from which the image-results CSV is loaded.
original_file = '2021_Wightman-Posthuma_A_genomewide_association_study_with_112_563_individuals_identifies_new_risk_loci_for_Alzheimers_disease'
external_id = '2023_05_02_27142069922ab9506d3dg'

# Root of the data tree. os.getenv returns None when DATA_PATH is not set.
data_path = os.getenv('DATA_PATH')


def file_path(*args):
    """Build a path below this paper's ``mathpix`` directory.

    Args:
        *args: Extra path components appended after the ``mathpix`` directory
            (for example ``'charts'`` or ``external_id, image_results``).

    Returns:
        str: ``data_path/eric-client-projects/<product_name>/<project_name>/
        experiment-a/<original_file>/mathpix/<*args>`` joined with
        ``os.path.join``.

    Raises:
        RuntimeError: If ``data_path`` is ``None`` because the ``DATA_PATH``
            environment variable was not set when this cell ran. Without this
            check ``os.path.join`` fails with an unhelpful ``TypeError``.
    """
    if data_path is None:
        raise RuntimeError(
            "DATA_PATH environment variable is not set; "
            "set it to the data root and re-run the configuration cell."
        )
    return os.path.join(data_path, 'eric-client-projects', product_name, project_name,
                        'experiment-a', original_file, 'mathpix', *args)


# Column used as the DataFrame index when reading the image-results CSV.
index_col = 'uuid'

# Base names (without extension / suffix) of the files used by later cells.
summary_file = 'summary_of_summaries'
topic_file = 'topics'
image_results = 'image_results.csv'

In [4]:
# Load the image results CSV stored under <mathpix>/<external_id>/, using the
# configured index column as the row index.
df_image_results = pd.read_csv(
    file_path(external_id, image_results),
    index_col=index_col,
)

In [24]:
# Take the row at position 6 and rebuild its 'mathpix_response' value from the
# Python-literal text stored in the CSV.
mathpix_response = ast.literal_eval(
    df_image_results.iloc[6]['mathpix_response']
)

# Display the raw styled LaTeX. A plain-text rendering could be produced with
# LatexNodes2Text().latex_to_text(mathpix_response['latex_styled']) instead.
mathpix_response['latex_styled']

'\\begin{array}{|c|c|c|c|c|c|c|c|}\n\\hline \\text { Genomic locus } & \\text { Gene } & \\text { Position (GRCh37) } & \\text { Lead variant } & \\text { A1 } & A 1 \\text { frequency } & P \\text { value } & N \\\\\n\\hline 1 & \\text { AGRN } & \\text { 1:985,377 } & \\text { rs113020870 } & \\mathrm{T} & 0.0041 & 3.83 \\times 10^{-8} & 776,379 \\\\\n\\hline 2 & \\text { CR1 } & 1: 207,750,568 & \\text { rs679515 } & \\text { C } & 0.82 & 2.42 \\times 10^{-25} & 762,176 \\\\\n\\hline 3 & \\text { NCK2 } & 2: 106,235,428 & \\text { rs115186657 } & \\text { C } & 0.0035 & 1.33 \\times 10^{-8} & 727,537 \\\\\n\\hline 4 & \\text { BIN1 } & \\text { 2:127,891,427 } & \\text { rs4663105 } & \\text { C } & 0.41 & 3.92 \\times 10^{-58} & 1,078,540 \\\\\n\\hline 5 & \\text { INPPD5 } & 2: 234,082,577 & \\text { rs7597763 } & \\text { C } & 0.45 & 4.65 \\times 10^{-9} & 819,541 \\\\\n\\hline 6 & \\text { CLNK } & 4: 11,014,822 & \\text { rs4504245 } & \\text { G } & 0.79 & 5.23 \\times 10^{-1

In [None]:
# Catalogue everything in the charts directory as a one-column frame.
file_list = os.listdir(file_path('charts'))
df_charts = pd.DataFrame({'filename': file_list})

# Keep names that start with 'topic_' but do not contain the `topic_file`
# stem, then return them as an alphabetically sorted plain list.
topics_files = sorted(
    df_charts.loc[
        df_charts['filename'].str.startswith('topic_')
        & ~df_charts['filename'].str.contains(topic_file),
        'filename',
    ].tolist()
)

In [None]:
# Read the whole summary text file into memory for use in the report.
summary_path = file_path(f'{summary_file}.txt')
with open(summary_path, 'r') as summary_handle:
    main_summary = summary_handle.read()

In [None]:
# Destination of the generated Word report, next to the other mathpix outputs.
word_file = file_path('insights.docx')

# Start a new, empty document to be filled in by the following cells.
# NOTE(review): `Document` is not imported in the visible imports cell (the
# `from docx import Document` line is commented out) - confirm before running.
document = Document()

In [None]:
# Title heading (default level), followed by a level-2 'Summary' section that
# contains the text read from the summary file.
report_title = f'Insights for {project_name.capitalize()}'
document.add_heading(report_title)
document.add_heading('Summary', level=2)
document.add_paragraph(main_summary)

In [None]:
# Single-column, four-row layout table; only rows 2 and 3 are filled here.
table = document.add_table(rows=4, cols=1)

# Row 2: bold, coloured 'Topics' caption.
# NOTE(review): `RGBColor` and `Inches` are not imported in the visible imports
# cell (the `docx.shared` import is commented out) - confirm before running.
topics_run = table.rows[2].cells[0].paragraphs[0].add_run('Topics')
topics_run.bold = True
topics_run.font.color.rgb = RGBColor(0x42, 0x24, 0xE9)

# Row 3: a fresh paragraph holding the topic chart image, 6 inches wide.
# NOTE(review): `i` is not defined anywhere in the visible notebook - this
# presumably belonged to a loop over topic indices; confirm the intended value.
picture_run = table.rows[3].cells[0].add_paragraph().add_run()
picture_run.add_picture(file_path('charts', f'{topic_file}_{i}_star.png'), width=Inches(6.0))

In [None]:
# Write the assembled report to the path stored in `word_file`.
document.save(word_file)