In [None]:
import sys

import json

import base64

# display the version of the Python interpreter running this notebook
sys.version_info

In [None]:
def load_file(path):
    '''
    objective:
        load json file
    input:
        path - path of json file
    output:
        data - parsed content of the file (a dictionary when the
               file holds a JSON object)
    raises:
        OSError - if the file cannot be opened
        json.JSONDecodeError - if the file is not valid JSON
    '''

    # the context manager closes the file even when parsing fails;
    # the encoding is set explicitly so the result does not depend on
    # the platform's default text encoding
    with open(path, encoding='utf-8') as f:
        data = json.load(f)

    return data
    
    
def add_to_markdown(markdown, line):
    '''
    objective:
        append one more line to the markdown built so far
    input:
        markdown - markdown text accumulated so far
        line - text to place on a new line at the end
    output:
        the accumulated markdown, a newline, then the given line
    '''
    # the newline is always inserted, even when the added line is empty
    return markdown + '\n' + line
    

def create_markdown(data):
    '''
    objective:
        iterate through messages and format
    input:
        data - dictionary of messages (the list is read from
               data['messages'])
    output:
        markdown - compiled markdown of messages; an empty string
                   when the message list is empty
    raises:
        KeyError - if 'messages' is missing, or a message lacks
                   'created_date' or the creator's name
    '''
    separator = '###################################'

    # lines are collected in a list and joined once at the end, so the
    # growing text is not copied again for every added line
    lines = []

    # no date seen yet, so the first message always opens a date section
    date = None

    for message in data['messages']:
        # NOTE(review): dropping the last 19 characters assumes the time
        # part of 'created_date' has a fixed width - confirm against the
        # export format
        message_date = message['created_date'][:-19]

        # check if new date
        if message_date != date:
            date = message_date
            lines.append(separator)
            lines.append(date)

        # name, followed by the text when the message has one
        name = message['creator']['name']
        if 'text' in message:
            lines.append(f"{name}: {message['text']}")
        else:
            lines.append(f"{name}:")

        # attachments are handled one at a time so that a single missing
        # file does not drop the remaining attachments of the message
        for attach in message.get('attached_files') or []:
            try:
                image_path = 'Takeout/' + attach['export_name'][:47] + '.jpg'
                with open(image_path, "rb") as img_file:
                    encoded_string = base64.b64encode(img_file.read()).decode('utf-8')
            except (KeyError, TypeError, OSError):
                # best effort: skip an attachment that has no export name
                # or whose file cannot be read
                continue
            # the file is opened with a .jpg extension, so it is embedded
            # with the matching JPEG MIME type
            lines.append(f'<img src="data:image/jpeg;base64,{encoded_string}" alt="Sample Image" />')

        # reply information is optional; only missing or malformed
        # fields are tolerated here
        try:
            quoted = message['quoted_message_metadata']
            reply = f"  [IN REPLY TO]: \"{quoted['text']}\" from {quoted['creator']['name']}"
        except (KeyError, TypeError):
            pass
        else:
            lines.append(reply)

        # separate message
        lines.append('')

    return '\n'.join(lines)

def format_google_chat(path):
    '''
    objective:
        read a json message file and format it as markdown
    input:
        path - path of json file
    output:
        markdown text built by create_markdown from the loaded file
    '''
    # load the file, then hand the parsed messages to the formatter
    return create_markdown(load_file(path))
    

In [None]:
# run function
# builds the markdown for 'Takeout/messages.json' and prints all of it;
# attachments are embedded as base64 text, so the printed output can be long
markdown_text = format_google_chat('Takeout/messages.json')
print(markdown_text)

In [None]:
# PDF method 1
# need to install wkhtmltopdf
# seems like this is deprecated though
! pip install markdown2 
! pip install pdfkit 

import markdown2
import pdfkit

html_text = markdown2.markdown(markdown_text)
pdfkit.from_string(html_text, "output.pdf")

In [None]:
# PDF method 2
from fpdf import FPDF

pdf = FPDF()


pdf.add_page()

# Register the font file 'Apple LiGothic.ttf' under the family name
# "DejaVu" and select it, regular style, size 16
pdf.add_font("DejaVu", '', 'Apple LiGothic.ttf', uni=True)
pdf.set_font('DejaVu', '', 16)

# Write the whole transcript. multi_cell breaks the text into lines
# (a plain cell prints everything on one line); width 0 extends the
# cell to the right margin
pdf.multi_cell(0, 10, markdown_text, align='C')

# Set font for the rest of the text
pdf.set_font('Arial', '', 12)

# Add some more text
# pdf.cell(200, 10, txt="This is a simple PDF file generated using Python.", ln=True, align='C')

# Save the file
pdf.output("example.pdf")

print("PDF generated successfully!")

In [None]:
# PDF method 3
! pip install markdown
! pip install weasyprint

import markdown
from weasyprint import HTML

# Convert Markdown to HTML
html = markdown.markdown(markdown_text)

# Convert HTML to PDF
HTML(string=html).write_pdf('output.pdf')

In [None]:
# text file method 1
# the encoding is set explicitly so that non-ASCII characters in the
# messages are written the same way on every platform
with open("output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(markdown_text)

In [None]:
# priorities:
# 1. export to PDF
# 2. show image inline
# 3. show GIFs
# 4. hyperlink to attachments
# 5. format bold, italic, etc.