In [170]:
import pandas as pd

df = pd.read_csv('a_data.csv')

In [171]:
import re

def cleanify(input_string):
    """
    Remove newline characters, plus signs, minus signs, brackets,
    consecutive dots, and consecutive spaces from the input string.
    
    Args:
    input_string (str): The string to be cleaned.
     
    Returns:
    str: The cleaned string.
    """
    # Define the pattern to remove specific characters
    pattern = r'[\n+\-\[\]\(\)^]'
    cleaned_string = re.sub(pattern, '', input_string)
    
    # Remove consecutive dots
    cleaned_string = re.sub(r'\.{2,}', '.', cleaned_string)
    
    # Remove consecutive spaces
    cleaned_string = re.sub(r'\s{2,}', ' ', cleaned_string)
    
    return cleaned_string.strip()

In [172]:
df['changes'] = df['changes'].apply(cleanify)

In [179]:
df = df.sample(10)

In [180]:
filtered_df = df[df['originalContent'].str.split().str.len() > 2]
filtered_df = filtered_df[~filtered_df['originalContent'].str.contains(r'(\.\.+|,,+)$')]

  filtered_df = filtered_df[~filtered_df['originalContent'].str.contains(r'(\.\.+|,,+)$')]


In [181]:
filtered_df

Unnamed: 0,originalContent,correctedSentence,changes,temp_lower
25,"other words, hx0,1 directly.","In other words, hx is 0 or 1 directly.","<b> In </b> other words, <b> hx is 0 or 1 </b>...","<b> in </b> other words, <b> hx is 0 or 1 </b>..."
54,"by, xy0N0,xy1N1,yBernoulli 0, classes, vectors 1.","By the way, xy0 ~ N0, xy1 ~ N1, and y ~ Bernou...","<b> By the way, xy0 ~ N0, xy1 ~ N1, and y ~ Be...","<b> by the way, xy0 ~ n0, xy1 ~ n1, and y ~ be..."
87,"Looking deeper y0,1 rule, argmaxyPyxargmaxyPxy...","Looking deeper, we can see that the rule for p...","Looking <b> deeper, we can see that the rule f...","looking <b> deeper, we can see that the rule f..."
122,discuss superior viceversa.,"We discuss which one is superior, and vice versa.","<b> We </b> discuss <b> which one is superior,...","<b> we </b> discuss <b> which one is superior,..."
40,A standard 0.6I.,A standard deviation of 0.6I.,A standard <b> deviation of </b> 0.6I.,a standard <b> deviation of </b> 0.6i.
125,"3. converse, implication does Gaussian.","The converse is also true, and the implication...","<b> The converse is also true, and the </b> im...","<b> the converse is also true, and the </b> im..."
43,This because always integrates area under so s...,This is because the area under the curve alway...,This <b> is </b> because <b> the area under th...,this <b> is </b> because <b> the area under th...
100,Discussion Comparing interesting relationship.,The discussion compares the interesting relati...,<b> The discussion compares the </b> interesti...,<b> the discussion compares the </b> interesti...


In [182]:
filtered_df.iloc[3]['changes']

'<b> We </b> discuss <b> which one is superior, and vice versa. </b>'

In [183]:
data = filtered_df.to_dict('records')

In [184]:

data

[{'originalContent': 'other words, hx0,1 directly.',
  'correctedSentence': 'In other words, hx is 0 or 1 directly.',
  'changes': '<b> In </b> other words, <b> hx is 0 or 1 </b> directly.',
  'temp_lower': '<b> in </b> other words, <b> hx is 0 or 1 </b> directly.'},
 {'originalContent': 'by, xy0N0,xy1N1,yBernoulli 0, classes, vectors 1.',
  'correctedSentence': 'By the way, xy0 ~ N0, xy1 ~ N1, and y ~ Bernoulli(0, 1), where the classes are represented by vectors of length 1.',
  'changes': '<b> By the way, xy0 ~ N0, xy1 ~ N1, and y ~ Bernoulli0, 1, where the classes are represented by </b> vectors <b> of length </b> 1.',
  'temp_lower': '<b> by the way, xy0 ~ n0, xy1 ~ n1, and y ~ bernoulli(0, 1), where the classes are represented by </b> vectors <b> of length </b> 1.'},
 {'originalContent': 'Looking deeper y0,1 rule, argmaxyPyxargmaxyPxyPyPx',
  'correctedSentence': 'Looking deeper, we can see that the rule for predicting y is given by the argmax of Pyx, which is equal to the argmax 

In [185]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, ListFlowable, ListItem, Image
from reportlab.graphics.shapes import Drawing, Rect
from svglib.svglib import svg2rlg
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
import datetime

# Register custom fonts (use TTF versions if possible)
pdfmetrics.registerFont(TTFont('Aptos', './fonts/Fonts/Aptos.ttf'))
pdfmetrics.registerFont(TTFont('AptosDisplay', './fonts/Fonts/Aptos-Display.ttf'))
pdfmetrics.registerFont(TTFont('AptosDisplay-Bold', './fonts/Fonts/Aptos-Display-Bold.ttf'))



random_sentences = [
    "Dr Kaher put me at ease, even though very anxious, did <b>amazing</b> job my...",
"The <b>service</b> so good Kotecha, recommended change dentist.",
"Fantastic <b>service</b>, professional top notch technology."
]

# Function to create a drawing with a square
def create_square():
    drawing = Drawing(20, 20)
    square = Rect(0, 0, 20, 20, strokeColor=colors.black, fillColor=colors.white)
    drawing.add(square)
    return drawing

# Function to center the square within a table cell
def centered_square():
    square = create_square()
    centered_square_table = Table([[Spacer(1, 0.25*inch)], [square], [Spacer(1, 0.25*inch)]])
    centered_square_table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
    ]))
    return centered_square_table

# Function to process the text and apply color to bold text
def process_text_with_bold(text):
    parts = text.split('<b>')
    processed_parts = [parts[0]]
    for part in parts[1:]:
        subparts = part.split('</b>')
        processed_parts.append(f'<font color="#f5c949"><b>{subparts[0]}</b></font>{subparts[1]}')
    return ''.join(processed_parts)



In [186]:
def create_pdf(file_name):
    doc = SimpleDocTemplate(file_name, pagesize=letter, leftMargin=0.5*inch, rightMargin=0.5*inch, topMargin=0.5*inch, bottomMargin=0.5*inch)
    styles = getSampleStyleSheet()
    styleN = ParagraphStyle(name='Normal', fontName='Aptos', fontSize=12)
    styleLN = ParagraphStyle(name='AptosDisplay', fontName='Aptos', fontSize=26)
    styleAptosDisplay = ParagraphStyle(name='AptosDisplay', fontName='AptosDisplay', fontSize=14)
    styleAptosDisplayBold = ParagraphStyle(name='AptosDisplay-Bold', fontName='AptosDisplay-Bold', fontSize=14)
    styleTitle = ParagraphStyle(name='Title', fontName='AptosDisplay-Bold', fontSize=24, spaceAfter=20)
    styleFooter = ParagraphStyle(name='Footer', fontName='Aptos', fontSize=12)

    elements = []

    # Add the logo image at the top left
    logo_path = "./imgs/image.png"  # Replace with the actual path to your logo image
    logo = Image(logo_path)
    logo.drawWidth = 1.5 * inch
    logo.drawHeight = logo.drawWidth * 0.6  # Adjust height proportionally to maintain aspect ratio
    logo.hAlign = 'LEFT'
    elements.append(logo)

    # Add the line with website link on the left and current date time on the right
    current_datetime = datetime.datetime.now().strftime("%d/%m/%Y %H:%M GMT")
    footer_data = [
        [Paragraph("www.smilecliniq.com", styleFooter), Spacer(1, 0.1*inch), Paragraph(current_datetime, styleFooter)]
    ]
    footer_table = Table(footer_data, colWidths=[2.5*inch, 3*inch, 2.5*inch])
    footer_table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (0, 0), 'LEFT'),
        ('ALIGN', (2, 0), (2, 0), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')
    ]))
    elements.append(footer_table)
    elements.append(Spacer(1, 0.2 * inch))

    # Add the page title below the logo
    elements.append(Spacer(1, 0.2 * inch))  # Adjusted upward
    title = Paragraph("Language Audit", styleLN)
    elements.append(title)
    elements.append(Spacer(1, 0.5 * inch))  # Added gap

    # Load the SVG image
    svg_path = "imgs/discover-icon.svg"  # Replace with the actual path to your SVG image
    svg_drawing = svg2rlg(svg_path)
    svg_drawing.scale(1.0, 1.0)  # Adjust scaling as needed

    # Caution Sentences Header Data
    caution_header_data = [
        [svg_drawing, Paragraph("Caution Sentences", styleAptosDisplayBold)]
    ]

    # Caution Sentences Body Data
    list_items = [ListItem(Paragraph(process_text_with_bold(sentence), styleN)) for sentence in random_sentences]
    caution_body_data = [
        ['', ListFlowable(list_items, bulletType='bullet', start='circle', leftIndent=0.25*inch)]
    ]

    # Combine Caution Sentences Header and Body Data
    caution_data = caution_header_data + caution_body_data

    # Create caution table
    caution_table = Table(caution_data, colWidths=[1*inch, 6*inch])
    caution_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTSIZE', (1, 1), (-1, 1), 14),
        ('BOTTOMPADDING', (1, 1), (-1, 1), 12),
        ('BACKGROUND', (1, 2), (-1, -1), colors.white),  # Set the background to white
        ('GRID', (0, 0), (-1, -1), 0, colors.white),     # Remove the grid
    ]))

    elements.append(caution_table)
    elements.append(Spacer(1, 0.5*inch))

    # Header Data
    header_data = [
        [svg_drawing, Paragraph("Language Suggestion", styleAptosDisplayBold), ""]
    ]

    # Table Data
    table_data = [['', Paragraph('Original Content', styleAptosDisplayBold), Paragraph('Changes', styleAptosDisplayBold)]]

    for item in data:
        row = [
            centered_square(),
            Paragraph(item['originalContent'], styleN),
            Paragraph(process_text_with_bold(item['changes']), styleN)
        ]
        table_data.append(row)

    # Combine Header and Table Data
    combined_data = header_data + table_data

    # Create table
    table = Table(combined_data, colWidths=[1*inch, 3*inch, 3*inch])
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTSIZE', (1, 1), (-1, 1), 14),
        ('BOTTOMPADDING', (1, 1), (-1, 1), 12),
        ('BACKGROUND', (1, 2), (-1, -1), colors.white),  # Set the background to white
        ('GRID', (0, 0), (-1, -1), 0, colors.white),     # Remove the grid
    ]))

    elements.append(table)
    doc.build(elements)

# Create the PDF
create_pdf("output.pdf")