In [None]:
import pandas as pd
from reportlab.lib import colors
from reportlab.lib.pagesizes import landscape, A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from xml.sax.saxutils import escape

def csv_to_pdf(csv_file, pdf_file):
    """Render a CSV file as one styled, word-wrapped table in a landscape A4 PDF.

    Every header and cell is converted to ``str``, XML-escaped and wrapped in a
    ``Paragraph`` so long values wrap inside their column instead of
    overflowing. Column widths are proportional to the longest text in each
    column (header included) and are scaled down when their sum exceeds the
    printable page width. The header row is repeated on every page the table
    spans.

    Args:
        csv_file: Path of the CSV file to read with ``pandas.read_csv``.
        pdf_file: Path of the PDF file to write.

    Returns:
        None. The PDF is written to ``pdf_file`` and a confirmation is printed.
    """
    # Read the CSV file
    df = pd.read_csv(csv_file)

    # Create a PDF document with landscape orientation and A4 size
    pdf = SimpleDocTemplate(pdf_file, pagesize=landscape(A4))

    # Text attributes must live on the paragraph styles: TableStyle commands
    # such as TEXTCOLOR / FONTNAME / FONTSIZE only affect plain-string cells,
    # not Paragraph flowables. alignment=1 is reportlab's TA_CENTER.
    base_style = getSampleStyleSheet()['Normal']
    header_style = base_style.clone(
        'CsvTableHeader',
        fontName='Helvetica-Bold',
        fontSize=10,
        leading=12,
        textColor=colors.whitesmoke,
        alignment=1,
    )
    body_style = base_style.clone(
        'CsvTableBody',
        fontSize=8,
        leading=10,
        alignment=1,
    )

    # Convert the DataFrame to a list of rows of word-wrapping Paragraph cells,
    # escaping characters that are special in Paragraph markup (&, <, >).
    data = [[Paragraph(escape(str(col)), header_style) for col in df.columns]]
    data.extend(
        [Paragraph(escape(str(cell)), body_style) for cell in row]
        for row in df.itertuples(index=False)
    )

    # Estimate each column's width at 5 points per character of its longest
    # entry. The header takes part in the estimate, and default=0 keeps a CSV
    # with headers but no data rows from raising on an empty max().
    col_widths = []
    for name, series in df.items():
        longest_value = max((len(str(val)) for val in series), default=0)
        col_widths.append(max(longest_value, len(str(name)), 1) * 5)

    # Scale column widths down proportionally when they exceed the printable
    # page width; narrower tables keep their estimated widths.
    max_width = pdf.width
    total_width = sum(col_widths)
    if total_width > max_width:
        scale_factor = max_width / total_width
        col_widths = [w * scale_factor for w in col_widths]

    # Create the Table through its public API (colWidths) rather than poking
    # the private _argW attribute; repeatRows=1 repeats the header per page.
    table = Table(data, colWidths=col_widths, repeatRows=1)

    # Cell-level styling: backgrounds, padding and grid. The text commands are
    # kept for any plain-string cells but do not affect the Paragraph cells.
    style = TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 10),  # Header font size
        ('FONTSIZE', (0, 1), (-1, -1), 8),  # Smaller font size for the content
        ('BOTTOMPADDING', (0, 0), (-1, 0), 10),  # Add padding to the header
        ('TOPPADDING', (0, 0), (-1, 0), 10),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 6),  # Add padding to the table cells
        ('TOPPADDING', (0, 1), (-1, -1), 6),
        ('LEFTPADDING', (0, 0), (-1, -1), 6),  # Left and right padding for all cells
        ('RIGHTPADDING', (0, 0), (-1, -1), 6),
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),  # Add borders to cells
    ])
    table.setStyle(style)

    # Build the PDF with the table as its only flowable
    pdf.build([table])

    print(f'PDF saved as {pdf_file}')

# Example usage: convert the standard-charges CSV into a PDF report.
# Note that the input is read from "Research - Copy" while the output is
# written to "Research".
csv_file, pdf_file = (
    '/home/noman/local-LLM-with-RAG/Research - Copy/721563402_Mat-Su_standardcharges_modified_v2.csv',
    '/home/noman/local-LLM-with-RAG/Research/721563402_Mat-Su_standardcharges_modified_v2.pdf',
)
csv_to_pdf(csv_file, pdf_file)

In [3]:
# import pandas as pd
# from reportlab.lib.pagesizes import letter, landscape
# from reportlab.pdfgen import canvas
# from reportlab.lib import colors
# from textwrap import wrap

# def csv_to_pdf(csv_file, pdf_file):
#     # Read the CSV file
#     df = pd.read_csv(csv_file)

#     # Create a canvas object
#     c = canvas.Canvas(pdf_file, pagesize=landscape(letter))
    
#     # Set some basic parameters
#     width, height = landscape(letter)
#     c.setFont("Helvetica", 10)
#     line_height = 14
#     x_offset = 50
#     y_offset = height - 50
#     column_gaps = [0, 350, 500]  # Adjusted x-offset for each column

#     # Print the header with background
#     c.setFillColor(colors.lightgrey)
#     c.rect(x_offset - 5, y_offset - line_height + 4, width - 100, line_height, fill=True, stroke=False)
#     c.setFillColor(colors.black)

#     for col_num, col_name in enumerate(df.columns):
#         c.drawString(x_offset + column_gaps[col_num], y_offset, col_name)
#     y_offset -= line_height

#     # Print each row
#     for index, row in df.iterrows():
#         for col_num, value in enumerate(row):
#             current_x_offset = x_offset + column_gaps[col_num]

#             if df.columns[col_num] == "Description":
#                 # Wrap the text for large "Description" content
#                 wrapped_text = wrap(str(value), width=60)  # Wrap text to fit
#                 for line in wrapped_text:
#                     c.drawString(current_x_offset, y_offset, line)
#                     y_offset -= line_height
#             else:
#                 c.drawString(current_x_offset, y_offset, str(value))

#         y_offset -= line_height

#         # Create a new page if the content is too long for the current page
#         if y_offset < 50:
#             c.showPage()
#             y_offset = height - 50
#             c.setFont("Helvetica", 10)

#     # Save the PDF file
#     c.save()

# # Example usage
# csv_to_pdf('/home/noman/local-LLM-with-RAG/Research/721563402_Mat-Su_standardcharges_modified_v2.csv', '/home/noman/local-LLM-with-RAG/Research/721563402_Mat-Su_standardcharges_modified_v2.pdf')

In [1]:
# import pandas as pd

# def csv_to_json(csv_file, json_file):
#     # Read the CSV file
#     df = pd.read_csv(csv_file)
    
#     # Convert to JSON and save
#     df.to_json(json_file, orient='records', indent=4)

# # Example usage
# csv_to_json('/home/noman/local-LLM-with-RAG/Research/721563402_Mat-Su_standardcharges_modified_v2.csv', '/home/noman/local-LLM-with-RAG/Research/721563402_Mat-Su_standardcharges_modified_v2.json')

In [1]:
from pdf2image import convert_from_path

# Source PDF and the directory that receives the rendered pages
# (replace both with your own locations).
pdf_path = "/home/noman/local-LLM-with-RAG/Research - Copy/Medical-Receipt-Template.pdf"
output_folder = "/home/noman/local-LLM-with-RAG/Research"

# Rasterize every page of the PDF; dpi controls the output resolution.
images = convert_from_path(pdf_path, dpi=300)

# Write the pages as page_1.png, page_2.png, ... (numbering starts at 1).
for page_number, image in enumerate(images, start=1):
    output_path = f"{output_folder}/page_{page_number}.png"
    image.save(output_path, 'PNG')

In [2]:
from PIL import Image
# Load model directly
from transformers import AutoProcessor, AutoModelForPreTraining

# Single source of truth for the checkpoint so the processor and the model
# are always loaded from the same repository.
MODEL_ID = "alpindale/Llama-3.2-90B-Vision-Instruct"

processor = AutoProcessor.from_pretrained(MODEL_ID)
# NOTE(review): this loads the full checkpoint with default dtype and device
# placement; confirm the host has enough memory for a model of this size.
model = AutoModelForPreTraining.from_pretrained(MODEL_ID)

# url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
# image = Image.open(requests.get(url, stream=True).raw)

# Replace with the path to your local image file
local_path = "/home/noman/local-LLM-with-RAG/Research/page_1.png"

# Open the image from the local path
image = Image.open(local_path)

# One user turn: an image placeholder followed by the text to complete.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Patient name is: "}
    ]}
]
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# The chat template has already emitted the special tokens (including the
# begin-of-text token), so the tokenizer must not add them a second time.
inputs = processor(
    image,
    input_text,
    add_special_tokens=False,
    return_tensors="pt",
).to(model.device)

# Generate a short continuation and print the decoded sequence
# (prompt plus up to 30 new tokens).
output = model.generate(**inputs, max_new_tokens=30)
print(processor.decode(output[0]))