In [1]:

import os
import sys
import subprocess

# Ensure necessary packages are installed
def install_missing_packages():
    """Install the third-party packages this script needs if they are missing.

    Each required module is probed on its own, so several missing packages
    are all installed in a single run instead of only the first one.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command exits
            with a non-zero status.
        ImportError: If a module still cannot be imported after installation.
    """
    # Local import keeps this function self-contained; the file-level
    # imports only provide os, sys and subprocess.
    import importlib

    required_modules = (
        "pandas",
        "reportlab.lib.pagesizes",
        "reportlab.pdfgen.canvas",
    )

    for module_name in required_modules:
        try:
            importlib.import_module(module_name)
            continue
        except ImportError as exc:
            # ImportError.name holds the module that could not be found; this
            # is more reliable than parsing the human-readable message. Only
            # the top-level name is a pip-installable distribution here.
            missing_package = (exc.name or module_name).split(".")[0]

        print(f"Installing missing package: {missing_package}")
        subprocess.check_call([sys.executable, "-m", "pip", "install", missing_package])
        print(f"Package {missing_package} installed successfully.")

        # Make the import system notice the freshly installed files, then
        # retry so a failed installation surfaces here rather than later.
        importlib.invalidate_caches()
        importlib.import_module(module_name)

# Run the dependency check now, before the third-party imports that follow.
install_missing_packages()

import pandas as pd
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def read_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        The parsed DataFrame, or None when the path does not exist or the
        file cannot be read (an error message is printed in both cases).
    """
    if os.path.exists(file_path):
        try:
            return pd.read_csv(file_path)
        except Exception as err:
            # Best-effort loader: report the problem and signal failure
            # with None instead of propagating the exception.
            print(f"Error reading file: {err}")
            return None

    print(f"Error: File not found: {file_path}")
    return None

def analyze_data(data):
    """Compute basic summary information for a DataFrame.

    Args:
        data: The pandas DataFrame to analyze.

    Returns:
        A dict with three keys:
        ``column_summary`` - per-column statistics from
        ``describe(include='all')`` as a nested dict (empty when the frame
        has no columns), ``row_count`` - number of rows, and
        ``column_count`` - number of columns.
    """
    column_count = len(data.columns)

    # describe() raises ValueError on a frame without columns, so report an
    # empty summary for that case instead of crashing.
    if column_count == 0:
        column_summary = {}
    else:
        column_summary = data.describe(include='all').to_dict()

    return {
        "column_summary": column_summary,
        "row_count": len(data),
        "column_count": column_count,
    }

def generate_pdf_report(file_name, analysis):
    """Write the analysis results to a PDF file.

    The report has a title, the row and column counts, and then one
    heading per column followed by its statistics, one per line.

    Args:
        file_name: Path of the PDF file to create.
        analysis: Dict with the keys ``row_count``, ``column_count`` and
            ``column_summary`` (a mapping of column name to a mapping of
            statistic name to value).
    """
    left_x = 100       # x position of headings and overview lines
    indent_x = 120     # x position of per-column statistics
    top_y = 750        # y position of the first line on a page
    bottom_y = 50      # lowest y at which a line may still be drawn
    line_height = 20

    pdf = canvas.Canvas(file_name, pagesize=letter)
    pdf.setTitle("Automated Report")

    # Title
    pdf.setFont("Helvetica-Bold", 16)
    pdf.drawString(left_x, top_y, "Automated Data Analysis Report")

    # Overview
    pdf.setFont("Helvetica", 12)
    pdf.drawString(left_x, 720, f"Rows: {analysis['row_count']}")
    pdf.drawString(left_x, 700, f"Columns: {analysis['column_count']}")

    # Column summary
    y = 680
    pdf.drawString(left_x, y, "Column Summary:")
    y -= line_height

    def draw_line(x, text):
        """Draw one line of text, starting a new page first if needed."""
        nonlocal y
        # Check BEFORE drawing so that neither column headings nor
        # statistics are ever placed inside the bottom margin, and no
        # blank trailing page is produced after the last line.
        if y < bottom_y:
            pdf.showPage()
            # Re-apply the body font on the new page.
            pdf.setFont("Helvetica", 12)
            y = top_y
        pdf.drawString(x, y, text)
        y -= line_height

    for column, stats in analysis["column_summary"].items():
        draw_line(left_x, f"{column}:")
        for stat, value in stats.items():
            draw_line(indent_x, f"{stat}: {value}")

    pdf.save()

def main(input_file="customers-100.csv", output_file="analysis_report.pdf"):
    """Read a CSV file, analyze it, and write a PDF report.

    Args:
        input_file: Path of the CSV file to analyze. Defaults to the
            original hard-coded sample file name.
        output_file: Path of the PDF report to create. Defaults to the
            original hard-coded report name.

    Returns:
        None. Progress and error messages are printed to standard output.
    """
    # Check file existence up front so the user gets a clear hint about
    # supplying a valid path.
    if not os.path.exists(input_file):
        print(f"Error: Input file '{input_file}' does not exist. Please provide a valid file path.")
        return

    # Read and analyze data
    data = read_data(input_file)
    if data is None:
        print("Failed to process the data.")
        return

    analysis = analyze_data(data)
    generate_pdf_report(output_file, analysis)
    print(f"Report generated: {output_file}")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

 

Report generated: analysis_report.pdf
