In [None]:
import os
from datetime import datetime
import csv
import pandas as pd

_PLOT_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _collect_plot_files(plot_folder):
    """Return ``(filename, size_kb)`` pairs for image files in *plot_folder*, sorted by name."""
    # isdir rather than exists: a regular file passed by mistake is treated
    # as "no plots" instead of crashing inside os.listdir.
    if not os.path.isdir(plot_folder):
        return []

    plots = []
    # os.listdir returns entries in arbitrary order; sort so the report is
    # reproducible between runs.
    for name in sorted(os.listdir(plot_folder)):
        full_path = os.path.join(plot_folder, name)
        # Skip sub-directories that merely happen to carry an image suffix.
        if name.lower().endswith(_PLOT_EXTENSIONS) and os.path.isfile(full_path):
            plots.append((name, os.path.getsize(full_path) / 1024))
    return plots


def _write_csv_report(path, timestamp, shape, columns, missing_counts, plot_folder, plots):
    """Write the CSV flavour of the summary report to *path*."""
    num_rows, num_cols = shape
    # newline='' is what the csv module requires; UTF-8 keeps non-ASCII
    # column or file names from failing on legacy platform code pages.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)

        writer.writerow([f"Summary Report - {timestamp}"])
        writer.writerow([])

        writer.writerow(["Cleaned DataFrame Info"])
        writer.writerow(["Rows", num_rows])
        writer.writerow(["Columns", num_cols])
        writer.writerow(["Column names"] + columns)
        writer.writerow([])

        writer.writerow(["Missing Values per Column"])
        writer.writerow(["Column", "Missing Count"])
        for col, miss in missing_counts.items():
            writer.writerow([col, miss])
        writer.writerow([])

        writer.writerow([f"Saved Plot Files in '{plot_folder}'"])
        if plots:
            writer.writerow(["Filename", "Size (KB)"])
            for name, size_kb in plots:
                writer.writerow([name, f"{size_kb:.2f}"])
        else:
            writer.writerow(["No plot files found"])
        writer.writerow([])

        writer.writerow(["End of Report"])


def _write_text_report(path, timestamp, shape, columns, missing_counts, plot_folder, plots):
    """Write the plain-text flavour of the summary report to *path*."""
    num_rows, num_cols = shape
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f"Summary Report - {timestamp}\n")
        f.write("=" * 50 + "\n\n")

        f.write("Cleaned DataFrame Info:\n")
        f.write(f"Rows: {num_rows}\n")
        f.write(f"Columns: {num_cols}\n")
        f.write(f"Column names: {columns}\n\n")

        f.write("Missing Values per Column:\n")
        for col, miss in missing_counts.items():
            f.write(f"  {col}: {miss}\n")
        f.write("\n")

        f.write(f"Saved Plot Files in '{plot_folder}':\n")
        if plots:
            for name, size_kb in plots:
                f.write(f"  {name} - {size_kb:.2f} KB\n")
        else:
            f.write("  No plot files found.\n")

        f.write("\nEnd of Report\n")


def generate_summary_report(cleaned_df, plot_folder='plots'):
    """Write a text and a CSV summary of *cleaned_df* into the current directory.

    Both reports contain the DataFrame's row/column counts, its column names,
    the number of missing values per column, and the image files
    (.png/.jpg/.jpeg, case-insensitive) found directly inside *plot_folder*
    together with their sizes in KB. The files are named
    ``summary_report_<YYYYmmdd_HHMMSS>.txt`` and ``.csv`` and are encoded as
    UTF-8. A confirmation line is printed once both files are written.

    Args:
        cleaned_df: pandas DataFrame to summarise.
        plot_folder: Directory to scan for plot images. If it is missing or
            is not a directory, the reports state that no plots were found.

    Returns:
        None.
    """
    # Take the clock reading once so the header timestamp and the filename
    # can never disagree across a second boundary.
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    filename_timestamp = now.strftime("%Y%m%d_%H%M%S")
    report_filename = f"summary_report_{filename_timestamp}.txt"
    report_filename2 = f"summary_report_{filename_timestamp}.csv"

    # Basic stats about the DataFrame
    shape = cleaned_df.shape
    columns = cleaned_df.columns.tolist()
    missing_counts = cleaned_df.isnull().sum()

    # Scan the plot folder once; both reports share the same listing and sizes.
    plots = _collect_plot_files(plot_folder)

    _write_csv_report(report_filename2, timestamp, shape, columns,
                      missing_counts, plot_folder, plots)
    _write_text_report(report_filename, timestamp, shape, columns,
                       missing_counts, plot_folder, plots)

    print(f"Summary reports saved as '{report_filename}' and '{report_filename2}'")

# Load the dataset from its fixed location on the local Desktop.
dataset_path = "C:\\Users\\T L S\\Desktop\\SoftSinc\\Week 2\\newdataset.csv"
df = pd.read_csv(dataset_path)

# Write the text and CSV summary reports, listing any images found in 'plots'.
generate_summary_report(df, plot_folder='plots')

Summary reports saved as 'summary_report_20250608_223535.txt' and 'summary_report_20250608_223535.csv'
