In [13]:
%pip install pandas reportlab



In [14]:
import pandas as pd

# Single source of truth for the input file, so the path that is opened and the
# path named in the error message can never disagree.
DATA_CSV = 'data1.csv'

# NOTE(review): a later cell in this notebook is what writes data1.csv, so on a
# fresh checkout this cell takes the not-found branch unless the file already
# exists on disk.
try:
    df = pd.read_csv(DATA_CSV)
    display(df.head())  # preview the first rows as a rich table
except FileNotFoundError:
    print(f"Error: {DATA_CSV} not found. Please make sure the file exists in the correct directory.")
    # Downstream cells check for None before analysing.
    df = None

Unnamed: 0,col1,col2,col3
0,0.331407,10,A
1,0.168547,82,B
2,0.944946,21,A
3,0.640673,44,C
4,0.321185,43,B


In [15]:
# Structural overview of `df`; bail out with a message when loading failed.
if df is None:
    print("DataFrame is not loaded. Cannot perform analysis.")
else:
    # Heading and value are emitted on separate lines via sep="\n".
    print("DataFrame Dimensions (Rows, Columns):", df.shape, sep="\n")
    print("\nColumn Data Types:", df.dtypes, sep="\n")

    # describe() is routed through display() so it renders as a rich table.
    print("\nSummary Statistics for Numerical Columns:")
    display(df.describe())

    print("\nNumber of Unique Values per Column:", df.nunique(), sep="\n")

    # Per-column count of null cells.
    print("\nMissing Values per Column:", df.isnull().sum(), sep="\n")

DataFrame Dimensions (Rows, Columns):
(10, 3)

Column Data Types:
col1    float64
col2      int64
col3     object
dtype: object

Summary Statistics for Numerical Columns:


Unnamed: 0,col1,col2
count,10.0,10.0
mean,0.467069,51.8
std,0.266208,27.695968
min,0.04886,10.0
25%,0.32374,37.5
50%,0.477912,43.5
75%,0.615886,75.0
max,0.944946,94.0



Number of Unique Values per Column:
col1    10
col2    10
col3     3
dtype: int64

Missing Values per Column:
col1    0
col2    0
col3    0
dtype: int64


In [16]:
import pandas as pd
import numpy as np

# Create a sample pandas DataFrame.
# A seeded Generator is used so the CSV, and every statistic derived from it,
# is identical on each Restart & Run All instead of changing per execution.
rng = np.random.default_rng(42)
data = {'col1': rng.random(10),                 # 10 floats in [0, 1)
        'col2': rng.integers(1, 100, size=10),  # 10 ints in [1, 100); upper bound exclusive
        'col3': ['A', 'B', 'A', 'C', 'B', 'C', 'A', 'B', 'C', 'A']}
sample_df = pd.DataFrame(data)

# Save this DataFrame to a CSV file named data1.csv.
# index=False keeps the row index out of the file.
sample_df.to_csv('data1.csv', index=False)

# Load the data from the newly created data1.csv file into a new DataFrame
loaded_df = pd.read_csv('data1.csv')

# Display the head of the newly loaded DataFrame to verify it was loaded correctly
display(loaded_df.head())

Unnamed: 0,col1,col2,col3
0,0.805634,53,A
1,0.627224,57,B
2,0.462732,65,A
3,0.830717,89,C
4,0.418564,91,B


In [17]:
# Structural overview of the reloaded frame: size first, then per-column dtypes.
# Each heading and its value are emitted on separate lines via sep="\n".
print("DataFrame Dimensions (Rows, Columns):", loaded_df.shape, sep="\n")
print("\nColumn Data Types:", loaded_df.dtypes, sep="\n")

# describe() is routed through display() so it renders as a rich table.
print("\nSummary Statistics for Numerical Columns:")
display(loaded_df.describe())

# Distinct-value counts, then null-cell counts, per column.
print("\nNumber of Unique Values per Column:", loaded_df.nunique(), sep="\n")
print("\nMissing Values per Column:", loaded_df.isnull().sum(), sep="\n")

DataFrame Dimensions (Rows, Columns):
(10, 3)

Column Data Types:
col1    float64
col2      int64
col3     object
dtype: object

Summary Statistics for Numerical Columns:


Unnamed: 0,col1,col2
count,10.0,10.0
mean,0.600447,58.2
std,0.236968,28.310972
min,0.08422,6.0
25%,0.488795,50.75
50%,0.619886,56.0
75%,0.784551,83.0
max,0.874547,92.0



Number of Unique Values per Column:
col1    10
col2    10
col3     3
dtype: int64

Missing Values per Column:
col1    0
col2    0
col3    0
dtype: int64


In [18]:
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors

# Assemble the content of a PDF report about `loaded_df`.
# The document template targets "data1_analysis_report.pdf" with A4 pages;
# `elements` collects the pieces of the report in the order they are appended.
# NOTE(review): nothing in this visible portion actually writes the PDF —
# presumably a doc.build(elements) call follows later; confirm.
doc = SimpleDocTemplate("data1_analysis_report.pdf", pagesize=A4)
elements = []

# Derive custom title/heading styles from the sample stylesheet, overriding
# only the space left after each paragraph.
styles = getSampleStyleSheet()
title_style = ParagraphStyle(name='TitleStyle', parent=styles['Title'], spaceAfter=14)
heading_style = ParagraphStyle(name='HeadingStyle', parent=styles['h1'], spaceAfter=12)


# Report title followed by a vertical gap of 12.
elements.append(Paragraph("Data Analysis Report", title_style))
elements.append(Spacer(1, 12))

# Section heading for the raw data table.
elements.append(Paragraph("Original Data", heading_style))

# Table content: one header row of column names, then every DataFrame row as a
# plain list. Values are passed through as-is (no rounding/formatting applied).
data_list = [loaded_df.columns.tolist()] + loaded_df.values.tolist()
table = Table(data_list)
# Style commands: the first three target the header row (grey background,
# whitesmoke text, bold font) and the fifth adds extra bottom padding to it;
# ALIGN and GRID span the whole table; the second BACKGROUND colours the
# remaining rows beige.
table_style = TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                          ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                          ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                          ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                          ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                          ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                          ('GRID', (0, 0), (-1, -1), 1, colors.black)])
table.setStyle(table_style)
elements.append(table)

# Larger gap (24) after the table, before whatever is appended next.
elements.append(Spacer(1, 24))