In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Configuration: input file, the column that identifies a duplicated record,
# and the columns kept for the analysis.
Dataset = "Sleep_health_and_lifestyle_dataset.csv"
DuplicateValues = "Person ID"
RequiredColumns = [
    "Gender",
    "Occupation",
    "Sleep Duration",
    "Quality of Sleep",
]

# Load the raw CSV into a DataFrame.
Data = pd.read_csv(Dataset)

# Keep the first row for each distinct DuplicateValues entry, then narrow the
# frame down to the analysis columns. Data itself is left untouched.
CleanData = Data.drop_duplicates(subset=DuplicateValues, keep="first")[RequiredColumns]

In [3]:
# Creating Summary Statistics
# describe() already labels its rows (count, mean, std, min, quartiles, max),
# so the median is appended as one more explicitly labelled row. Naming the
# row directly avoids a frame-wide fillna("median"), which would also have
# overwritten any genuine NaN statistic with the string "median".
SumStats = CleanData.describe(exclude=["O"])
Median = CleanData.median(numeric_only=True)
Median_df = Median.rename("median").to_frame().T  # One-row frame labelled "median"
# rename_axis keeps the index header "index" that the printed table shows.
SumStats = pd.concat([SumStats, Median_df]).rename_axis("index")

# Display Summary Statistics
print(SumStats)

        Sleep Duration  Quality of Sleep
index                                   
count       374.000000        374.000000
mean          7.132086          7.312834
std           0.795657          1.196956
min           5.800000          4.000000
25%           6.400000          6.000000
50%           7.200000          7.000000
75%           7.800000          8.000000
max           8.500000          9.000000
median        7.200000          7.000000


In [4]:
# Creating a Stacked Plot
# Each figure is built on its own explicit Figure/Axes pair so that nothing
# depends on matplotlib's implicit "current figure" state.
xVal = "Sleep Duration"
StackVal = "Occupation"
# Rows: distinct xVal values; columns: StackVal categories; cells: row counts.
# fill_value=0 keeps the counts as integers for combinations that never occur.
PivotData = CleanData.groupby([xVal, StackVal]).size().unstack(fill_value=0)
fig, ax = plt.subplots(figsize=(12, 8))
PivotData.plot(kind="bar", stacked=True, colormap="tab20", ax=ax)
ax.set_title(f"Distribution of {StackVal} by {xVal}")
ax.set_xlabel(xVal)
ax.set_ylabel("Count")
# The legend is anchored outside the axes, to the right of the bars.
ax.legend(title=StackVal, bbox_to_anchor=(1.05, 1), loc="upper left")
fig.tight_layout()

# Save Stacked Plot to file
stack_plot_file = "stack_plot.png"
fig.savefig(stack_plot_file)
plt.close(fig)

# Creating a Bar Plot
yVal = "Quality of Sleep"
Segregate = "Gender"
# Mean xVal for every (yVal, Segregate) pair, one column per Segregate value.
GroupingData = CleanData.groupby([yVal, Segregate])[xVal].mean().unstack()
fig, ax = plt.subplots()
# Two colours are supplied, one per Segregate column.
GroupingData.plot(kind="barh", color=["#FFD0EC", "#E59BE9"], ax=ax)
ax.set_xlabel(f"Average {xVal}")
ax.set_ylabel(yVal)
ax.set_title(f"Average {xVal} by {yVal} and {Segregate}")
# Legend title follows the grouping variable instead of a hard-coded string.
ax.legend(title=Segregate, loc="best")
fig.tight_layout()

# Save Bar Plot to file
bar_plot_file = "bar_plot.png"
fig.savefig(bar_plot_file)
plt.close(fig)


In [None]:
# Build a PDF report: a table of the summary statistics on the first page,
# followed by each saved plot on a page of its own.
# FPDF is provided by the third-party `fpdf` package. It was used here without
# ever being imported, so the import lives in this cell to keep it runnable on
# a fresh kernel.
from fpdf import FPDF

# Initialize PDF
pdf = FPDF(format='letter')
pdf.add_page()

# Add Title
pdf.set_font("Arial", 'B', 14)
pdf.cell(0, 10, txt="Summary Statistics", ln=True, align='C')
pdf.ln(10)

# Add Summary Statistics as a bordered table: one row per statistic and one
# column per variable. Column widths are derived from the printable page width
# so the table cannot run past the right margin.
label_width = 40
printable_width = pdf.w - pdf.l_margin - pdf.r_margin
value_width = (printable_width - label_width) / max(len(SumStats.columns), 1)

pdf.set_font("Arial", 'B', 12)
pdf.cell(label_width, 10, txt="Statistic", border=1)
for col in SumStats.columns:
    pdf.cell(value_width, 10, txt=str(col), border=1, align='C')
pdf.ln(10)  # ln() moves to the next row and returns to the left margin

pdf.set_font("Arial", size=12)
for stat, row in SumStats.iterrows():
    pdf.cell(label_width, 10, txt=str(stat), border=1)
    for value in row:
        # Numbers are shown with two decimals; anything else is printed as-is.
        shown = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        pdf.cell(value_width, 10, txt=shown, border=1, align='R')
    pdf.ln(10)

# Add Plots to PDF
# Every plot starts on a fresh page. image() is called without `y`, so it is
# placed at the current cursor position and the cursor is moved below the
# picture; passing y explicitly left the cursor in place, which made the next
# heading and image draw on top of the previous plot.
for heading, image_path in (("Stacked Plot", stack_plot_file),
                            ("Bar Plot", bar_plot_file)):
    pdf.add_page()
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, txt=heading, ln=True, align='C')
    pdf.ln(10)
    pdf.image(image_path, x=10, w=180)

# Save PDF
pdf.output("summary_statistics_report.pdf")
