<a href="https://colab.research.google.com/github/thaise-steffani/Clinical-Data-Analysis-Portfolio-Project/blob/main/ClinicalDataExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
!pip install reportlab



In [32]:
import pandas as pd
import matplotlib.pyplot as plt
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors

In [33]:
# 1. Load the example dataset

# Mock clinical-trial data hosted in the project's GitHub repository.
url = "https://raw.githubusercontent.com/thaise-steffani/Clinical-Data-Analysis-Portfolio-Project/main/clinical_trial_mock_data.csv"
# NOTE(review): recent pandas versions parse the literal string "None" as NaN by
# default. If the CSV encodes "no adverse event" as "None", those rows arrive here
# as missing values -- confirm against the raw file and, if so, consider
# pd.read_csv(url, keep_default_na=False, na_values=[""]).
df = pd.read_csv(url)

# Quick check: first rows, then column dtypes and non-null counts.
print(df.head())
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() would add a stray "None" line to the output.
df.info()

  patient_id  age     sex treatment_group  baseline_score  outcome_score  \
0       P001   25    Male       Treatment            55.0             55   
1       P002   17  Female         Placebo            48.6             60   
2       P003   34    Male       Treatment            56.5             48   
3       P004   45  Female         Placebo            65.2             53   
4       P005   60    Male       Treatment            47.7             49   

  adverse_event  
0           NaN  
1     Pregnancy  
2          Mild  
3        Severe  
4          Mild  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   patient_id       20 non-null     object 
 1   age              20 non-null     int64  
 2   sex              20 non-null     object 
 3   treatment_group  20 non-null     object 
 4   baseline_score   20 non-null     float64
 5   outcom

In [34]:
# 2. Edit checks (data validation rules)
# Each check selects the rows of `df` that violate one rule.

# Check 1 - the adverse event field must be populated
missing_adverse = df.loc[df['adverse_event'].isnull()]

# Check 2 - a "Pregnancy" adverse event is inconsistent with sex == "Male"
invalid_pregnancy = df.loc[df['sex'].eq("Male") & df['adverse_event'].eq("Pregnancy")]

# Check 3 - patients must be at least 18 years old
invalid_age = df.loc[df['age'].lt(18)]

# Check 4 - a populated adverse event must be one of the expected values
# (missing values are excluded here; they are reported by check 1)
allowed_ae = ["None", "Mild", "Severe"]
invalid_ae = df.loc[df['adverse_event'].notnull() & ~df['adverse_event'].isin(allowed_ae)]

# Check 5 - outcome scores may not be negative
invalid_outcome_score = df.loc[df['outcome_score'].lt(0)]

In [35]:
# 3. Validation summary report
# One row per edit check: its label, how many rows failed it, and the
# comma-separated patient IDs of those rows ("None" when no row failed).

rule_hits = {
    "Missing Adverse Event": missing_adverse,
    "Male Pregnancy Case": invalid_pregnancy,
    "Invalid Age (<18)": invalid_age,
    "Invalid Adverse Event Value": invalid_ae,
    "Negative Outcome Score": invalid_outcome_score,
}

validation_report = pd.DataFrame({
    "Validation Rule": list(rule_hits),
    "Number of Issues": [len(hits) for hits in rule_hits.values()],
    "Example Patient IDs": [
        # An empty join yields "", which is falsy, so `or` substitutes "None".
        ', '.join(hits['patient_id'].astype(str).tolist()) or "None"
        for hits in rule_hits.values()
    ],
})

In [36]:

# 4. Visualization of AEs by Treatment Arm

# Rows = treatment arm, columns = adverse-event category, values = patient counts.
# groupby drops NaN keys by default, so rows with a missing adverse event are
# not represented in this table or in the plot.
ae_counts = df.groupby(["treatment_group", "adverse_event"]).size().unstack(fill_value=0)

print("\nAdverse events count:")
print(ae_counts)

# Save plot for PDF.
# Create the figure and axes once and pass the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without ax= makes pandas open a second
# figure, which leaves the first one empty and never closed.
fig, ax = plt.subplots(figsize=(10, 6))
ae_counts.plot(kind="bar", stacked=True, ax=ax)
ax.set_title("Adverse Events by Treatment Arm")
ax.set_xlabel("Treatment Arm")
ax.set_ylabel("Number of Adverse Events")
ax.legend(title="Adverse Event")
fig.tight_layout()
fig.savefig("ae_plot.png")
# Close this specific figure so it is neither leaked nor rendered inline.
plt.close(fig)


Adverse events count:
adverse_event    Mild  Moderate  Pregnancy  Severe
treatment_group                                   
Placebo             2         1          1       4
Treatment           4         1          2       0


<Figure size 1000x600 with 0 Axes>

In [38]:
# 5. Generate Professional PDF Report
# Inputs: `df` (patient count), `validation_report` (summary table) and the
# "ae_plot.png" file saved by the plotting cell.
# Output: Clinical_Data_Validation_Report.pdf in the working directory.

doc = SimpleDocTemplate("Clinical_Data_Validation_Report.pdf")
styles = getSampleStyleSheet()
story = []  # flowables, laid out in the order they are appended

# Title
story.append(Paragraph("Clinical Data Validation Report – Phase II Mock Study", styles['Title']))
story.append(Spacer(1, 12))

# Summary
summary_text = f"""
This report presents the results of a data quality review performed on a mock Phase II clinical trial dataset
({len(df)} patients). Data validation focused on patient demographics, adverse events, and outcome scores.
"""
story.append(Paragraph(summary_text, styles['Normal']))
story.append(Spacer(1, 12))

# Validation Table: header row followed by one row per validation rule
table_data = [["Validation Rule", "Number of Issues", "Patient ID"]] + validation_report.values.tolist()
table = Table(table_data, hAlign="LEFT")
table.setStyle(TableStyle([
    ('BACKGROUND', (0,0), (-1,0), colors.grey),
    ('TEXTCOLOR',(0,0),(-1,0),colors.whitesmoke),
    ('ALIGN',(0,0),(-1,-1),'CENTER'),
    ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
    ('BOTTOMPADDING', (0,0), (-1,0), 12),
    ('GRID', (0,0), (-1,-1), 1, colors.black),
]))
story.append(Paragraph("Validation Summary:", styles['Heading2']))
story.append(table)
story.append(Spacer(1, 12))

# Add plot
story.append(Paragraph("Adverse Events by Treatment Arm", styles['Heading2']))
# The plot is saved from a 10x6 figure (5:3); 400x240 keeps that aspect ratio
# so the chart is not stretched vertically in the PDF.
story.append(Image("ae_plot.png", width=400, height=240))
story.append(Spacer(1, 12))

# Conclusion
# Paragraph text is markup in which source newlines are treated as ordinary
# whitespace, so line breaks must be explicit <br/> tags and a literal "<" is
# written as &lt;. Without this the recommendation list renders as one run-on
# paragraph.
# NOTE(review): this text is static and does not adapt to the actual findings
# in `validation_report`.
conclusion_text = (
    "Overall, the dataset demonstrates typical clinical data challenges, "
    "including missing or inconsistent adverse event data, "
    "patients below the legal age threshold (&lt;18), and male pregnancy cases. "
    "These issues are highlighted in the validation table above with example patient IDs."
    "<br/><br/>"
    "Recommendations:<br/>"
    "- Resolve missing fields and correct logically inconsistent entries.<br/>"
    "- Implement edit checks and data validation rules in the EDC system.<br/>"
    "- Monitor adverse events and patient demographics carefully to maintain data integrity."
)
story.append(Paragraph(conclusion_text, styles['Normal']))

# Build PDF
doc.build(story)

print("\nPDF Report generated: Clinical_Data_Validation_Report.pdf")


PDF Report generated: Clinical_Data_Validation_Report.pdf
