# Oncology RWD: Data Exploration
This notebook explores a synthetic oncology patient dataset that simulates real-world data (RWD) drawn from electronic health records (EHR) and biomarker results.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the synthetic patient CSV (path is relative to this notebook) and preview the first rows
csv_path = '../data/synthetic_oncology_patients.csv'
df = pd.read_csv(csv_path)
df.head()

## Patient Demographics

In [None]:
# Summary statistics for the demographic / disease columns.
# include='all' makes describe() report on every selected column, not only numeric ones.
demographic_cols = ['age', 'gender', 'cancer_type', 'stage']
df[demographic_cols].describe(include='all')

## Cancer Type Distribution

In [None]:
# Count of patients per cancer type, with one bar per gender within each type
ax = sns.countplot(data=df, x='cancer_type', hue='gender')
ax.set_title('Cancer Type by Gender')
plt.show()

## Biomarker Status vs. Treatment

In [None]:
# Contingency table: rows are biomarker_status values, columns are treatment values
pd.crosstab(index=df['biomarker_status'], columns=df['treatment'])

## Survival Analysis (Simple)

In [None]:
# Histogram of survival_months (8 bins) with a kernel density estimate overlaid
ax = sns.histplot(df['survival_months'], bins=8, kde=True)
ax.set_xlabel('Survival (months)')
ax.set_title('Distribution of Survival Time')
plt.show()