In [1]:
import pandas as pd

def demonstrate_patient_data():
    # Create sample patient records
    patients = {
        "P001": {
            "name": "John Smith",
            "age": 45,
            "gender": "Male",
            "blood_type": "O+",
            "conditions": ["Hypertension", "Diabetes Type 2"],
            "medications": ["Lisinopril", "Metformin"],
            "vital_signs": {
                "blood_pressure": "120/80",
                "heart_rate": 72,
                "temperature": 98.6
            },
            "last_visit": "2023-10-15"
        },
        "P002": {
            "name": "Sarah Johnson",
            "age": 32,
            "gender": "Female",
            "blood_type": "A-",
            "conditions": ["Asthma"],
            "medications": ["Albuterol"],
            "vital_signs": {
                "blood_pressure": "118/75",
                "heart_rate": 68,
                "temperature": 98.4
            },
            "last_visit": "2023-11-01"
        },
        "P003": {
            "name": "Robert Davis",
            "age": 58,
            "gender": "Male",
            "blood_type": "B+",
            "conditions": ["Arthritis", "High Cholesterol"],
            "medications": ["Ibuprofen", "Statins"],
            "vital_signs": {
                "blood_pressure": "135/85",
                "heart_rate": 75,
                "temperature": 98.8
            },
            "last_visit": "2023-10-28"
        }
    }
    
    # Convert nested dictionary to DataFrame
    df = pd.DataFrame.from_dict(patients, orient='index')
    
    # Expand the vital_signs dictionary into separate columns
    vital_signs_df = pd.json_normalize(df['vital_signs'])
    df = df.drop('vital_signs', axis=1)
    df = pd.concat([df, vital_signs_df], axis=1)
    
    return df

# Create and display the patient DataFrame
patient_df = demonstrate_patient_data()
print(patient_df)


               name   age  gender blood_type                       conditions  \
P001     John Smith  45.0    Male         O+  [Hypertension, Diabetes Type 2]   
P002  Sarah Johnson  32.0  Female         A-                         [Asthma]   
P003   Robert Davis  58.0    Male         B+    [Arthritis, High Cholesterol]   
0               NaN   NaN     NaN        NaN                              NaN   
1               NaN   NaN     NaN        NaN                              NaN   
2               NaN   NaN     NaN        NaN                              NaN   

                  medications  last_visit blood_pressure  heart_rate  \
P001  [Lisinopril, Metformin]  2023-10-15            NaN         NaN   
P002              [Albuterol]  2023-11-01            NaN         NaN   
P003     [Ibuprofen, Statins]  2023-10-28            NaN         NaN   
0                         NaN         NaN         120/80        72.0   
1                         NaN         NaN         118/75        68.0   


In [2]:
def analyze_patient_data(df):
    insights = {
        # Demographic Analysis
        "age_statistics": df['age'].describe(),
        "gender_distribution": df['gender'].value_counts(),
        "blood_type_distribution": df['blood_type'].value_counts(),
        
        # Medical Condition Analysis
        "common_conditions": pd.Series([condition for conditions in df['conditions'] for condition in conditions]).value_counts(),
        "medication_frequency": pd.Series([med for meds in df['medications'] for med in meds]).value_counts(),
        
        # Vital Signs Analysis
        "avg_heart_rate": df['heart_rate'].mean(),
        "bp_categories": pd.cut(df['blood_pressure'].str.split('/').str[0].astype(float), 
                              bins=[0, 120, 140, 180], 
                              labels=['Normal', 'Pre-hypertensive', 'Hypertensive']).value_counts(),
        
        # Visit Pattern Analysis
        "last_visit_distribution": pd.to_datetime(df['last_visit']).dt.month.value_counts()
    }
    return insights

# Additional analysis functions for specific insights
def identify_high_risk_patients(df):
    return df[
        (df['age'] > 50) & 
        (df['conditions'].str.len() > 1) |
        (df['blood_pressure'].str.split('/').str[0].astype(float) > 130)
    ]

def medication_per_condition(df):
    condition_med_map = {}
    for idx, row in df.iterrows():
        for condition in row['conditions']:
            if condition not in condition_med_map:
                condition_med_map[condition] = set()
            condition_med_map[condition].update(row['medications'])
    return condition_med_map

# Visualization function
def create_patient_visualizations(df):
    import matplotlib.pyplot as plt
    
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    
    # Age distribution
    df['age'].hist(ax=axes[0,0], bins=10)
    axes[0,0].set_title('Age Distribution')
    
    # Gender distribution
    df['gender'].value_counts().plot(kind='pie', ax=axes[0,1])
    axes[0,1].set_title('Gender Distribution')
    
    # Blood pressure analysis
    df['heart_rate'].plot(kind='box', ax=axes[1,0])
    axes[1,0].set_title('Heart Rate Distribution')
    
    # Conditions per patient
    df['conditions'].str.len().value_counts().plot(kind='bar', ax=axes[1,1])
    axes[1,1].set_title('Number of Conditions per Patient')
    
    plt.tight_layout()
    return fig
