# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Make the project's 'src' directory importable so the custom loader module
# can be found. The relative path assumes the notebook runs from a
# 'notebooks' folder that is a sibling of 'src'; adjust it otherwise.
module_path = os.path.abspath(os.path.join('..', 'src'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import the loader and load the data.
# try/except/else ties the load to *this* import attempt: checking whether the
# name exists in the namespace instead would pick up a stale
# `load_stroke_data` left over from an earlier run of the cell even though the
# import just failed. Either way `df` is always defined afterwards.
try:
    from load_data import load_stroke_data
except ImportError as import_error:
    print("Error: Could not import load_stroke_data.")
    print("Make sure load_data.py is in the src directory and the path is correct.")
    # Show the underlying cause: the import can also fail because of a missing
    # dependency inside load_data.py, not only because of a wrong path.
    print(f"Reason: {import_error}")
    # As a fallback, you might load data directly here if import fails
    # df = pd.read_csv('../path/to/your/downloaded/data.csv') # Example fallback
    df = None
else:
    # Load the data using the function
    df = load_stroke_data()

# --- Start EDA ---
# Guard first: without a DataFrame there is nothing to summarise.
if df is None:
    print("Data could not be loaded. Cannot proceed with EDA.")
else:
    print("--- Initial Data Overview ---")

    # Dimensions as reported by df.shape
    print("\nShape of the DataFrame:", df.shape)

    # A quick look at the first records
    print("\nFirst 5 rows:")
    print(df.head(5))

    # df.info() writes its report directly to stdout, so it is not wrapped in print()
    print("\nDataFrame Info (columns, types, non-null counts):")
    df.info()

    # include='all' adds the non-numeric columns to the summary
    print("\nDescriptive Statistics:")
    print(df.describe(include='all'))

    # Count of missing entries in each column
    print("\nChecking for Missing Values (Sum per column):")
    print(df.isna().sum())

    # --- Next Steps in EDA (Examples) ---
    # - Visualize distributions (histograms for numerical, countplots for categorical)
    # - Analyze correlations
    # - Explore relationships between features and the target variable ('stroke')
    # - Handle missing values (imputation or removal)
    # - Check for outliers

# Set plot style (optional)
# sns.set_style('whitegrid')
# plt.rcParams['figure.figsize'] = (10, 6) # Set default figure size