In [4]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster # Optional: for Dask dashboard
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import time # To time computations
import random
import sys

In [5]:
# Locate the project root once and use that single value for both BASE_DIR and
# the sys.path entry, so the directory made importable is always the same
# directory the rest of the notebook treats as the project root.
current_dir = Path.cwd()

# The detection messages are collected here and printed after the sys.path
# report further down, which keeps the cell's output order stable.
if current_dir.name == 'notebooks':
    # Started from <project>/notebooks: the project root is one level up.
    BASE_DIR = current_dir.parent
    _base_dir_messages = [f"Adjusted BASE_DIR to parent (project root): {BASE_DIR}"]
elif all((current_dir / subdir).is_dir() for subdir in ('src', 'notebooks', 'data')):
    # The current directory *already* looks like the project root.
    BASE_DIR = current_dir
    _base_dir_messages = [f"CWD appears to be project root: {BASE_DIR}"]
else:
    # Fallback - might need manual adjustment if the structure is unexpected.
    BASE_DIR = current_dir
    # You might need to adjust manually here if the above checks fail, e.g.
    # BASE_DIR = Path('/absolute/path/to/your/Dataviz-VAST2022')
    _base_dir_messages = [
        "WARNING: Could not reliably determine project root relative to CWD.",
        f"Using CWD as BASE_DIR: {BASE_DIR}. Check if paths below are correct.",
    ]

# project_root used to be Path.cwd().parent unconditionally, which pointed one
# level *above* the project whenever the notebook was not started from the
# 'notebooks' directory. Deriving it from BASE_DIR keeps the two in agreement.
project_root = BASE_DIR

# Make the project root importable (needed for the `src` package imports).
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))
    print(f"Added project root to sys.path: {project_root}")
else:
    print("Project root already in sys.path.")

print(f"Initial CWD: {current_dir}")
for _message in _base_dir_messages:
    print(_message)

Added project root to sys.path: /Users/kozy/_projects/Dataviz-VAST2022
Initial CWD: /Users/kozy/_projects/Dataviz-VAST2022/notebooks
Adjusted BASE_DIR to parent (project root): /Users/kozy/_projects/Dataviz-VAST2022


In [6]:
def _make_placeholder(name):
    """Build a stand-in plotting function that only reports it was called.

    The returned callable accepts any positional/keyword arguments, prints
    "Placeholder: <name> called", and returns None.
    """
    def _placeholder(*args, **kwargs):
        print(f"Placeholder: {name} called")

    # Give the stand-in the name of the function it replaces.
    _placeholder.__name__ = _placeholder.__qualname__ = name
    return _placeholder


# load_data / build_features helpers from src may not be needed here if the
# primary/processed data is loaded directly and is already complete; only the
# visualization function is imported.
try:
    from src.visualization.visualize import plot_balance_distribution  # kept for reference if needed
except ImportError as import_error:
    print(f"Error importing from src: {import_error}. Make sure src is importable.")
    print("Define placeholder functions locally.")
    plot_balance_distribution = _make_placeholder("plot_balance_distribution")
else:
    print("Successfully imported functions from src.")

# The comparison plots are placeholders defined locally in the notebook until
# they exist in src; once created, import them from src.visualization.visualize
# instead of defining them here.
plot_comparative_balance_timeline = _make_placeholder("plot_comparative_balance_timeline")
plot_comparative_activity_heatmap = _make_placeholder("plot_comparative_activity_heatmap")
plot_comparative_spending_category = _make_placeholder("plot_comparative_spending_category")



Successfully imported functions from src.
