In [1]:
import pickle
import os


filepath = "../../data/sentence_eeg_prob_data.pkl"

# Bind `data` before attempting the load so that later code can safely test
# `if data:` even when the file is missing or unpickling fails.
data = None

print(f"Attempting to load processed data from: {filepath}")
if not os.path.exists(filepath):
    print(f"Error: File not found at {filepath}")
else:
    try:
        # SECURITY NOTE: pickle.load can execute arbitrary code embedded in
        # the file. Only load pickles that come from a trusted source.
        with open(filepath, "rb") as f:
            loaded = pickle.load(f)
    except Exception as e:
        # Broad on purpose: unpickling can raise many unrelated exception
        # types, and this is the top-level boundary of the script.
        print(f"An unexpected error occurred during loading processed data: {e}")
    else:
        print("Successfully loaded processed data.")
        if isinstance(loaded, list):
            data = loaded
            print(f"Data loaded successfully. Number of items in the list: {len(data)}")
        else:
            # `data` stays None so downstream code never sees a non-list object.
            print(f"Error: Loaded object is not a list (type: {type(loaded)}). Returning None.")
    

import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, FancyArrowPatch


def _first_nonempty_eeg_chunk(data):
    """Return the first non-empty 'eeg_chunk' found in *data*, or None."""
    for item in data:
        chunk = item['eeg_chunk'] if 'eeg_chunk' in item else None
        # Use len() rather than truthiness: bool() of a multi-element NumPy
        # array raises ValueError.
        if chunk is not None and len(chunk) > 0:
            return chunk
    return None


def _draw_vertical_flowchart(labels, output_path):
    """Draw *labels* as a top-to-bottom chain of boxes and save it to *output_path*."""
    fig, ax = plt.subplots(figsize=(8, 10))
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    # Hides all axis decorations (including grid lines), leaving a blank canvas.
    ax.axis('off')

    box_style = dict(boxstyle="round,pad=0.5", facecolor='white', edgecolor='black', linewidth=2)

    # Boxes are centred at y = 8, 6, 4, ...; each box after the first is
    # preceded by an arrow coming down from the box above it.
    for index, label in enumerate(labels):
        y = 8 - 2 * index
        if index:
            ax.add_patch(
                FancyArrowPatch((5, y + 1.5), (5, y + 0.5),
                                arrowstyle='->', color='black', linewidth=1.5)
            )
        ax.text(5, y, label, ha='center', va='center', fontsize=12, bbox=box_style)

    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    # Close this specific figure instead of relying on the implicit current one.
    plt.close(fig)


def create_simple_flowchart(data, output_dir="../../visualizations"):
    """Create two simple flowchart PNGs describing the structure of *data*.

    One chart summarises the dataset (item count, distinct sentences, average
    sentence length, per-item keys); the other summarises the first non-empty
    ``eeg_chunk`` (number of samples and the shape of its first sample).

    Args:
        data: Sequence of dict-like items. Each item must provide a
            'sentence' entry and may provide an 'eeg_chunk' entry holding a
            sequence of samples.
        output_dir: Directory the PNG files are written to; created if missing.

    Returns:
        ``output_dir`` once both charts are saved, or ``None`` when *data* is
        empty or ``None`` (nothing is written in that case).
    """
    if not data:
        print("No data to visualize.")
        return

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Dataset statistics; the set of distinct sentences is built only once.
    sentences = {item['sentence'] for item in data}
    num_sentences = len(sentences)
    avg_sentence_length = np.mean([len(s) for s in sentences])

    # EEG chunk statistics, taken from the first non-empty chunk (if any).
    eeg_chunk = _first_nonempty_eeg_chunk(data)
    if eeg_chunk is None:
        num_samples_in_chunk = 0
        sample_shape = (0, 0)
    else:
        num_samples_in_chunk = len(eeg_chunk)
        # Pad with zeros so a sample with fewer than two dimensions does not
        # raise IndexError below.
        # NOTE(review): the label assumes axis 0 is time and axis 1 is
        # channels -- confirm against the preprocessing code.
        sample_shape = (tuple(np.shape(eeg_chunk[0])) + (0, 0))[:2]

    data_chart_path = f"{output_dir}/data_structure_flowchart.png"
    eeg_chart_path = f"{output_dir}/eeg_chunk_flowchart.png"

    # Main data structure flowchart
    char_attrs = "Each char has:\ncharacter, prefix, sentence, char_idx_in_sentence, eeg_chunk, next_char_probabilities"
    _draw_vertical_flowchart(
        [
            f"data [{len(data)} items from {num_sentences} sentences]",
            f"sentences [on avg {avg_sentence_length:.0f} char]",
            char_attrs,
        ],
        data_chart_path,
    )

    # EEG chunk structure flowchart
    # "\u00d7" is the multiplication sign; written as an escape so the label
    # survives source-file encoding mishaps.
    _draw_vertical_flowchart(
        [
            f"eeg_chunk [{num_samples_in_chunk} samples]",
            "samples [each is a numpy array]",
            f"Each sample shape: [{sample_shape[0]} time steps \u00d7 {sample_shape[1]} channels]",
        ],
        eeg_chart_path,
    )

    print("Simple flowcharts saved to:")
    print(f"  - {data_chart_path}")
    print(f"  - {eeg_chart_path}")
    return output_dir


# Render the flowcharts only when the loading step produced usable data;
# otherwise tell the user the load did not succeed.
if not data:
    print("Failed to load data. Please check the file path.")
else:
    output_dir = create_simple_flowchart(data)
    print(f"\nTo view the flowcharts, check the files in the {output_dir} directory.")




Attempting to load processed data from: ../../data/sentence_eeg_prob_data.pkl
Successfully loaded processed data.
Data loaded successfully. Number of items in the list: 15470
Simple flowcharts saved to:
  - ../../visualizations/data_structure_flowchart.png
  - ../../visualizations/eeg_chunk_flowchart.png

To view the flowcharts, check the files in the ../../visualizations directory.
