# fMRI Data Analysis with AWS

This notebook demonstrates how to:
1. Download processed fMRI data from S3
2. Calculate functional connectivity
3. Visualize brain networks
4. Generate statistical summaries

**Prerequisites:**
- AWS credentials configured locally
- Processed fMRI files in S3 output bucket
- Python dependencies installed: `pip install -r requirements.txt`

## 1. Setup and Configuration

Configure AWS credentials and S3 bucket information.

In [None]:
import boto3
import numpy as np
import pandas as pd
from scipy import stats, signal
from scipy.spatial.distance import pdist, squareform
import nibabel as nib
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from pathlib import Path
import os
import logging

# Configure logging so INFO-level messages from the S3 helper functions are shown
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set plotting style.
# The 'seaborn-v0_8-*' style names only exist in Matplotlib >= 3.6; older
# releases ship the same style sheet under the name 'seaborn-darkgrid'.
plot_style = 'seaborn-v0_8-darkgrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-darkgrid'
plt.style.use(plot_style)
sns.set_palette('husl')

print("Imports successful!")

In [None]:
# Configuration
# UPDATE THESE WITH YOUR ACTUAL BUCKET NAMES
# The bucket can be set either by editing the default below or by exporting
# FMRI_OUTPUT_BUCKET before starting Jupyter (the environment variable wins).
AWS_REGION = 'us-east-1'
OUTPUT_BUCKET = os.environ.get('FMRI_OUTPUT_BUCKET', 'fmri-output-{your-username}')  # CHANGE THIS
LOCAL_RESULTS_DIR = '../results'

# Warn early if the template value was left untouched: with the placeholder in
# place the S3 listing below simply comes back empty.
if '{' in OUTPUT_BUCKET:
    print("⚠ OUTPUT_BUCKET still contains the '{your-username}' placeholder; "
          "edit it in this cell or set the FMRI_OUTPUT_BUCKET environment variable.")

# Create S3 client
s3 = boto3.client('s3', region_name=AWS_REGION)

# Create local results directory
os.makedirs(LOCAL_RESULTS_DIR, exist_ok=True)

print(f"AWS Region: {AWS_REGION}")
print(f"Output Bucket: {OUTPUT_BUCKET}")
print(f"Results Directory: {LOCAL_RESULTS_DIR}")

## 2. Download Processed Data from S3

In [None]:
def download_s3_file(bucket, key, local_path):
    """Fetch a single S3 object and store it at ``local_path``.

    Uses the notebook-level ``s3`` client and ``logger``.

    Args:
        bucket: Name of the S3 bucket to read from.
        key: Object key inside ``bucket``.
        local_path: Destination file path on the local machine.

    Returns:
        True when the download finished without raising, False otherwise.
        Failures are logged rather than propagated.
    """
    succeeded = False
    try:
        logger.info(f"Downloading s3://{bucket}/{key}")
        s3.download_file(bucket, key, local_path)
        logger.info(f"Saved to {local_path}")
        succeeded = True
    except Exception as err:
        # Report the problem through the log and signal it via the return value.
        logger.error(f"Error downloading: {err}")
    return succeeded

def list_s3_objects(bucket, prefix='', client=None):
    """List every object key in an S3 bucket, following pagination.

    A single ``list_objects_v2`` call returns at most 1000 keys, so the
    listing is read through a paginator to avoid silently truncating large
    buckets.

    Args:
        bucket: Name of the S3 bucket.
        prefix: Only keys starting with this prefix are returned
            (default: all keys).
        client: Optional S3 client to use. Defaults to the notebook-level
            ``s3`` client.

    Returns:
        A list of key strings in the order S3 returned them. An empty list is
        returned when nothing matches or when any error occurs; errors are
        logged rather than raised.
    """
    try:
        s3_client = client if client is not None else s3
        paginator = s3_client.get_paginator('list_objects_v2')
        keys = []
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            # Pages without matching objects carry no 'Contents' entry.
            keys.extend(obj['Key'] for obj in page.get('Contents', []))
        return keys
    except Exception as e:
        logger.error(f"Error listing S3: {e}")
        return []

# Show what the processing pipeline has written to the output bucket
print(f"Listing results in s3://{OUTPUT_BUCKET}/")
objects = list_s3_objects(OUTPUT_BUCKET)
print(f"Found {len(objects)} objects:")

# Only preview the first few keys to keep the cell output short
preview_limit = 10
for key in objects[:preview_limit]:
    print(f"  - {key}")

In [None]:
# Download processed fMRI files
# Find smoothed file (final processed output)
smoothed_files = [obj for obj in objects if 'smoothed' in obj.lower()]

if smoothed_files:
    # The local copy is always named *.nii.gz, so prefer a key that really is
    # a gzipped NIfTI; otherwise fall back to the first match.
    gzipped_nifti = [key for key in smoothed_files if key.lower().endswith('.nii.gz')]
    smoothed_key = (gzipped_nifti or smoothed_files)[0]
    smoothed_path = os.path.join(LOCAL_RESULTS_DIR, 'fmri_smoothed.nii.gz')

    # download_s3_file reports failure through its return value (it logs the
    # error instead of raising), so only claim success when it returns True.
    if download_s3_file(OUTPUT_BUCKET, smoothed_key, smoothed_path):
        print(f"✓ Downloaded processed fMRI: {smoothed_path}")
    else:
        print(f"✗ Download failed for s3://{OUTPUT_BUCKET}/{smoothed_key} (see the log message above)")
else:
    print("⚠ No smoothed files found in S3. Did Lambda processing complete?")

## 3. Load and Explore fMRI Data

In [None]:
# Load fMRI data
fmri_path = os.path.join(LOCAL_RESULTS_DIR, 'fmri_smoothed.nii.gz')

# Every later cell needs fmri_data, so stop here with a clear message instead
# of letting the next cell fail with a NameError.
if not os.path.exists(fmri_path):
    raise FileNotFoundError(f"File not found: {fmri_path}")

print(f"Loading {fmri_path}")
img = nib.load(fmri_path)
fmri_data = img.get_fdata()  # whole image as a floating-point array in memory
affine = img.affine

print("\nfMRI Data Info:")
print(f"  Shape: {fmri_data.shape}")
print(f"  Data type: {fmri_data.dtype}")
print(f"  Range: [{np.min(fmri_data):.3f}, {np.max(fmri_data):.3f}]")
print(f"  Mean: {np.mean(fmri_data):.3f}")
print(f"  Std: {np.std(fmri_data):.3f}")
print(f"  Affine shape: {affine.shape}")

In [None]:
# Extract brain voxels (voxels whose signal varies over time)
# Reshape 4D (x, y, z, time) to 2D (voxels, time)
if fmri_data.ndim != 4:
    # Give a readable error instead of the tuple-unpacking failure below.
    raise ValueError(
        f"Expected a 4D (x, y, z, time) image, got shape {fmri_data.shape}"
    )
x, y, z, t = fmri_data.shape
print("Extracting voxel time series...")

# Reshape to (voxels, timepoints)
fmri_2d = fmri_data.reshape(-1, t)
print(f"Reshaped fMRI: {fmri_2d.shape}")

# Remove voxels with zero variance (constant time series, treated as non-brain).
# Voxels containing NaN are dropped as well, because NaN > 0 is False.
voxel_var = np.var(fmri_2d, axis=1)
brain_mask = voxel_var > 0
fmri_brain = fmri_2d[brain_mask, :]

print(f"Brain voxels: {fmri_brain.shape[0]}")
print(f"Timepoints: {fmri_brain.shape[1]}")

## 4. Calculate Functional Connectivity

In [None]:
# Standardize data (z-score normalization)
# Pearson correlation is unaffected by this step; it only provides a
# standardized copy of the time series.
print("Standardizing fMRI data...")
fmri_standardized = stats.zscore(fmri_brain, axis=1)

# The voxel-wise connectivity matrix is N x N float64 (N**2 * 8 bytes), which
# quickly exceeds available memory: 10,000 voxels already need 0.8 GB and the
# later thresholding cells make further full-size copies. Above the cap an
# evenly spaced subset of voxels is used; below it every voxel is kept.
# connectivity_voxel_idx maps each row/column of `connectivity` back to its
# row in fmri_brain.
MAX_CONNECTIVITY_VOXELS = 10_000
n_brain_voxels = fmri_standardized.shape[0]
if n_brain_voxels > MAX_CONNECTIVITY_VOXELS:
    connectivity_voxel_idx = np.linspace(
        0, n_brain_voxels - 1, MAX_CONNECTIVITY_VOXELS
    ).astype(int)
    connectivity_input = fmri_standardized[connectivity_voxel_idx]
    print(f"⚠ {n_brain_voxels} brain voxels would need a "
          f"{n_brain_voxels ** 2 * 8 / 1e9:.1f} GB connectivity matrix; "
          f"using {MAX_CONNECTIVITY_VOXELS} evenly spaced voxels instead")
else:
    connectivity_voxel_idx = np.arange(n_brain_voxels)
    connectivity_input = fmri_standardized

# Compute correlation matrix (functional connectivity)
print("Computing functional connectivity matrix...")
connectivity = np.corrcoef(connectivity_input)
print(f"Connectivity matrix shape: {connectivity.shape}")

# Handle NaN values
connectivity = np.nan_to_num(connectivity, nan=0.0)

print(f"Connectivity range: [{np.min(connectivity):.3f}, {np.max(connectivity):.3f}]")
print(f"Connectivity mean: {np.mean(connectivity):.3f}")

In [None]:
# The full matrix is too large to draw, so keep every Nth voxel on both axes
subsample_factor = 10
indices = np.arange(connectivity.shape[0])[::subsample_factor]
connectivity_sub = connectivity[indices][:, indices]

print(f"Subsampled connectivity shape: {connectivity_sub.shape}")

# Heatmap of the reduced matrix on a fixed [-1, 1] colour scale
fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(connectivity_sub, cmap='coolwarm', vmin=-1, vmax=1)
ax.set_title('Functional Connectivity Matrix (Subsampled)', fontsize=14, fontweight='bold')
ax.set_xlabel('Voxel Index')
ax.set_ylabel('Voxel Index')
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('Pearson Correlation')
fig.tight_layout()
plt.show()

print("✓ Connectivity heatmap created")

## 5. Network Statistics

In [None]:
# Calculate network statistics
print("Computing network statistics...")

# Threshold connectivity at 0.5 correlation: keep edges with |r| >= threshold
threshold = 0.5
connectivity_thresholded = connectivity.copy()
connectivity_thresholded[np.abs(connectivity_thresholded) < threshold] = 0
# Each voxel correlates perfectly with itself, so the diagonal always passes
# the threshold. Remove these self-connections so they are not counted as
# edges in the degree, strength and clustering figures.
np.fill_diagonal(connectivity_thresholded, 0)

# Degree (number of connections per voxel).
# Count every surviving edge, positive or negative, to match the |r| threshold
# above and the absolute weights used for strength below.
degree = np.count_nonzero(connectivity_thresholded, axis=1)

# Strength (sum of connection weights)
strength = np.sum(np.abs(connectivity_thresholded), axis=1)

# Clustering coefficient
def clustering_coefficient(adj_matrix, max_nodes=100):
    """Mean local clustering coefficient of an undirected graph.

    For each examined node the coefficient is the fraction of possible links
    between its neighbours that actually exist. Self-connections (diagonal
    entries) are ignored both when collecting neighbours and when counting
    links, which keeps every local coefficient within [0, 1].

    Args:
        adj_matrix: Square adjacency or weight matrix. Any non-zero entry is
            treated as an edge. NaN compares unequal to zero and would count
            as an edge, so clean the matrix beforehand.
        max_nodes: Only the first ``max_nodes`` rows are evaluated to bound
            the runtime. Pass None to evaluate every node.

    Returns:
        The mean coefficient over the examined nodes that have at least two
        neighbours, or 0 when no such node exists.
    """
    adj = np.asarray(adj_matrix)
    n_nodes = adj.shape[0]
    limit = n_nodes if max_nodes is None else min(n_nodes, max_nodes)

    local_cc = []
    for i in range(limit):
        is_neighbor = adj[i] != 0
        is_neighbor[i] = False  # a node is not its own neighbour
        neighbors = np.flatnonzero(is_neighbor)
        k = len(neighbors)
        if k > 1:
            sub = adj[np.ix_(neighbors, neighbors)] != 0
            # Off-diagonal entries only; each undirected link appears twice,
            # matching the k * (k - 1) ordered pairs in the denominator.
            links = np.count_nonzero(sub) - np.count_nonzero(np.diagonal(sub))
            local_cc.append(links / (k * (k - 1)))
    return np.mean(local_cc) if local_cc else 0

cc = clustering_coefficient(connectivity_thresholded)

# Assemble the report first, then emit it in one go
report_lines = [
    f"\nNetwork Statistics (threshold={threshold}):",
    f"  Total voxels: {len(degree)}",
    f"  Mean degree: {np.mean(degree):.2f}",
    f"  Max degree: {np.max(degree):.0f}",
    f"  Mean strength: {np.mean(strength):.3f}",
    f"  Clustering coefficient: {cc:.3f}",
]
print("\n".join(report_lines))

In [None]:
# Hubs are the voxels whose degree reaches the chosen percentile
hub_percentile = 95
hub_threshold = np.percentile(degree, hub_percentile)
hub_voxels = np.flatnonzero(degree >= hub_threshold)

print(f"Hub voxels (top {100-hub_percentile}%): {len(hub_voxels)}")

# Side-by-side histograms of degree and strength, each with its mean marked
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One entry per panel: (axes, values, bar colour, mean label, x label, title).
# A colour of None lets Matplotlib pick its default.
panels = [
    (axes[0], degree, None, f'Mean: {np.mean(degree):.2f}',
     'Degree (Number of Connections)', 'Node Degree Distribution'),
    (axes[1], strength, 'green', f'Mean: {np.mean(strength):.3f}',
     'Strength (Sum of Weights)', 'Node Strength Distribution'),
]
for panel_ax, values, bar_color, mean_label, x_label, panel_title in panels:
    panel_ax.hist(values, bins=50, alpha=0.7, color=bar_color, edgecolor='black')
    panel_ax.axvline(np.mean(values), color='r', linestyle='--', label=mean_label)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Frequency')
    panel_ax.set_title(panel_title)
    panel_ax.legend()
    panel_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("✓ Network statistics plots created")

## 6. Time Series Analysis

In [None]:
# Analyze temporal patterns
print("Analyzing temporal patterns...")

# Extract mean time series
mean_timeseries = np.mean(fmri_brain, axis=0)

# Sampling rate for the spectrum: the 4th zoom of a 4D NIfTI header is the
# repetition time (TR). Without a usable TR the spectrum is expressed in
# cycles per TR instead of being mislabelled as Hz.
zooms = img.header.get_zooms()
repetition_time = float(zooms[3]) if len(zooms) > 3 else 0.0
if repetition_time > 0:
    # NOTE(review): assumes the header stores TR in seconds — confirm with
    # img.header.get_xyzt_units()
    sampling_rate = 1.0 / repetition_time
    freq_label = 'Frequency (Hz)'
else:
    sampling_rate = 1.0
    freq_label = 'Frequency (cycles/TR)'

# Power spectrum analysis (scipy.signal is imported in the first code cell)
freqs, power = signal.periodogram(mean_timeseries, fs=sampling_rate)

# Plot time series and power spectrum
fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Time series
axes[0].plot(mean_timeseries, linewidth=0.5)
axes[0].set_xlabel('Time (TR)')
axes[0].set_ylabel('Mean BOLD Signal')
axes[0].set_title('Mean Brain BOLD Time Series')
axes[0].grid(alpha=0.3)

# Power spectrum (log scale)
# periodogram already returns the one-sided spectrum (0 .. Nyquist) for real
# input, so no further halving is needed. The DC bin is skipped: the default
# constant detrend removes the mean, leaving ~0 power that a log axis cannot
# show.
axes[1].semilogy(freqs[1:], power[1:])
axes[1].set_xlabel(freq_label)
axes[1].set_ylabel('Power Spectral Density')
axes[1].set_title('Power Spectrum of Mean BOLD Signal')
axes[1].grid(alpha=0.3, which='both')

plt.tight_layout()
plt.show()

print("✓ Time series plots created")

## 7. Summary Statistics and Export

In [None]:
# Gather the headline numbers of the analysis into a one-row table
n_voxels, n_timepoints = fmri_brain.shape[0], fmri_brain.shape[1]
signal_min, signal_max = np.min(fmri_brain), np.max(fmri_brain)

data_summary = {
    'Analysis Date': pd.Timestamp.now().isoformat(),
    'fMRI Shape': str(fmri_data.shape),
    'Brain Voxels': int(n_voxels),
    'Timepoints': int(n_timepoints),
    'Mean Signal': float(np.mean(fmri_brain)),
    'Std Signal': float(np.std(fmri_brain)),
    'Signal Range': f"[{signal_min:.3f}, {signal_max:.3f}]",
}
network_summary = {
    'Network - Mean Degree': float(np.mean(degree)),
    'Network - Max Degree': float(np.max(degree)),
    'Network - Mean Strength': float(np.mean(strength)),
    'Network - Clustering Coeff': float(cc),
    'Hub Voxels': int(len(hub_voxels)),
}
# Data fields first, network fields second (dicts keep insertion order)
summary = {**data_summary, **network_summary}

summary_df = pd.DataFrame([summary])
print("\nAnalysis Summary:")
print(summary_df.to_string(index=False))

# Persist the table next to the other results
summary_path = os.path.join(LOCAL_RESULTS_DIR, 'analysis_summary.csv')
summary_df.to_csv(summary_path, index=False)
print(f"\n✓ Summary saved to {summary_path}")

In [None]:
# Write the connectivity matrix; only the leading 1000 x 1000 corner is stored
# to keep the file small
saved_extent = 1000
connectivity_path = os.path.join(LOCAL_RESULTS_DIR, 'connectivity_matrix.npy')
connectivity_corner = connectivity[:saved_extent, :saved_extent]
np.save(connectivity_path, connectivity_corner)
print(f"✓ Connectivity matrix saved to {connectivity_path}")

# Write the per-voxel network metrics as a table (one row per voxel)
metrics_path = os.path.join(LOCAL_RESULTS_DIR, 'network_metrics.csv')
metric_columns = {
    'voxel_id': np.arange(len(degree)),
    'degree': degree,
    'strength': strength,
}
metrics_df = pd.DataFrame(metric_columns)
metrics_df.to_csv(metrics_path, index=False)
print(f"✓ Network metrics saved to {metrics_path}")

## 8. Next Steps and Cleanup

After completing this analysis:

1. **Review Results:** Check analysis_summary.csv and visualizations
2. **Save Important Files:** Keep any results you want to preserve
3. **Cleanup AWS Resources:** Run the cleanup script to avoid charges

```bash
cd /path/to/tier-2/
python scripts/cleanup.py \
    --input-bucket fmri-input-{your-username} \
    --output-bucket fmri-output-{your-username} \
    --lambda-function fmri-preprocessor \
    --iam-role lambda-fmri-processor \
    --confirm
```

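**Important:** This permanently deletes the S3 buckets named in the command (including every file stored in them) together with the listed Lambda function and IAM role. Download any results you want to keep before running it.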

In [None]:
# Closing banner followed by an inventory of the files written locally
banner = "=" * 60
print("\n" + banner)
print("Analysis Complete!")
print(banner)
print(f"\nResults saved to: {LOCAL_RESULTS_DIR}")
print("\nGenerated files:")
for entry in os.listdir(LOCAL_RESULTS_DIR):
    entry_path = os.path.join(LOCAL_RESULTS_DIR, entry)
    if not os.path.isfile(entry_path):
        continue  # skip sub-directories
    size_mb = os.path.getsize(entry_path) / 1e6
    print(f"  - {entry} ({size_mb:.2f}MB)")