# Download Hetionet Null Graphs

This notebook downloads the pre-computed null graphs (permutations) from the Hetionet repository.

**Source**: https://github.com/hetio/hetionet/blob/a95ae76581af604e91d744680aee3f888fa18887/hetnet/permuted/matrix/hetionet-v1.0-permutations.zip



In [None]:
# Import all required libraries
import os
import sys
from pathlib import Path

# Put <repo>/src at the front of the module search path so the helper module
# imported below can be resolved. The repository root is taken to be the
# parent of the current working directory.
repo_dir = Path.cwd().parent
src_dir = repo_dir.joinpath("src")
sys.path.insert(0, os.fspath(src_dir))

# Import helper functions from src package
from download_utils import (
    download_file,
    extract_zip,
    organize_permutations,
    download_hetionet_permutations
)

# Data manipulation
import pandas as pd

In [None]:
# Configuration only: this cell defines the pinned source URL and the local
# paths used by the later cells, then echoes them. Nothing is created on disk.
ZIP_FILENAME = "hetionet-v1.0-permutations.zip"
DOWNLOAD_URL = "https://github.com/hetio/hetionet/raw/a95ae76581af604e91d744680aee3f888fa18887/hetnet/permuted/matrix/hetionet-v1.0-permutations.zip"

# All local paths hang off the parent of the current working directory.
REPO_DIR = Path.cwd().parent
DATA_DIR = REPO_DIR.joinpath("data")
DOWNLOAD_DIR = DATA_DIR.joinpath("downloads")
ZIP_PATH = DOWNLOAD_DIR.joinpath(ZIP_FILENAME)

print(
    f"Repository directory: {REPO_DIR}",
    f"Data directory: {DATA_DIR}",
    f"Download directory: {DOWNLOAD_DIR}",
    f"Download URL: {DOWNLOAD_URL}",
    sep="\n",
)
print(f"\nNote: This notebook will download the file but NOT create permutations/hetio200 directory")

In [None]:
# Make sure the download target exists (missing parents are created too, and
# an already-present directory is not an error). No other directory is made.
DOWNLOAD_DIR.mkdir(exist_ok=True, parents=True)

print(
    f"✓ Created download directory: {DOWNLOAD_DIR}",
    f"ℹ️  Skipping permutations directory creation as requested",
    sep="\n",
)

In [None]:
# Download the permutations archive (skipped when a copy is already on disk)
# and unpack it into a sibling folder inside the downloads directory.
print("Downloading Hetionet permutations file...")

if ZIP_PATH.exists():
    # Reuse the copy left by an earlier run instead of downloading again.
    print(f"✓ File already exists: {ZIP_PATH}")
    print(f"File size: {ZIP_PATH.stat().st_size / (1024*1024):.1f} MB")
else:
    # NOTE(review): download_file is treated as returning a truthy value on
    # success; confirm against the helper's contract in src/download_utils.
    success = download_file(DOWNLOAD_URL, ZIP_PATH)

    if success:
        print(f"✓ Successfully downloaded: {ZIP_PATH}")
        print(f"File size: {ZIP_PATH.stat().st_size / (1024*1024):.1f} MB")
    else:
        print("✗ Download failed")
        # Raise instead of calling exit(): exit() is a site-module helper
        # intended for the interactive shell, not for programs, and inside a
        # notebook it can shut the kernel down. An exception stops this cell
        # (and a "Run All") while leaving the session usable.
        raise RuntimeError(f"Download failed: {DOWNLOAD_URL}")

# Extract the ZIP file in the downloads directory
print("\nExtracting ZIP file in downloads directory...")
extract_dir = DOWNLOAD_DIR / "hetionet-permutations"

if extract_dir.exists():
    # An existing target directory is taken to mean a previous extraction.
    print(f"✓ Already extracted to: {extract_dir}")
    extraction_ok = True
else:
    # NOTE(review): extract_zip is likewise treated as returning a truthy
    # value on success; confirm against the helper.
    extraction_ok = bool(extract_zip(ZIP_PATH, extract_dir))

    if extraction_ok:
        print(f"✓ Successfully extracted to: {extract_dir}")

        # Show what was extracted
        extracted_items = list(extract_dir.iterdir())
        print(f"✓ Extracted {len(extracted_items)} items")

        # Preview only the first few entries to keep the output short.
        for item in sorted(extracted_items)[:5]:
            if item.is_dir():
                print(f"  📁 {item.name}/")
            else:
                print(f"  📄 {item.name}")
        if len(extracted_items) > 5:
            print(f"  ... and {len(extracted_items) - 5} more items")
    else:
        # Extraction failure is reported but deliberately does not abort.
        print("✗ Extraction failed")

print(f"\nℹ️  ZIP file: {ZIP_PATH}")
# Only claim an extraction location when the extraction actually succeeded.
if extraction_ok:
    print(f"ℹ️  Extracted to: {extract_dir}")
else:
    print(f"ℹ️  Extraction did not complete; target was: {extract_dir}")
print("ℹ️  All files remain in downloads/ directory")
print("ℹ️  No permutations directory created as requested")

In [None]:
# Summary and verification: report what is actually on disk after the
# download/extract step. This cell only reads the filesystem and prints.
print("\n" + "="*60)
print("DOWNLOAD SUMMARY")
print("="*60)

# Check if download was successful
zip_present = ZIP_PATH.exists()
if zip_present:
    file_size_mb = ZIP_PATH.stat().st_size / (1024*1024)
    print(f"✓ Downloaded file: {ZIP_PATH.name}")
    print(f"✓ File size: {file_size_mb:.1f} MB")
    print(f"✓ Location: {ZIP_PATH}")
else:
    print(f"✗ Download file not found: {ZIP_PATH}")

# Check if extraction was successful
extract_dir = DOWNLOAD_DIR / "hetionet-permutations"
extract_present = extract_dir.exists()
if extract_present:
    # Sorted once here; reused by the directory-structure section below.
    extracted_items = sorted(extract_dir.iterdir())
    print(f"✓ Extracted directory: {extract_dir.name}")
    print(f"✓ Extracted items: {len(extracted_items)}")

    # Show directory contents (first ten entries only)
    for item in extracted_items[:10]:
        if item.is_dir():
            sub_items = len(list(item.iterdir()))
            print(f"  📁 {item.name}/ ({sub_items} items)")
        else:
            print(f"  📄 {item.name}")
    if len(extracted_items) > 10:
        print(f"  ... and {len(extracted_items) - 10} more items")
else:
    print(f"✗ Extracted directory not found: {extract_dir}")

# Check existing permutations directory
PERMUTATIONS_DIR = DATA_DIR / "permutations"
permutations_present = PERMUTATIONS_DIR.exists()
if permutations_present:
    # Listed once; reused by the directory-structure section below.
    local_items = sorted(PERMUTATIONS_DIR.iterdir())
    print(f"✓ Local permutations directory: {len(local_items)} items")

    # Show some examples
    for item in local_items[:5]:
        print(f"  📁 {item.name}/")
    if len(local_items) > 5:
        print(f"  ... and {len(local_items) - 5} more")
else:
    print("ℹ️  No local permutations directory found")

print("\n" + "="*60)
print("CURRENT DIRECTORY STRUCTURE")
print("="*60)
print(f"📁 {DATA_DIR.name}/")
print("  📁 downloads/")
# Only list the ZIP when it is really there, so the tree matches the disk.
if zip_present:
    print(f"    📄 {ZIP_FILENAME}                    <- Downloaded ZIP file")
if extract_present:
    print("    📁 hetionet-permutations/         <- Extracted permutations")
    for item in extracted_items[:3]:
        if item.is_dir():
            print(f"      📁 {item.name}/")
        else:
            print(f"      📄 {item.name}")
    if len(extracted_items) > 3:
        print("      📁 ...")

if permutations_present:
    print("  📁 permutations/                   <- Your local permutations")
    for item in local_items[:3]:
        print(f"    📁 {item.name}/")
    if len(local_items) > 3:
        print("    📁 ...")

print("\n" + "="*60)
print("STATUS")
print("="*60)
# Status lines are derived from the checks above rather than hard-coded, so a
# missing ZIP or missing extraction directory is not reported as a success.
if zip_present:
    print("✓ Download completed")
else:
    print("✗ Download not completed (ZIP file missing)")
if extract_present:
    print("✓ Extraction completed")
else:
    print("✗ Extraction not completed (extracted directory missing)")
print("ℹ️  All files remain in downloads/ directory")
print("ℹ️  No hetio200 directory created (as requested)")

if extract_present:
    print("ℹ️  Permutations are available in downloads/hetionet-permutations/")
    print("\n📍 To use these permutations in your analysis:")
    print("   Set permutations_subdirectory = 'downloads/hetionet-permutations'")
    print("   Or copy specific permutation folders to data/permutations/ if needed")