In [None]:
# Cell 1: Clear cache and reinstall from main branch
!pip cache purge
!pip install --no-cache-dir git+https://github.com/nicenzhou/edge-gwas.git@main

[0mFiles removed: 0
Collecting git+https://github.com/nicenzhou/edge-gwas.git@main
  Cloning https://github.com/nicenzhou/edge-gwas.git (to revision main) to /private/var/folders/x_/6k45gs0x2l10rxc3kvrwlly80000gn/T/pip-req-build-spf6zqsg
  Running command git clone --filter=blob:none --quiet https://github.com/nicenzhou/edge-gwas.git /private/var/folders/x_/6k45gs0x2l10rxc3kvrwlly80000gn/T/pip-req-build-spf6zqsg
  Resolved https://github.com/nicenzhou/edge-gwas.git to commit 314249cc06f6ec145e741ce670634cada7ba5ac5
  Preparing metadata (setup.py) ... [?25ldone




In [None]:
# Cell 2: Load the functions from package
from edge_gwas import *
import pandas as pd
from IPython.display import Image, display

In [None]:
# Cell 3: Fetch the test files for version 'main' into the default 'tests'
# directory. overwrite=True is passed, presumably so existing copies are
# replaced rather than reused.
download_test_files(
    overwrite=True,
    version='main',
)

In [None]:
# Cell 4: Load the PLINK test dataset from its .bed / .bim / .fam files
geno, info = load_plink_data(
    'tests/test.bed',
    'tests/test.bim',
    'tests/test.fam',
)

In [None]:
# Cell 5: Prepare phenotype (adjust column names as needed)
# First, check which columns are in test.phen. The preview uses the same
# delimiter that is handed to prepare_phenotype_data below, so the columns
# shown here are parsed the same way the loader is asked to parse them.
pheno_sep = ' '
pheno_raw = pd.read_csv('tests/test.phen', sep=pheno_sep)
display(pheno_raw.head())

# Then prepare phenotype with correct column names
pheno = prepare_phenotype_data(
    'tests/test.phen',
    outcome_col='disease',
    covariate_cols=[],  # Empty list - no covariates
    sep=pheno_sep
)

In [None]:
# Cell 6: Partition genotypes and phenotypes into train / test sets
from edge_gwas.utils import stratified_train_test_split

# The split is keyed on the 'disease' column; the ID column names tell the
# helper how to match genotype rows ('sample_id') to phenotype rows ('IID').
# NOTE(review): the outcome is named 'disease' but is_binary=False is passed;
# confirm the test phenotype really is continuous.
train_g, test_g, train_p, test_p = stratified_train_test_split(
    geno,
    pheno,
    'disease',
    geno_id_col='sample_id',
    pheno_id_col='IID',
    is_binary=False,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Cell 7: Run the full EDGE analysis on the train / test partitions.
# No covariates are supplied; 'disease' is the outcome column.
# NOTE(review): outcome_type='continuous' is used for a column named
# 'disease'; confirm this matches the test phenotype.
edge = EDGEAnalysis(n_jobs=-1, outcome_type='continuous')
alpha_df, gwas_df = edge.run_full_analysis(
    train_g,
    train_p,
    test_g,
    test_p,
    covariates=[],
    outcome='disease',
)

In [None]:
# Cell 8: Display results
# Show the ten rows of gwas_df with the smallest 'pval' values. display() is
# used instead of print() so the DataFrame renders as a rich table in the
# notebook rather than as wrapped plain text.
print("\nTop significant SNPs:")
display(gwas_df.nsmallest(10, 'pval'))

In [None]:
# Cell 9: Visualize
# The Manhattan plot is intentionally left disabled: per the original note
# there is no chromosome info, so the two lines below must not be run.
#manhattan_plot(gwas_df, 'manhattan.png')
#display(Image('manhattan.png'))

# QQ plot: qq_plot is given the results and an image path, and its return
# value is reported as lambda GC. The same path is then loaded for display.
qq_path = 'qq.png'
lambda_gc = qq_plot(gwas_df, qq_path)
print(f"Lambda GC: {lambda_gc:.3f}")
# filename= states explicitly that the string is a path on disk; passed
# positionally it is the `data` argument and only treated as a path if the
# file happens to exist.
display(Image(filename=qq_path))