# 🧬 Omics AI Explorer - Quick Start

A simple demo that lists, explores, and queries the CoLoRS collection on the HiFi Solves network.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mfiume/omics-ai-python-library/blob/main/Omics_AI_Explorer_Quick_Start.ipynb)

In [6]:
# Install and import
!pip install git+https://github.com/mfiume/omics-ai-python-library.git --quiet
from omics_ai import list_collections, list_tables, get_schema_fields, query
print("Ready!")

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Ready!


In [7]:
# List collections on HiFi Solves
collections = list_collections("hifisolves")
print(f"Found {len(collections)} collections")
# Only preview the first entry when there is one: indexing [0] on an empty
# result would raise IndexError and stop a Run All.
if collections:
    print(f"First: {collections[0]['name']}")

Found 14 collections
First: Radboud UMC - Denovo mutation trio's


In [8]:
# Explore CoLoRS collection: list the tables it exposes
collection = "consortium-of-long-read-sequencing-colors"
tables = list_tables("hifisolves", collection)
print(f"Tables in CoLoRS: {len(tables)}")
# Use a dedicated loop name: `table` is later assigned the fully qualified
# table name (a string) for querying, so it should not also hold these entries.
for table_info in tables:
    print(f"- {table_info['display_name']}")

Tables in CoLoRS: 4
- colors_structural_variants
- colors_gene_bounds
- colors_small_variants
- colors_annotations


In [9]:
# Query variants data
import pandas as pd  # kept at the top of the cell so the dependency is visible before any work runs

table = "collections.consortium_of_long_read_sequencing_colors.colors_small_variants"
result = query("hifisolves", collection, table, limit=10)

data = result['data']
print(f"Retrieved {len(data)} variants")

# Display as table; handle an empty result the same way the filtered query does
if data:
    df = pd.DataFrame(data)
    print(f"\nVariants table ({df.shape[0]} rows, {df.shape[1]} columns):")
    display(df.head(10))
else:
    print("No variants found")

Retrieved 10 variants

Variants table (10 rows, 17 columns):


Unnamed: 0,chrom,pos,id,ref,alt,qual,filt,af,ac,an,ns,ac_hom,ac_het,ac_hemi,hwe,exchet,nhomalt
0,chrM,9053,.,G,A,-10.0,[],[0.0173787],[24],1381,1381,[0],[0],[24],[1.0],[1.0],[24]
1,chrM,9058,.,A,G,-10.0,[],[0.00217234],[3],1381,1381,[0],[0],[3],[1.0],[1.0],[3]
2,chrM,9072,.,A,G,-10.0,[],[0.00506879],[7],1381,1381,[0],[0],[7],[1.0],[1.0],[7]
3,chrM,9078,.,T,C,-10.0,[],[0.00217234],[3],1381,1381,[0],[0],[3],[1.0],[1.0],[3]
4,chrM,9090,.,T,C,-10.0,[],[0.00362056],[5],1381,1381,[0],[0],[5],[1.0],[1.0],[5]
5,chrM,9055,.,G,A,-10.0,[],[0.0369298],[51],1381,1381,[0],[0],[51],[1.0],[1.0],[51]
6,chrM,9056,.,C,T,-10.0,[],[0.00144823],[2],1381,1381,[0],[0],[2],[1.0],[1.0],[2]
7,chrM,9093,.,A,G,-10.0,[],[0.00362056],[5],1381,1381,[0],[0],[5],[1.0],[1.0],[5]
8,chrM,9094,.,C,T,-10.0,[],[0.00289645],[4],1381,1381,[0],[0],[4],[1.0],[1.0],[4]
9,chrM,9098,.,T,C,-10.0,[],[0.00217234],[3],1381,1381,[0],[0],[3],[1.0],[1.0],[3]


In [10]:
# Restrict the query to chromosome 1 with an equality filter on `chrom`
chrom_equals_chr1 = {"operation": "EQ", "value": "chr1", "type": "STRING"}
filters = {"chrom": [chrom_equals_chr1]}

result_filtered = query("hifisolves", collection, table, filters=filters, limit=10)
data_filtered = result_filtered['data']
print(f"Retrieved {len(data_filtered)} variants from chromosome 1")

# Show the matches as a table, or say so when the filter matched nothing
if not data_filtered:
    print("No variants found on chromosome 1")
else:
    df_filtered = pd.DataFrame(data_filtered)
    n_rows, n_cols = df_filtered.shape
    print(f"\nChromosome 1 variants ({n_rows} rows, {n_cols} columns):")
    display(df_filtered.head(10))

Retrieved 10 variants from chromosome 1

Chromosome 1 variants (10 rows, 17 columns):


Unnamed: 0,chrom,pos,id,ref,alt,qual,filt,af,ac,an,ns,ac_hom,ac_het,ac_hemi,hwe,exchet,nhomalt
0,chr1,247698758,.,ATTAT,A,-10.0,[],[0.0416365],[115],2762,1381,[12],[103],[0],[0.0278746],[0.992229],[6]
1,chr1,247698758,.,ATTATAT,A,-10.0,[],[0.0242578],[67],2762,1381,[2],[65],[0],[0.559718],[0.810421],[1]
2,chr1,247698758,.,ATTATATAT,A,-10.0,[],[0.025706],[71],2762,1381,[4],[67],[0],[0.226406],[0.941865],[2]
3,chr1,247698758,.,ATTATATATATAT,A,-10.0,[],[0.00470673],[13],2762,1381,[0],[13],[0],[1.0],[0.97203],[0]
4,chr1,247698758,.,ATTATATATATATATATAT,A,-10.0,[],[0.00108617],[3],2762,1381,[0],[3],[0],[1.0],[0.998913],[0]
5,chr1,247698758,.,ATTATATATATATATATATATATATAT,A,-10.0,[],[0.00362056],[10],2762,1381,[0],[10],[0],[1.0],[0.983784],[0]
6,chr1,247698758,.,ATTATATATAT,A,-10.0,[],[0.00651702],[18],2762,1381,[4],[14],[0],[0.00117882],[0.999987],[2]
7,chr1,247698758,.,ATTATATATATATAT,A,-10.0,[],[0.00325851],[9],2762,1381,[0],[9],[0],[1.0],[0.987011],[0]
8,chr1,247698758,.,ATTATATATATATATATATATATAT,A,-10.0,[],[0.00325851],[9],2762,1381,[0],[9],[0],[1.0],[0.987011],[0]
9,chr1,247698758,.,ATTATATATATATATAT,A,-10.0,[],[0.00181028],[5],2762,1381,[0],[5],[0],[1.0],[0.99638],[0]


## Check other networks

In [12]:
# Count the collections available on each known network.
for network in ["viral", "neuroscience", "biomedical", "hifisolves", "targetals", "asap"]:
    try:
        # Separate name so the `collections` list from the HiFi Solves cell is not overwritten.
        network_collections = list_collections(network)
        print(f"{network}: {len(network_collections)} collections")
    except Exception as e:
        # Best effort: keep checking the remaining networks, but report what
        # went wrong instead of hiding the cause behind a bare "error".
        print(f"{network}: error ({type(e).__name__}: {e})")

viral: 18 collections
neuroscience: 58 collections
biomedical: 15 collections
hifisolves: 14 collections
targetals: 3 collections
asap: 11 collections
