- **Purpose:** Explore the loompy package
- **Date:** September 10, 2025

# Step 1
Setting it up

In [1]:
# Install the package
!pip install -U loompy

Collecting loompy
  Downloading loompy-3.0.8.tar.gz (49 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/49.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting numpy-groupies (from loompy)
  Downloading numpy_groupies-0.11.3-py3-none-any.whl.metadata (18 kB)
Downloading numpy_groupies-0.11.3-py3-none-any.whl (40 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: loompy
  Building wheel for loompy (setup.py) ... [?25l[?25hdone
  Created wheel for loompy: filename=loompy-3.0.8-py3-none-any.whl size=54012 sha256=50188f26735059e0e80ee152bc131748628151d65b2759d1f558378e9c71d5a9
  Stored in directory: /root/.cache/pip/wheels/2e/45/28/2c7afc7f9eab47612e1b5090cfb9ba0f007776250

In [2]:
# Download loom file
!wget http://loom.linnarssonlab.org/clone/Previously%20Published/Oligos.loom

--2025-09-10 09:28:01--  http://loom.linnarssonlab.org/clone/Previously%20Published/Oligos.loom
Resolving loom.linnarssonlab.org (loom.linnarssonlab.org)... 104.197.181.233
Connecting to loom.linnarssonlab.org (loom.linnarssonlab.org)|104.197.181.233|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18867689 (18M) [application/octet-stream]
Saving to: ‘Oligos.loom’


2025-09-10 09:28:01 (93.0 MB/s) - ‘Oligos.loom’ saved [18867689/18867689]



In [3]:
# Import packages
import loompy as lp
import numpy as np
import pandas as pd

In [4]:
# Open a connection to the loom file and display a preview of it.
# The connection is kept in `oligo` and stays open until `oligo.close()` is called.
oligo = lp.connect("Oligos.loom")
oligo

0,1,2,3,4,5,6,7,8,9,10,11,12
,cell_id,C1-1771017-032-E11,C1-1862091-035-C04,C1-1771017-030-G04,C1-1772089-196-A01,C1-1862091-032-F04,C1-1771017-032-C02,C1-1772099-078-B10,C1-1772072-243-D07,C1-1862091-105-E12,C1-1772099-048-E04,...
Gene,,,,,,,,,,,,...
Xkr4,,0,0,0,0,0,0,0,1,0,0,...
Rp1,,0,0,0,0,0,0,0,0,0,0,...
Sox17,,0,0,0,0,0,0,0,0,0,0,...
Mrpl15,,0,1,0,0,0,0,0,0,1,0,...
Lypla1,,0,0,0,0,0,0,0,0,0,0,...
Rgs20,,0,0,0,0,0,0,0,0,0,0,...
Oprk1,,0,0,0,0,0,0,0,0,0,0,...
Npbwr1,,0,0,0,0,0,0,0,0,0,0,...


In [5]:
# Close the connection to the loom file now that the preview is done
# (a connection needs to be closed once the work is finished).
oligo.close()

In [8]:
# Filter the main matrix down to the row(s) of a single gene.
# `oligo` was closed in the previous cell, so indexing it here only worked because the
# cells were executed out of order. Open a dedicated connection instead; the selected
# rows are copied into memory, so the result is still available after the file is closed.
with lp.connect("Oligos.loom") as ds:
    olig2_counts = ds[ds.ra.Gene == "Olig2", :]
olig2_counts

array([[0, 2, 6, ..., 0, 0, 0]])

# Step 2
Understanding the semantics of loom files

## 1. Connecting and saving


*   Loom files are **not fully loaded into memory**, only the metadata is loaded
*   Loom files are like **databases** that users can connect to
*   When the work is done, the connection needs to be **closed**





## 2. Reading and writing


*   Concurrent reading and writing is not supported
*   Not suitable for storing data



## 3. Efficient indexing and compression


*   The main matrix is stored in **chunked** format
*   The chunks are automatically **compressed** and **decompressed**





## 4. Matrix and attributes


*   2D matrix
*   Columns can be added but not rows



## 5. Data types


*   The **main matrix** and any additional layers are 2D arrays of numbers which can be 8, 16, 32 or 64-bit signed or unsigned integer or 16, 32 or 64-bit floats
*   **Attributes** are N-dimensional arrays. The size of the first dimension must match the corresponding matrix dimension

# Step 3
Creating loom file from fastq file

In [36]:
# Download kallisto
!wget https://github.com/pachterlab/kallisto/releases/download/v0.46.0/kallisto_linux-v0.46.0.tar.gz
!tar -xf kallisto_linux-v0.46.0.tar.gz
!cp kallisto/kallisto /usr/local/bin/

--2025-09-10 03:01:55--  https://github.com/pachterlab/kallisto/releases/download/v0.46.0/kallisto_linux-v0.46.0.tar.gz
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/26562905/8cbbf280-8ca9-11e9-8c32-bec32f378e41?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-09-10T04%3A01%3A02Z&rscd=attachment%3B+filename%3Dkallisto_linux-v0.46.0.tar.gz&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-09-10T03%3A00%3A12Z&ske=2025-09-10T04%3A01%3A02Z&sks=b&skv=2018-11-09&sig=ObrZEYkQFLqOv1bh97BTF7Nttf4ZyajLgiCsFo5oiKY%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1NzQ3MzU3OSwibmJmIjoxNzU3NDczMjc5LCJwYXRoIjoicmVsZW

In [38]:
# Download bustools
!wget https://github.com/BUStools/bustools/releases/download/v0.39.3/bustools_linux-v0.39.3.tar.gz
!tar -xf bustools_linux-v0.39.3.tar.gz
!cp bustools/bustools /usr/local/bin/

--2025-09-10 03:04:01--  https://github.com/BUStools/bustools/releases/download/v0.39.3/bustools_linux-v0.39.3.tar.gz
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/157250571/bf828d00-b3b4-11e9-801d-3356b95223ea?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-09-10T03%3A59%3A36Z&rscd=attachment%3B+filename%3Dbustools_linux-v0.39.3.tar.gz&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-09-10T02%3A59%3A22Z&ske=2025-09-10T03%3A59%3A36Z&sks=b&skv=2018-11-09&sig=sVe7aOGMx1B3ZbFb6quBCpkviSkuk%2FlCu1v6P2xj%2FcA%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1NzQ3Mzc0MSwibmJmIjoxNzU3NDczNDQxLCJwYXRoIjoicmV

In [9]:
# Download pre-built index of human genome
!wget https://storage.googleapis.com/linnarsson-lab-www-blobs/human_GRCh38_gencode.v31.tar.gz

--2025-09-10 02:25:55--  https://storage.googleapis.com/linnarsson-lab-www-blobs/human_GRCh38_gencode.v31.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 192.178.219.207, 108.177.12.207, 74.125.26.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|192.178.219.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5007735860 (4.7G) [application/x-tar]
Saving to: ‘human_GRCh38_gencode.v31.tar.gz’


2025-09-10 02:26:50 (86.8 MB/s) - ‘human_GRCh38_gencode.v31.tar.gz’ saved [5007735860/5007735860]



In [15]:
# Extract the downloaded index archive into the current working directory
!tar -xf human_GRCh38_gencode.v31.tar.gz

In [17]:
# List the contents of the extracted index directory, one entry per line (-1)
!ls -1 human_GRCh38_gencode.v31.600

10xv1_whitelist.txt
10xv2_whitelist.txt
10xv3_whitelist.txt
fragments2genes.txt
gencode.v31.fragments.idx
gencode.v31.metadata.tab
manifest.json
spliced_fragments.txt
unspliced_fragments.txt


In [18]:
# Download metadata and fastq files
!wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_fastqs.tar

--2025-09-10 02:34:08--  http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_fastqs.tar
Resolving cf.10xgenomics.com (cf.10xgenomics.com)... 104.18.1.173, 104.18.0.173, 2606:4700::6812:ad, ...
Connecting to cf.10xgenomics.com (cf.10xgenomics.com)|104.18.1.173|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_fastqs.tar [following]
--2025-09-10 02:34:08--  https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_fastqs.tar
Connecting to cf.10xgenomics.com (cf.10xgenomics.com)|104.18.1.173|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5549312000 (5.2G) [application/x-tar]
Saving to: ‘pbmc_1k_v3_fastqs.tar’


2025-09-10 02:38:24 (20.7 MB/s) - ‘pbmc_1k_v3_fastqs.tar’ saved [5549312000/5549312000]



In [19]:
# Unpack the FASTQ tarball, printing each extracted path (-v)
!tar -xvf pbmc_1k_v3_fastqs.tar

pbmc_1k_v3_fastqs/
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R2_001.fastq.gz
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_I1_001.fastq.gz
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R1_001.fastq.gz
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R1_001.fastq.gz
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R2_001.fastq.gz
pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_I1_001.fastq.gz


In [39]:
# Create the loom file
!loompy fromfq 1kPBMC.loom 1kPBMC human_GRCh38_gencode.v31.600 metadata.tab pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R1_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R2_001.fastq.gz

Loompy v3.0.8 by Linnarsson Lab 🌸 (http://linnarssonlab.org & http://loompy.org)

2025-09-10 03:04:13,728 - INFO - Using 2 threads.
2025-09-10 03:04:13,729 - INFO - kallisto bus -i human_GRCh38_gencode.v31.600/gencode.v31.fragments.idx -o /tmp/tmp5hfmfo5d -x 10xv3 -t 2 pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R1_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L001_R2_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R1_001.fastq.gz pbmc_1k_v3_fastqs/pbmc_1k_v3_S1_L002_R2_001.fastq.gz
2025-09-10 03:04:13,781 - INFO - [index] k-mer length: 31
2025-09-10 03:04:13,781 - INFO - [index] number of targets: 845,338
2025-09-10 03:04:13,782 - INFO - [index] number of k-mers: 271,648,279
Traceback (most recent call last):
  File "/usr/local/bin/loompy", line 8, in <module>
    sys.exit(cli())
             ^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1442, in __call__
    return self.main(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.1

**Comment:** The run failed with an out-of-memory error.

# Step 4
API walkthrough

In [17]:
# Create a small loom file from scratch.
# lp.create() is given:
#   1. the main matrix (a dense numpy array here; a scipy sparse matrix also works)
#   2. one dictionary of row attributes and one of column attributes
filename = "test_file.loom"
n_rows, n_cols = 100, 100

# Values 0..9999 laid out row by row
main_mat = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)

# One label per row / per column, numbered from 1
row_att = {"RowAttr": [f"row{idx}" for idx in range(1, n_rows + 1)]}
col_att = {"ColAttr": [f"col{idx}" for idx in range(1, n_cols + 1)]}

lp.create(filename, main_mat, row_attrs=row_att, col_attrs=col_att)

In [19]:
# Connect to the loom file written by the previous cell and display a preview of it.
# The connection is kept in `lp_file` so that later cells can query it.
lp_file = lp.connect("test_file.loom")
lp_file

0,1,2,3,4,5,6,7,8,9,10,11,12
,ColAttr,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,...
RowAttr,,,,,,,,,,,,...
row1,,0,1,2,3,4,5,6,7,8,9,...
row2,,100,101,102,103,104,105,106,107,108,109,...
row3,,200,201,202,203,204,205,206,207,208,209,...
row4,,300,301,302,303,304,305,306,307,308,309,...
row5,,400,401,402,403,404,405,406,407,408,409,...
row6,,500,501,502,503,504,505,506,507,508,509,...
row7,,600,601,602,603,604,605,606,607,608,609,...
row8,,700,701,702,703,704,705,706,707,708,709,...


In [20]:
# Shape of the object as (rows, columns).
# This is the last use of `lp_file` in the notebook, so the connection is closed here
# instead of being left open for the rest of the session.
file_shape = lp_file.shape
lp_file.close()
file_shape

(100, 100)

In [9]:
# Names of the file-level (global) attributes
with lp.connect("Oligos.loom") as loom:
    global_attr_names = loom.attrs.keys()
    print(global_attr_names)

['CreationDate', 'description', 'doi', 'last_modified', 'title', 'url']


In [8]:
# Assign the global `title` attribute, then read it back to confirm the assignment.
# NOTE(review): presumably this persists the new title in Oligos.loom itself;
# confirm that overwriting the original title is intended.
with lp.connect("Oligos.loom") as loom:
    loom.attrs.title = "Oligos"
    print(loom.attrs.title)

Oligos


In [10]:
# Names of the row attributes
with lp.connect("Oligos.loom") as loom:
    row_attr_names = loom.ra.keys()
    print(row_attr_names)

['Gene']


In [11]:
# Names of the column attributes
with lp.connect("Oligos.loom") as loom:
    col_attr_names = loom.ca.keys()
    print(col_attr_names)

['cell_id']


In [12]:
# Names of the layers stored in the file
with lp.connect("Oligos.loom") as loom:
    layer_names = loom.layers.keys()
    print(layer_names)

['']


In [14]:
# Graphs: print the keys of the row graphs first, then those of the column graphs
with lp.connect("Oligos.loom") as loom:
    for graphs in (loom.row_graphs, loom.col_graphs):
        print(graphs.keys())

[]
[]


In [None]:
# The scan method enables scanning along an axis of the loom file to retrieve information

In [15]:
# Package versions: one line per imported library, for provenance
for module in (np, pd, lp):
    print(f"{module.__name__} version: ", module.__version__)

numpy version:  2.0.2
pandas version:  2.2.2
loompy version:  3.0.8
