In [1]:
import scanpy as sc

# Define the input and output file paths
input_file = "raw_data/sc_cll_3ca.h5ad"
output_file = "raw_data/normalized_sc_cll_3ca.h5ad"

# Load the .h5ad file
adata = sc.read_h5ad(input_file)
print(adata.X)

# Perform normalization without scaling to a specific target
scales_counts = sc.pp.normalize_total(adata, target_sum=1, inplace=False)

# Log-transform the normalized counts
adata.X = sc.pp.log1p(scales_counts["X"], copy=True)

# Save the normalized data to the output file
adata.write_h5ad(output_file)

print(f"Normalized data from {input_file} and saved to {output_file}")

[[0.02956717 0.23196451 0.02567503 ... 0.06111212 0.36055468 2.56828675]
 [0.01488    0.09161613 0.57748051 ... 0.34271102 3.94986387 0.07784286]
 [0.04430719 0.41969181 0.04911118 ... 0.11584567 3.33900653 0.37157311]
 ...
 [0.0507351  0.53274201 0.06490431 ... 0.15026108 3.6416971  2.02400483]
 [0.06831485 1.06571155 0.15500656 ... 0.33645325 1.51296417 3.35746628]
 [0.03730889 4.20228027 0.03634471 ... 0.08528911 0.48749039 1.05628618]]
Normalized data from raw_data/sc_cll_3ca.h5ad and saved to raw_data/normalized_sc_cll_3ca.h5ad


In [4]:
import scanpy as sc

# Load the normalized .h5ad file
adata_norm = sc.read_h5ad('raw_data/normalized_sc_cll_3ca.h5ad')

# Print the AnnData object to see a summary of its contents
print(adata_norm.X)

# Display the shape of the data matrix (cells x genes)
print("Data matrix shape (cells x genes):", adata_norm.shape)

# Show the first few rows of the expression matrix (X)
print("Expression matrix (X):")
print(adata_norm.X[:5, :5])  # Show a subset (first 5 rows and columns) for brevity

# Show the first few rows of the cell metadata (obs)
print("Cell metadata (obs):")
print(adata_norm.obs.head())

# Show the first few rows of the gene metadata (var)
print("Gene metadata (var):")
print(adata_norm.var.head())

# Check for any additional metadata stored in `uns`
print("Unstructured metadata (uns):")
print(adata_norm.uns)

# Check for any multi-dimensional observations (obsm) and variables (varm)
print("Multi-dimensional cell metadata (obsm):")
print(adata_norm.obsm)

print("Multi-dimensional gene metadata (varm):")
print(adata_norm.varm)

  (35, 0)	2.1716589344578097e-06
  (45, 0)	6.983654657233274e-06
  (47, 0)	1.0999990081472788e-05
  (53, 0)	3.099645937254536e-06
  (62, 0)	3.6398248539626366e-06
  (105, 0)	5.384058113122592e-06
  (162, 0)	8.141565558617003e-06
  (165, 0)	6.885463790240465e-06
  (193, 0)	7.095696219039382e-06
  (194, 0)	3.506391294649802e-06
  (240, 0)	1.1892020665982272e-05
  (247, 0)	2.679899444046896e-05
  (251, 0)	7.542814728367375e-06
  (269, 0)	2.7825196866615443e-06
  (330, 0)	6.026619757903973e-06
  (359, 0)	7.938177077448927e-06
  (396, 0)	2.9897601052653044e-05
  (408, 0)	5.011329267290421e-05
  (422, 0)	1.1469336641312111e-05
  (427, 0)	3.69338272321329e-06
  (429, 0)	4.492816970014246e-06
  (462, 0)	4.829063072975259e-06
  (473, 0)	1.003618035610998e-05
  (511, 0)	1.568522748129908e-05
  (524, 0)	4.469603481993545e-06
  :	:
  (655, 55763)	0.0005696585867553949
  (661, 55763)	0.00034831068478524685
  (665, 55763)	0.0002749518898781389
  (668, 55763)	5.609626168734394e-05
  (673, 55763)	0.00