In [1]:
#!/usr/bin/env python3
"""
Test script for understanding and testing the basic data classes
"""

import numpy as np
from dataclasses import dataclass, asdict
from typing import List

@dataclass
class Marker:
    """A single named locus on a chromosome.

    Each marker is described by two coordinates for the same spot: one
    measured along the physical DNA sequence and one measured on the
    genetic (recombination) map.

    Attributes:
        id: Label for the marker, e.g. "SNP_1".
        physical_position: Location along the chromosome, in base pairs.
        genetic_position: Location on the genetic map, in centiMorgans.
    """

    # Marker label
    id: str
    # Physical coordinate (base pairs)
    physical_position: float
    # Genetic-map coordinate (centiMorgans)
    genetic_position: float

@dataclass
class Chromosome:
    """A chromosome together with the markers that sit on it.

    Attributes:
        id: Chromosome number (1, 2, 3, ...).
        physical_length_bp: Full length of the chromosome, in base pairs.
        genetic_length_cM: Full length on the genetic map, in centiMorgans.
        markers: The Marker objects located on this chromosome.
    """

    # Chromosome number
    id: int
    # Total physical length (base pairs)
    physical_length_bp: float
    # Total genetic-map length (centiMorgans)
    genetic_length_cM: float
    # Markers carried by this chromosome
    markers: List[Marker]

def test_data_classes():
    """Walk through building and inspecting the Marker/Chromosome classes.

    Builds three markers, places them on one chromosome, prints the
    objects and their individual fields, and prints the result of
    converting the chromosome with ``asdict``.

    Returns:
        The Chromosome created during the walkthrough.
    """
    print("=== TESTING DATA CLASSES ===\n")

    # Step 1: three hand-written markers
    print("1. Creating markers...")
    snps = [
        Marker(id="SNP_001", physical_position=1000000, genetic_position=5.0),
        Marker(id="SNP_002", physical_position=5000000, genetic_position=25.0),
        Marker(id="SNP_003", physical_position=9000000, genetic_position=45.0),
    ]
    for number, snp in enumerate(snps, start=1):
        print(f"Marker {number}: {snp}")

    # Step 2: a 10 million bp / 50 cM chromosome carrying those markers
    print("\n2. Creating a chromosome with these markers...")
    chromosome = Chromosome(
        id=1,
        physical_length_bp=10_000_000,
        genetic_length_cM=50.0,
        markers=snps,
    )
    print(f"Chromosome: {chromosome}")

    # Step 3: read back the scalar fields
    print("\n3. Accessing chromosome properties...")
    print(f"Chromosome ID: {chromosome.id}")
    print(f"Physical length: {chromosome.physical_length_bp:,} bp")
    print(f"Genetic length: {chromosome.genetic_length_cM} cM")
    print(f"Number of markers: {len(chromosome.markers)}")

    # Step 4: iterate over the markers stored on the chromosome
    print("\n4. Accessing individual markers on the chromosome...")
    for number, snp in enumerate(chromosome.markers, start=1):
        print(f"  Marker {number}: {snp.id} at {snp.physical_position:,} bp ({snp.genetic_position} cM)")

    # Step 5: asdict recurses into the nested Marker dataclasses as well
    print("\n5. Converting to dictionary (for JSON export)...")
    as_mapping = asdict(chromosome)
    print(f"As dictionary: {as_mapping}")

    return chromosome

def test_marker_spacing():
    """Place evenly spaced markers on a chromosome and report the gaps.

    Five markers are laid out on a 100,000,000 bp / 100 cM chromosome at
    fractions k / (n + 1) of its length for k = 1..n, so no marker lands
    on either chromosome end. The genetic distance between each pair of
    neighbouring markers is then printed.

    Returns:
        The Chromosome holding the generated markers.
    """
    print("\n=== TESTING MARKER SPACING ===\n")

    # Chromosome dimensions and marker count for this demonstration
    total_bp = 100_000_000
    total_cM = 100.0
    n_markers = 5

    print(f"Creating chromosome: {total_bp:,} bp, {total_cM} cM")
    print(f"Placing {n_markers} markers uniformly...")

    markers = []
    for slot in range(1, n_markers + 1):
        # Same fraction of the length is used for both coordinate systems
        fraction = slot / (n_markers + 1)
        placed = Marker(
            id=f"chr1_marker_{slot}",
            physical_position=total_bp * fraction,
            genetic_position=total_cM * fraction,
        )
        markers.append(placed)
        print(f"  {placed.id}: {placed.physical_position:,.0f} bp ({placed.genetic_position:.1f} cM)")

    chromosome = Chromosome(
        id=1,
        physical_length_bp=total_bp,
        genetic_length_cM=total_cM,
        markers=markers,
    )

    print(f"\nCreated chromosome with {len(chromosome.markers)} markers")

    # Pair each marker with its right-hand neighbour
    print("\nGenetic distances between adjacent markers:")
    for left, right in zip(markers, markers[1:]):
        gap = right.genetic_position - left.genetic_position
        print(f"  {left.id} to {right.id}: {gap:.1f} cM")

    return chromosome

if __name__ == "__main__":
    # Execute both walkthroughs and keep the chromosomes they return
    test_chromosome = test_data_classes()
    test_spacing_chromosome = test_marker_spacing()

    # Emit the recap in a single call, one entry per output line
    print(
        "\n=== SUMMARY ===",
        "✓ Markers store ID, physical position (bp), and genetic position (cM)",
        "✓ Chromosomes store ID, lengths, and a list of markers",
        "✓ Uniform marker spacing distributes markers evenly",
        "✓ Data classes can be converted to dictionaries for export",
        sep="\n",
    )

=== TESTING DATA CLASSES ===

1. Creating markers...
Marker 1: Marker(id='SNP_001', physical_position=1000000, genetic_position=5.0)
Marker 2: Marker(id='SNP_002', physical_position=5000000, genetic_position=25.0)
Marker 3: Marker(id='SNP_003', physical_position=9000000, genetic_position=45.0)

2. Creating a chromosome with these markers...
Chromosome: Chromosome(id=1, physical_length_bp=10000000, genetic_length_cM=50.0, markers=[Marker(id='SNP_001', physical_position=1000000, genetic_position=5.0), Marker(id='SNP_002', physical_position=5000000, genetic_position=25.0), Marker(id='SNP_003', physical_position=9000000, genetic_position=45.0)])

3. Accessing chromosome properties...
Chromosome ID: 1
Physical length: 10,000,000 bp
Genetic length: 50.0 cM
Number of markers: 3

4. Accessing individual markers on the chromosome...
  Marker 1: SNP_001 at 1,000,000 bp (5.0 cM)
  Marker 2: SNP_002 at 5,000,000 bp (25.0 cM)
  Marker 3: SNP_003 at 9,000,000 bp (45.0 cM)

5. Converting to dictionar