# TECHIN 509: Melody Generator with Files & Testing

**Name:** Rushav Dash 
**Date:** 11/22/2025

---

## Part 1: Working with Files

### Load Melodies from File

In [14]:
def load_melodies(path: str) -> list[list[str]]:
    """
    Read melodies from a text file, one melody per line.

    Each non-blank line is split on whitespace into note tokens
    (e.g. "C3_0.25"); blank and whitespace-only lines are skipped.
    The file is always decoded as UTF-8 so the result does not depend
    on the platform's default encoding.

    Args:
        path: Path to the file containing melodies

    Returns:
        List of melodies, where each melody is a list of note strings.
        An empty list is returned (after printing a message) when the
        file is missing or cannot be read or decoded.
    """

    try:
        with open(path, 'r', encoding='utf-8') as file:
            # split() with no argument ignores leading/trailing whitespace and
            # yields [] for a blank line, so it both strips and tokenizes.
            tokenized = (line.split() for line in file)
            return [notes for notes in tokenized if notes]

    except FileNotFoundError:
        print(f"File not found: {path}")
        print("Please make sure the dataset file exists.")
        return []

    # Other I/O failures (permissions, path is a directory, ...) and
    # undecodable bytes are reported the same best-effort way.
    except (OSError, UnicodeError) as e:
        print(f"Error reading file: {e}")
        return []

# Test the function with NES dataset format
# Smoke test: load the dataset from its relative path and report how many
# melodies were read.
print("Testing load_melodies function:")
melodies = load_melodies('data/melodies.txt')
print(f"Loaded {len(melodies)} melodies")

# Guard the preview: indexing melodies[0] would fail on an empty result.
if melodies:
    print(f"First melody has {len(melodies[0])} notes")
    print(f"First 5 notes: {melodies[0][:5]}")

Testing load_melodies function:
Loaded 20 melodies
First melody has 27 notes
First 5 notes: ['C3_0.25', 'G3_0.25', 'C4_0.25', 'E4_0.25', 'F4_0.25']


### Save Generated Melodies to File

In [15]:
def save_melodies(melodies: list[list[str]], path: str) -> None:
    """
    Save a list of generated melodies to a file, one melody per line.

    Notes within a melody are separated by single spaces. The parent
    directory of ``path`` is created if it does not exist yet, and the
    file is written as UTF-8. Failures are reported with a printed
    message instead of being raised.

    Args:
        melodies: List of melodies, where each melody is a list of note strings
        path: Path where the file should be saved
    """
    # Imported locally so the function does not depend on an import elsewhere
    import os

    try:
        # Build every line before touching the file, so a malformed melody
        # (e.g. a non-string note) cannot leave a half-written file behind.
        lines = [' '.join(melody) + '\n' for melody in melodies]

        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(path, 'w', encoding='utf-8') as file:
            file.writelines(lines)

        print(f"Successfully saved {len(lines)} melodies to {path}")

    # OSError covers filesystem problems; TypeError is what str.join raises
    # when a note is not a string.
    except (OSError, TypeError) as e:
        print(f"Error saving file: {e}")

# Test the function
# Three short hand-written melodies, including one note with a 0.0 duration,
# used to exercise save_melodies.
test_melodies = [
    ['A#2_0.24', 'D3_0.25', 'F#3_0.25', 'B3_0.25'],
    ['G2_0.25', 'F2_0.25', 'C3_0.0'],
    ['C3_0.25', 'E3_0.25', 'G3_0.25']
]

# Writes one line per melody to a path relative to the working directory.
save_melodies(test_melodies, 'output/test_output.txt')

Successfully saved 3 melodies to output/test_output.txt


### Helper Functions for Working with Note Format

In [17]:
def parse_note(note_str: str) -> tuple[str, float]:
    """
    Split a "NOTE_DURATION" token into its two components.

    Args:
        note_str: Token such as "A#2_0.24"; it must contain exactly one
            underscore separating the note name from the duration

    Returns:
        Tuple of (note_name, duration), with the duration converted to float

    Raises:
        ValueError: If the token does not contain exactly one underscore,
            or if the duration part cannot be converted by float()
    """

    pitch, separator, length_text = note_str.partition('_')

    # No underscore at all, or a second one hiding in the remainder
    if not separator or '_' in length_text:
        raise ValueError(f"Invalid note format: {note_str}")

    return pitch, float(length_text)

# Test the parser
# Sample tokens: sharps, a natural note, and one with a 0.0 duration.
test_notes = ["A#2_0.24", "D3_0.25", "F#3_0.25", "C3_0.0"]
print("Parsing notes:")

# Print each token next to the (name, duration) pair parse_note returns.
for note in test_notes:
    name, duration = parse_note(note)
    print(f"  {note} -> Note: {name}, Duration: {duration}")

Parsing notes:
  A#2_0.24 -> Note: A#2, Duration: 0.24
  D3_0.25 -> Note: D3, Duration: 0.25
  F#3_0.25 -> Note: F#3, Duration: 0.25
  C3_0.0 -> Note: C3, Duration: 0.0


In [18]:
def filter_rests(melody: list[str]) -> list[str]:
    """
    Keep only the notes whose parsed duration is greater than zero.

    Args:
        melody: List of notes in "NOTE_DURATION" format

    Returns:
        New list with the surviving notes in their original order;
        the input list is not modified
    """

    # parse_note returns (name, duration); only the duration matters here
    return [token for token in melody if parse_note(token)[1] > 0.0]

# Test filtering
# Two of the five sample notes have a 0.0 duration and should be dropped.
test_melody = ["A#2_0.24", "D3_0.25", "C3_0.0", "F#3_0.25", "G2_0.0"]
print("\nOriginal melody:", test_melody)
print("Filtered melody:", filter_rests(test_melody))
# filter_rests is called a second time here only to count what was removed.
print(f"Removed {len(test_melody) - len(filter_rests(test_melody))} rests")


Original melody: ['A#2_0.24', 'D3_0.25', 'C3_0.0', 'F#3_0.25', 'G2_0.0']
Filtered melody: ['A#2_0.24', 'D3_0.25', 'F#3_0.25']
Removed 2 rests


### Example Usage: Complete Workflow

In [19]:
# End-to-end example: load the dataset, summarize it, drop zero-duration
# notes, and save a small sample of the result.

# Step 1: Load training data
print("=== Loading Training Data ===")
training_melodies = load_melodies('data/melodies.txt')
print(f"Loaded {len(training_melodies)} melodies from training data\n")

# Step 2: Display some training data statistics
# Guarded because the average divides by the melody count and the preview
# indexes the first melody.
if training_melodies:
    
    print("Training data statistics:")
    total_notes = sum(len(melody) for melody in training_melodies)
    print(f"  Total notes: {total_notes}")
    print(f"  Average notes per melody: {total_notes / len(training_melodies):.1f}")
    
    # Show first melody sample
    print(f"\nFirst melody ({len(training_melodies[0])} notes):")
    print(f"  First 10 notes: {training_melodies[0][:10]}")
    print()

# Step 3: Filter out rests (optional preprocessing)
# NOTE: steps 3 and 4 are not guarded, so they also run when nothing was loaded.
print("=== Preprocessing: Removing Rests ===")
filtered_melodies = [filter_rests(melody) for melody in training_melodies]
total_filtered = sum(len(melody) for melody in filtered_melodies)
print(f"After removing 0.0 duration notes: {total_filtered} notes remain\n")

# Step 4: Save preprocessed melodies
# Only the first five filtered melodies are written.
print("=== Saving Preprocessed Melodies ===")
save_melodies(filtered_melodies[:5], 'output/preprocessed_sample.txt')

=== Loading Training Data ===
Loaded 20 melodies from training data

Training data statistics:
  Total notes: 299
  Average notes per melody: 14.9

First melody (27 notes):
  First 10 notes: ['C3_0.25', 'G3_0.25', 'C4_0.25', 'E4_0.25', 'F4_0.25', 'G4_0.25', 'C4_0.25', 'E4_0.25', 'F4_0.25', 'G4_0.25']

=== Preprocessing: Removing Rests ===
After removing 0.0 duration notes: 294 notes remain

=== Saving Preprocessed Melodies ===
Successfully saved 5 melodies to output/preprocessed_sample.txt


---

## Part 2: Testing with unittest

### Test File: test_models.py

In [20]:
import unittest
import os
import tempfile

# Functions to test
def load_melodies(path: str) -> list[list[str]]:
    """
    Read melodies from a text file, one melody per line.

    Each non-blank line is split on whitespace into note tokens; blank
    and whitespace-only lines are skipped. The file is decoded as UTF-8
    regardless of the platform's default encoding.

    Args:
        path: Path to the file containing melodies

    Returns:
        List of melodies (each a list of note strings), or an empty list
        (after printing a message) if the file is missing or unreadable.
    """

    try:
        with open(path, 'r', encoding='utf-8') as file:
            # split() with no argument yields [] for a blank line, so one
            # call handles both stripping and tokenizing.
            tokenized = (line.split() for line in file)
            return [notes for notes in tokenized if notes]

    except FileNotFoundError:
        print(f"File not found: {path}")
        print("Please make sure the dataset file exists.")
        return []

    # Remaining I/O failures and undecodable bytes are reported, not raised.
    except (OSError, UnicodeError) as e:
        print(f"Error reading file: {e}")
        return []

def save_melodies(melodies: list[list[str]], path: str) -> None:
    """
    Save melodies to a file, one melody per line with space-separated notes.

    The parent directory of ``path`` is created when missing and the file
    is written as UTF-8. A confirmation line is printed on success and an
    error line on failure; no exception is raised for I/O problems.

    Args:
        melodies: List of melodies, where each melody is a list of note strings
        path: Path where the file should be saved
    """

    try:
        # Build every line first so a malformed melody cannot leave a
        # truncated file behind.
        lines = [' '.join(melody) + '\n' for melody in melodies]

        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(path, 'w', encoding='utf-8') as file:
            file.writelines(lines)

        # Report success so callers such as a main() routine give visible
        # feedback after saving.
        print(f"Successfully saved {len(lines)} melodies to {path}")

    # OSError covers filesystem problems; TypeError is what str.join raises
    # when a note is not a string.
    except (OSError, TypeError) as e:
        print(f"Error saving file: {e}")

def parse_note(note_str: str) -> tuple[str, float]:
    """
    Split a "NOTE_DURATION" token into (note_name, duration).

    Raises:
        ValueError: If the token does not contain exactly one underscore,
            or if the duration part cannot be converted by float()
    """
    pitch, separator, length_text = note_str.partition('_')

    # Reject tokens with no underscore or with more than one
    if not separator or '_' in length_text:
        raise ValueError(f"Invalid note format: {note_str}")

    return pitch, float(length_text)


class TestMelodyFunctions(unittest.TestCase):
    """Unit tests covering load_melodies, save_melodies and parse_note."""

    def _temp_txt(self, text=''):
        """Create a closed temporary .txt file containing ``text``.

        Returns the file's path. The file is deleted again once the calling
        test has finished, whether it passed or failed.
        """
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as handle:
            handle.write(text)
        self.addCleanup(self._discard, handle.name)
        return handle.name

    @staticmethod
    def _discard(path):
        """Delete ``path`` if it still exists."""
        if os.path.exists(path):
            os.unlink(path)

    def test_load_melodies_valid_file(self):
        """Two lines of notes load as two melodies with notes in file order."""
        path = self._temp_txt(
            'A#2_0.24 D3_0.25 F#3_0.25\n'
            'G2_0.25 F2_0.25 C3_0.0\n'
        )

        loaded = load_melodies(path)

        self.assertEqual(len(loaded), 2)
        self.assertEqual(loaded[0], ['A#2_0.24', 'D3_0.25', 'F#3_0.25'])
        self.assertEqual(loaded[1], ['G2_0.25', 'F2_0.25', 'C3_0.0'])

    def test_load_melodies_nonexistent_file(self):
        """A missing file yields an empty list rather than an exception."""
        self.assertEqual(load_melodies('nonexistent_file.txt'), [])

    def test_load_melodies_empty_lines(self):
        """Blank and whitespace-only lines are skipped while loading."""
        path = self._temp_txt(
            'A#2_0.24 D3_0.25\n'
            '\n'        # empty line
            'E3_0.25 F3_0.25\n'
            '   \n'     # whitespace only
            'G3_0.25\n'
        )

        loaded = load_melodies(path)

        self.assertEqual(len(loaded), 3)
        self.assertEqual(loaded[0], ['A#2_0.24', 'D3_0.25'])
        self.assertEqual(loaded[1], ['E3_0.25', 'F3_0.25'])
        self.assertEqual(loaded[2], ['G3_0.25'])

    def test_save_melodies_creates_file(self):
        """Saving writes one space-separated line per melody."""
        path = self._temp_txt()
        to_save = [
            ['A#2_0.24', 'D3_0.25', 'F#3_0.25'],
            ['G2_0.25', 'F2_0.25'],
        ]

        save_melodies(to_save, path)

        with open(path, 'r') as written:
            rows = written.readlines()

        self.assertEqual(len(rows), 2)
        self.assertEqual(rows[0].strip(), 'A#2_0.24 D3_0.25 F#3_0.25')
        self.assertEqual(rows[1].strip(), 'G2_0.25 F2_0.25')

    def test_save_and_load_round_trip(self):
        """Melodies survive a save followed by a load unchanged."""
        path = self._temp_txt()
        before = [
            ['A#2_0.24', 'D3_0.25', 'F#3_0.25'],
            ['G2_0.25', 'F2_0.25'],
            ['C3_0.25', 'E3_0.25', 'G3_0.25'],
        ]

        save_melodies(before, path)
        after = load_melodies(path)

        self.assertEqual(before, after)

    def test_parse_note_valid(self):
        """Sharp, natural and zero-duration tokens parse to (name, duration)."""
        expectations = (
            ('A#2_0.24', 'A#2', 0.24),   # sharp note
            ('D3_0.25', 'D3', 0.25),     # natural note
            ('C3_0.0', 'C3', 0.0),       # 0.0 duration
        )

        for token, expected_name, expected_duration in expectations:
            name, duration = parse_note(token)
            self.assertEqual(name, expected_name)
            self.assertAlmostEqual(duration, expected_duration)

    def test_parse_note_invalid(self):
        """Tokens without a duration part are rejected with ValueError."""
        for token in ('InvalidNote', 'A#2'):
            with self.assertRaises(ValueError):
                parse_note(token)


# Run the tests
# argv=[''] hands unittest a placeholder program name instead of the real
# command line, and exit=False stops unittest.main() from calling sys.exit()
# when the run finishes; verbosity=2 prints one result line per test.
if __name__ == '__main__':
    unittest.main(argv=[''], exit=False, verbosity=2)

test_load_melodies_empty_lines (__main__.TestMelodyFunctions.test_load_melodies_empty_lines) ... ok
test_load_melodies_nonexistent_file (__main__.TestMelodyFunctions.test_load_melodies_nonexistent_file) ... ok
test_load_melodies_valid_file (__main__.TestMelodyFunctions.test_load_melodies_valid_file) ... ok
test_parse_note_invalid (__main__.TestMelodyFunctions.test_parse_note_invalid) ... ok
test_parse_note_valid (__main__.TestMelodyFunctions.test_parse_note_valid) ... ok
test_save_and_load_round_trip (__main__.TestMelodyFunctions.test_save_and_load_round_trip) ... ok
test_save_melodies_creates_file (__main__.TestMelodyFunctions.test_save_melodies_creates_file) ... ok

----------------------------------------------------------------------
Ran 7 tests in 0.073s

OK


---

## Additional Helper Function: Flatten Melodies for Training

In [22]:
def flatten_melodies(melodies: list[list[str]]) -> list[str]:
    """
    Concatenate every melody into one continuous list of notes.

    Args:
        melodies: List of melodies (each melody is a list of notes)

    Returns:
        New flat list holding the notes of the first melody, then the
        second, and so on, in their original order
    """

    return [note for melody in melodies for note in melody]


def build_transitions(notes: list[str]) -> dict[str, list[str]]:
    """
    Record, for every note, which notes were observed directly after it.

    Args:
        notes: Flat list of notes in format "NOTE_DURATION"

    Returns:
        Dictionary mapping each note to the list of notes that followed it.
        Followers are kept in order of occurrence and repeated once per
        occurrence; the final note gets no entry unless it also appears
        earlier in the list.
    """

    followers: dict[str, list[str]] = {}

    # Pair every note with its immediate successor
    for here, there in zip(notes, notes[1:]):
        followers.setdefault(here, []).append(there)

    return followers


def get_note_statistics(melodies: list[list[str]]) -> dict:
    """
    Summarize a collection of melodies.

    Args:
        melodies: List of melodies (each melody is a list of notes in
            "NOTE_DURATION" format)

    Returns:
        Dictionary with the keys 'total_notes', 'unique_notes',
        'total_melodies', 'avg_duration', 'rest_count' (notes whose
        duration is 0.0) and 'rest_percentage'. The average and the
        percentage are 0 when there are no notes.
    """

    notes = flatten_melodies(melodies)
    note_count = len(notes)
    distinct_count = len(set(notes))

    # Durations come from the part after the underscore in each token
    lengths = [parse_note(token)[1] for token in notes]
    if lengths:
        mean_length = sum(lengths) / len(lengths)
    else:
        mean_length = 0

    zero_length = lengths.count(0.0)
    if note_count > 0:
        zero_share = zero_length / note_count * 100
    else:
        zero_share = 0

    return {
        'total_notes': note_count,
        'unique_notes': distinct_count,
        'total_melodies': len(melodies),
        'avg_duration': mean_length,
        'rest_count': zero_length,
        'rest_percentage': zero_share
    }


# Test these functions
print("=== Testing Helper Functions ===\n")

# Sample melodies in NES format
# The first and third melodies share their opening two notes, so those notes
# end up with more than one recorded follower.
sample_melodies = [
    ['A#2_0.24', 'D3_0.25', 'F#3_0.25', 'B3_0.25'],
    ['G2_0.25', 'F2_0.25', 'C3_0.0'],
    ['A#2_0.24', 'D3_0.25', 'E3_0.0']
]

# Flatten
flat = flatten_melodies(sample_melodies)
print(f"Flattened notes: {flat}\n")

# Build transitions
# NOTE: built from the flattened list, so the last note of one melody is
# also recorded as leading to the first note of the next melody.
transitions = build_transitions(flat)
print("Transitions:")

# Only the first five entries are printed.
for note, next_notes in list(transitions.items())[:5]:
    print(f"  {note} -> {next_notes}")

print()

# Statistics
stats = get_note_statistics(sample_melodies)

print("Statistics:")

for key, value in stats.items():
    print(f"  {key}: {value}")

=== Testing Helper Functions ===

Flattened notes: ['A#2_0.24', 'D3_0.25', 'F#3_0.25', 'B3_0.25', 'G2_0.25', 'F2_0.25', 'C3_0.0', 'A#2_0.24', 'D3_0.25', 'E3_0.0']

Transitions:
  A#2_0.24 -> ['D3_0.25', 'D3_0.25']
  D3_0.25 -> ['F#3_0.25', 'E3_0.0']
  F#3_0.25 -> ['B3_0.25']
  B3_0.25 -> ['G2_0.25']
  G2_0.25 -> ['F2_0.25']

Statistics:
  total_notes: 10
  unique_notes: 8
  total_melodies: 3
  avg_duration: 0.198
  rest_count: 2
  rest_percentage: 20.0


---

## Complete Code Template for Your Project


In [23]:
import random

def generate_melody(transitions: dict[str, list[str]], start_note: str, length: int) -> list[str]:
    """
    Generate a melody by a random walk over the transition table.

    Each step picks the next note with random.choice from the list of
    followers recorded for the current note, so a follower that appears
    several times in that list is proportionally more likely.

    Args:
        transitions: Dictionary mapping notes to possible next notes
        start_note: The note to start the melody with
        length: Maximum number of notes to generate

    Returns:
        List of at most ``length`` notes beginning with ``start_note``.
        The melody ends early when the current note has no recorded
        followers, and is empty when ``length`` is zero or negative.
    """

    # Asking for no notes must not return the start note anyway
    if length <= 0:
        return []

    melody = [start_note]
    current_note = start_note

    while len(melody) < length:
        candidates = transitions.get(current_note)

        # Missing key or empty follower list: dead end, stop here instead
        # of letting random.choice fail on an empty sequence
        if not candidates:
            break

        current_note = random.choice(candidates)
        melody.append(current_note)

    return melody


def main():
    """
    Run the whole pipeline: load, analyze, train, generate, save.

    Reads training melodies from 'data/melodies.txt', prints dataset
    statistics, builds the transition table, generates five melodies of
    up to 12 notes each and writes them to 'output/generated_melodies.txt'.
    Returns early, after printing a message, when no training data loads.
    """

    print("=== MELODY GENERATOR (NES Format) ===\n")

    # Step 1: training data
    print("1. Loading training data...")
    corpus = load_melodies('data/melodies.txt')
    print(f"   Loaded {len(corpus)} melodies\n")

    if not corpus:
        print("No training data available. Exiting.")
        return

    # Step 2: statistics (floats are shown with two decimals)
    print("2. Analyzing training data...")
    for label, figure in get_note_statistics(corpus).items():
        shown = f"{figure:.2f}" if isinstance(figure, float) else f"{figure}"
        print(f"   {label}: {shown}")
    print()

    # Step 3: transition model
    # NOTE(review): the table is built from the flattened corpus, so it also
    # contains the jump from each melody's last note to the next melody's
    # first note - confirm that this is intended.
    print("3. Building transition model...")
    model = build_transitions(flatten_melodies(corpus))
    print(f"   Learned transitions for {len(model)} unique notes\n")

    # Step 4: generation, each melody starting on the opening note of a
    # randomly chosen training melody
    print("4. Generating new melodies...")
    openers = [melody[0] for melody in corpus if melody]
    results = []

    for number in range(1, 6):
        tune = generate_melody(model, random.choice(openers), length=12)
        results.append(tune)
        print(f"   Melody {number} ({len(tune)} notes): {tune[:6]}...")

    print()

    # Step 5: persist the results
    print("5. Saving generated melodies...")
    save_melodies(results, 'output/generated_melodies.txt')

    print("\n=== DONE ===")

# Run the main program
# Called unconditionally, so the pipeline runs whenever this code is executed.
main()

=== MELODY GENERATOR (NES Format) ===

1. Loading training data...
   Loaded 20 melodies

2. Analyzing training data...
   total_notes: 299
   unique_notes: 25
   total_melodies: 20
   avg_duration: 0.30
   rest_count: 5
   rest_percentage: 1.67

3. Building transition model...
   Learned transitions for 25 unique notes

4. Generating new melodies...
   Melody 1 (12 notes): ['C3_0.5', 'G3_0.25', 'C4_0.25', 'E4_0.25', 'F4_0.25', 'G4_0.25']...
   Melody 2 (12 notes): ['G3_0.5', 'C3_0.25', 'E3_0.25', 'G3_0.25', 'C4_0.25', 'D4_0.25']...
   Melody 3 (12 notes): ['C3_0.25', 'G3_0.25', 'C4_0.25', 'E4_0.5', 'F4_0.25', 'E4_0.25']...
   Melody 4 (12 notes): ['G3_0.25', 'C4_0.25', 'E4_0.25', 'C4_0.25', 'Bb3_0.25', 'C4_0.25']...
   Melody 5 (12 notes): ['C3_0.25', 'G3_0.25', 'C4_0.25', 'G3_0.25', 'C4_0.25', 'Bb3_0.25']...

5. Saving generated melodies...

=== DONE ===


## Playing Generated Melodies

To play the generated melodies as audio, we'll use Python libraries that can convert our note data into sound.

In [24]:
# Simple audio playback without midiutil
import numpy as np
from IPython.display import Audio
import IPython.display as ipd

def note_to_frequency(note_name: str) -> float:
    """
    Convert a note name such as "A4", "F#3" or "Bb2" to a frequency in Hz.

    The pitch is computed in equal temperament relative to A4 = 440 Hz:
    every semitone multiplies the frequency by 2 ** (1 / 12).

    Args:
        note_name: Letter, optional '#' or 'b' accidental, then the octave

    Returns:
        Frequency of the note in hertz

    Raises:
        KeyError: If the letter/accidental part is not a known pitch class
        ValueError: If the octave part is not an integer
        IndexError: If ``note_name`` is empty
    """

    # Semitone offset of each pitch class from A within the same octave number
    semitone_offsets = {
        'C': -9, 'C#': -8, 'Db': -8,
        'D': -7, 'D#': -6, 'Eb': -6,
        'E': -5,
        'F': -4, 'F#': -3, 'Gb': -3,
        'G': -2, 'G#': -1, 'Ab': -1,
        'A': 0, 'A#': 1, 'Bb': 1,
        'B': 2
    }

    # An accidental is only recognized when an octave follows it
    has_accidental = len(note_name) >= 3 and note_name[1] in ('#', 'b')
    if has_accidental:
        pitch_class, octave_text = note_name[:2], note_name[2:]
    else:
        pitch_class, octave_text = note_name[0], note_name[1:]
    octave = int(octave_text)

    # Distance from A4 in semitones: offset within the octave plus 12 per octave
    distance = semitone_offsets[pitch_class] + (octave - 4) * 12
    return 440 * (2 ** (distance / 12))


def melody_to_audio(melody: list[str], sample_rate: int = 44100) -> np.ndarray:
    """
    Convert a melody to an audio waveform.

    Every note becomes a sine wave at the note's frequency with an
    amplitude of 0.3, and the per-note waves are joined end to end.

    Args:
        melody: List of notes in format "NOTE_DURATION"
        sample_rate: Audio sample rate (Hz)

    Returns:
        One-dimensional numpy array of audio samples; empty when the
        melody is empty
    """

    segments = []

    for note_str in melody:
        note_name, duration = parse_note(note_str)

        # Zero-duration notes are not skipped: they are rendered as a
        # short 0.1-unit tone so they remain audible.
        if duration == 0.0:
            duration = 0.1

        # Convert to frequency
        freq = note_to_frequency(note_name)

        # Generate sine wave for this note (endpoint excluded so that
        # consecutive notes do not duplicate a sample time)
        num_samples = int(sample_rate * duration)
        t = np.linspace(0, duration, num_samples, False)
        segments.append(0.3 * np.sin(2 * np.pi * freq * t))

    # np.concatenate rejects an empty sequence, so handle that case here
    if not segments:
        return np.array([])

    # Join once at the end instead of growing a Python list sample by sample
    return np.concatenate(segments)


def play_melody(melody: list[str], sample_rate: int = 44100):
    """
    Generate audio for a melody and return a player for it.

    Args:
        melody: List of notes in format "NOTE_DURATION"
        sample_rate: Audio sample rate (Hz), used both to synthesize the
            waveform and as the playback rate so the two cannot drift apart

    Returns:
        IPython.display.Audio object created with autoplay enabled; Jupyter
        renders it as a player when it is the last expression of a cell
    """

    print(f"🎵 Generating audio for {len(melody)} notes...")

    # Convert melody to audio
    audio = melody_to_audio(melody, sample_rate)

    # Display audio player
    print("▶️  Click play below:")
    return Audio(audio, rate=sample_rate, autoplay=True)


# Generate and play!
print("=" * 60)
print("GENERATING AND PLAYING MELODY")
print("=" * 60)

# Load and generate
# Rebuilds the transition table from the dataset file for this demo.
training = load_melodies('data/melodies.txt')
all_notes = flatten_melodies(training)
transitions = build_transitions(all_notes)

# Generate one melody
# Prefer 'C3_0.25' as the start; otherwise fall back to the first key.
# NOTE: the fallback indexes [0], so it raises IndexError if transitions is empty.
start = 'C3_0.25' if 'C3_0.25' in transitions else list(transitions.keys())[0]
new_melody = generate_melody(transitions, start, length=20)

# Display the notes
# Only the note names are shown (durations dropped), eight per row.
print("\nGenerated melody:")
note_names = [parse_note(n)[0] for n in new_melody]

for i in range(0, len(note_names), 8):
    print('  ' + ' '.join(note_names[i:i+8]))

print("\n")

# Play it!
# play_melody returns the Audio object; presumably left as the final
# expression so the notebook renders the player.
play_melody(new_melody)

GENERATING AND PLAYING MELODY

Generated melody:
  C3 C3 E3 C3 G3 C4 E4 F4
  G4 C5 G4 C5 E5 C5 G4 A4
  G4 F4 G4 F4


🎵 Generating audio for 20 notes...
▶️  Click play below:
