
# 🎧 Jonny's Musical Memory Project

This project uses machine learning and automation tools to process 13 hours of personal analog music recordings. The pipeline performs:

1. Transcription of MP3 audio files using Whisper
2. Song identification through Google lyrics search
3. Automatic Spotify playlist creation using your own Spotify Developer App
4. Predictive modeling of future musical preferences

> ⚠️ **Note**: All API credentials have been sanitized. Replace placeholders with your actual Spotify developer keys to run the code.

---


In [1]:
import sys
import os
import whisper
import warnings
import subprocess
from pydub import AudioSegment, silence
from googlesearch import search
import time
warnings.simplefilter("ignore")

In [2]:
input_file = "cinta_1_pista_1.mp3"
output_file = "cinta_1_pista_1.wav"

# Convert MP3 to WAV (mono, 16 kHz).
#   -y          overwrite an existing WAV instead of stopping at ffmpeg's
#               interactive "Overwrite? [y/N]" prompt, so the cell can be re-run.
#   check=True  raise subprocess.CalledProcessError when ffmpeg exits non-zero,
#               so the success message below is only printed after a real success.
subprocess.run(
    ["ffmpeg", "-y", "-i", input_file, "-ac", "1", "-ar", "16000", output_file],
    check=True,
)
print("Conversion complete. Try transcribing the WAV file.")

Conversion complete. Try transcribing the WAV file.


In [None]:
# Load the "small" Whisper checkpoint and run it over the converted WAV file.
model = whisper.load_model("small")
result = model.transcribe("cinta_1_pista_1.wav")

# Persist the recognised text so later cells can re-read it from disk
# without repeating the transcription.
output_file = "cinta_1_pista_1_transcription.txt"
with open(output_file, mode="w", encoding="utf-8") as f:
    transcript = result["text"]
    f.write(transcript)

print(f"Transcription saved to {output_file}")

In [None]:
# Load the converted audio. The file on disk is a WAV, so use the WAV loader
# rather than from_mp3, which declares the wrong container format.
audio = AudioSegment.from_wav("cinta_1_pista_1.wav")

# Detect silent parts: at least 2000 ms long and quieter than -40 dBFS.
# detect_silence returns [start_ms, end_ms] pairs.
silent_ranges = silence.detect_silence(audio, min_silence_len=2000, silence_thresh=-40)

# Convert milliseconds to seconds; each tuple is one silent gap (start, end).
song_boundaries = [(start / 1000, end / 1000) for start, end in silent_ranges]

# The end of each silent gap is where the next song potentially begins.
for _, end in song_boundaries:
    print(f"Potential song transition at {end:.2f} seconds")

# Save silence timestamps for reference
with open("silence_boundaries.txt", "w") as f:
    for start, end in song_boundaries:
        f.write(f"{start:.2f} - {end:.2f} seconds\n")

print("Silence detection complete. Check silence_boundaries.txt for timestamps.")


In [None]:
# Read the full transcription as one string and tokenise it into words.
# The timestamp arithmetic below produces *word* indices, so the sequence
# being sliced must be a list of words (not a list of lines).
with open("cinta_1_pista_1_transcription.txt", "r", encoding="utf-8") as f:
    full_text = f.read()
transcript_words = full_text.split()

# Approximate word rate (words per second) - Adjust if needed
words_per_second = 2.5  # 2.5 words/s == 150 words per minute

# song_boundaries holds the *silent gaps* as (start, end) in seconds.
# A song is the stretch between two gaps: it starts where one gap ends
# (or at 0 for the first song) and stops where the next gap starts
# (or at the end of the transcript for the last song, marked by None).
segment_starts = [0.0] + [gap_end for _, gap_end in song_boundaries]
segment_ends = [gap_start for gap_start, _ in song_boundaries] + [None]

# Extract lyrics based on timestamps
song_lyrics = []
for start, end in zip(segment_starts, segment_ends):
    start_index = int(start * words_per_second)
    end_index = None if end is None else int(end * words_per_second)

    # Extract potential song lyrics
    song_snippet = " ".join(transcript_words[start_index:end_index])
    if not song_snippet:
        # Nothing transcribed in this stretch (e.g. the recording opens with
        # silence, or the estimate ran past the end of the transcript).
        continue
    song_lyrics.append(song_snippet)

    i = len(song_lyrics) - 1  # zero-based index of the song just stored
    print(f"\nüîπ **Potential Song {i+1} Lyrics:**\n{song_snippet[:300]}...\n")

# Save extracted song lyrics for reference
with open("extracted_songs.txt", "w", encoding="utf-8") as f:
    for i, lyrics in enumerate(song_lyrics):
        f.write(f"\n### Song {i+1}\n{lyrics}\n\n")

print("Lyrics extracted. Check extracted_songs.txt.")


In [None]:
# Load the saved transcription as a single string
with open("cinta_1_pista_1_transcription.txt", mode="r", encoding="utf-8") as f:
    full_text = f.read()

# Break the text apart at newlines and at sentence-ending punctuation (. ! ?)
import re
sentence_chunks = re.split(r'[\n.!?]', full_text)

# Trim surrounding whitespace, then keep only pieces longer than 20 characters
trimmed_chunks = (chunk.strip() for chunk in sentence_chunks)
lyrics_candidates = [text for text in trimmed_chunks if len(text) > 20]

# Preview the first five candidates, capped at 200 characters each
preview = lyrics_candidates[:5]
for i, snippet in enumerate(preview):
    print(f"\nüîπ Potential Lyrics Snippet {i+1}:\n{snippet[:200]}...")

# Write every candidate to disk, separated by blank lines
with open("potential_lyrics_snippets.txt", mode="w", encoding="utf-8") as f:
    for snippet in lyrics_candidates:
        f.write(f"{snippet}\n\n")

print("\n‚úÖ Extracted lyrics snippets. Check potential_lyrics_snippets.txt.")


In [None]:
def find_song_by_lyrics(lyrics_snippet):
    """Run an exact-phrase lyrics query through `search` and print the hits.

    The snippet is wrapped in double quotes and suffixed with "song lyrics".
    Up to three result URLs are printed. Any exception raised while searching
    is reported on stdout instead of propagating. The function always pauses
    for two seconds before returning and returns nothing.
    """
    query = f'"{lyrics_snippet}" song lyrics'
    print(f"\nüîç Searching: {query}")

    try:
        top_hits = search(query, num_results=3)  # Top 3 results
        results = list(top_hits)
        for url in results:
            print(f"üîó {url}")
    except Exception as e:
        print(f"‚ùå Google search failed: {e}")

    # Throttle consecutive queries to reduce the chance of being blocked
    time.sleep(2)

# Only the first five candidates are searched, to keep the request count low
first_candidates = lyrics_candidates[:5]
for i, lyrics in enumerate(first_candidates):
    print(f"\nüîπ Searching for Song {i+1} Lyrics:\n{lyrics[:100]}...")  # Show first 100 characters
    find_song_by_lyrics(lyrics)


In [None]:
import re

# Example: derive a song title from the slug of a lyrics-page URL
def extract_song_title(url):
    """Return a title-cased song name taken from a "<slug>-lyrics" URL segment.

    The slug is the run of non-slash characters that follows a "/" and
    precedes "-lyrics". Hyphens in the slug become spaces and the result is
    title-cased. Returns None when the URL contains no such segment.
    """
    slug_match = re.search(r'\/([^\/]+)-lyrics', url)
    if slug_match is None:
        return None
    slug_words = slug_match.group(1).split("-")
    return " ".join(slug_words).title()

# Process search results and extract song titles.
# Each query is guarded and throttled the same way as find_song_by_lyrics:
# a failed search is reported and skipped instead of aborting the cell, and
# requests are spaced two seconds apart to reduce the chance of being blocked.
song_titles = []
for i, lyrics in enumerate(lyrics_candidates[:5]):  # Process first 5 searches
    try:
        results = list(search(f'"{lyrics}" song lyrics', num_results=3))
    except Exception as e:
        print(f"Google search failed for snippet {i+1}: {e}")
        results = []

    for url in results:
        song_title = extract_song_title(url)
        if song_title:
            song_titles.append(song_title)

    time.sleep(2)  # Delay to avoid blocking

# Remove duplicates while keeping first-seen order, so the printed list and
# the saved file are identical and stable from run to run.
unique_titles = list(dict.fromkeys(song_titles))

# Print found songs
print("\nüéµ Possible Song Titles:")
for song in unique_titles:
    print(f"- {song}")

# Save song titles to a file
with open("found_songs.txt", "w", encoding="utf-8") as f:
    for song in unique_titles:
        f.write(f"{song}\n")

print("\n‚úÖ Song identification complete. Check found_songs.txt.")


In [None]:
import os

import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Credentials are read from the environment so that no key material is ever
# stored in the notebook. Set SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET and
# SPOTIPY_REDIRECT_URI before running; a missing variable raises KeyError here.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=os.environ["SPOTIPY_CLIENT_ID"],
    client_secret=os.environ["SPOTIPY_CLIENT_SECRET"],
    redirect_uri=os.environ["SPOTIPY_REDIRECT_URI"],
    scope="playlist-modify-public",
    open_browser=False  # Do not launch a web browser for the authorization step
))


# Create a new playlist
user_id = sp.current_user()["id"]
playlist = sp.user_playlist_create(user_id, "Las Canciones de Jonny de las cintas", public=True)

# Read found song titles, dropping blank lines so no empty query is sent
with open("found_songs.txt", "r", encoding="utf-8") as f:
    song_titles = [line.strip() for line in f if line.strip()]

# Search Spotify for each title and keep the top hit's URI
track_uris = []
for song in song_titles:
    results = sp.search(q=song, type="track", limit=1)
    items = results["tracks"]["items"]
    if items:
        track_uris.append(items[0]["uri"])

# Different titles can resolve to the same track; add each track only once
track_uris = list(dict.fromkeys(track_uris))

# Add found songs to the playlist. The Spotify Web API accepts at most 100
# items per request and rejects an empty list, so send batches and skip the
# call entirely when nothing was found.
MAX_ITEMS_PER_REQUEST = 100
if track_uris:
    for offset in range(0, len(track_uris), MAX_ITEMS_PER_REQUEST):
        sp.playlist_add_items(playlist["id"], track_uris[offset:offset + MAX_ITEMS_PER_REQUEST])
    print("\n‚úÖ Spotify playlist created successfully!")
else:
    print("\nPlaylist created, but no matching tracks were found on Spotify.")


Couldn't read cache at: .cache
