In [53]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from category_encoders import BinaryEncoder

In [55]:
# Load and clean data
def load_data():
    df = pd.read_csv('Favorite_music_survery.csv')
    
    # Clean generations
    df['Generation'] = df['Which age group do you fall under?'].str.split('(').str[0].str.strip()
    
    # Clean years
    def clean_year(year_str):
        if pd.isna(year_str): return None
        if 'Before' in str(year_str): return 1970
        if '-' in str(year_str): return int(str(year_str).split('-')[0].strip())
        try: return int(year_str)
        except: return None
    
    df['Release_Year'] = df['Which year was your favorite song released?'].apply(clean_year)
    df['Genre'] = df['What is the genre of the music?'].astype(str).str.title().fillna('Unknown')
    
    return df.dropna(subset=['Generation', 'Release_Year'])

df = load_data()
df

Unnamed: 0,Timestamp,First Name,Which age group do you fall under?,Gender,Favourite Song,What is the genre of the music?,Which year was your favorite song released?,How often do you listen to music,Generation,Release_Year,Genre
0,3/6/2025 11:27:21,Isaac,Millennials (29 Years - 44 Years),Male,Mirror - Lil Wayne,Hip-Hop/Rap,2000 - 2009,Everyday,Millennials,2000.0,Hip-Hop/Rap
1,3/6/2025 18:32:17,Adedoyin Noutouglo,Millennials (29 Years - 44 Years),Female,So will I by Hillsong,Others,2010 -2019,A few times a week,Millennials,2010.0,Others
2,3/6/2025 18:35:52,Adedolapo,Millennials (29 Years - 44 Years),Male,Koloba by Adewale Ayuba,Afrobeats,2020 - 2025,Everyday,Millennials,2020.0,Afrobeats
3,3/6/2025 18:47:34,Olajimi Abioro,Millennials (29 Years - 44 Years),Male,Love me jeje by Tems,Afrobeats,2020 - 2025,Everyday,Millennials,2020.0,Afrobeats
4,3/6/2025 18:51:38,Temiloluwa Okusanya,Generation Z (13 Years - 28 Years),Female,Lagos love story b Ayra Starr,Afrobeats,2020 - 2025,A few times a week,Generation Z,2020.0,Afrobeats
...,...,...,...,...,...,...,...,...,...,...,...
194,3/24/2025 22:58:29,Bilal Hussain,Generation Z (13 Years - 28 Years),Male,"Boulevard of Broken Dreams"" by Green Day",Pop,2000 - 2009,A few times a month,Generation Z,2000.0,Pop
195,3/24/2025 22:59:01,Amna Zafar,Millennials (29 Years - 44 Years),Female,"Love Me Like You Do"" by Ellie",Pop,2010 -2019,A few times a month,Millennials,2010.0,Pop
196,3/24/2025 22:59:27,Danish Rehman,Millennials (29 Years - 44 Years),Male,"mells Like Teen Spirit"" by Nirvana",Pop,2000 - 2009,A few times a week,Millennials,2000.0,Pop
197,3/24/2025 22:59:57,Sara Naveed,Millennials (29 Years - 44 Years),Female,"Happier"" by Marshmello & Bastille",R&B/Soul,2000 - 2009,Rarely,Millennials,2000.0,R&B/Soul


In [57]:
# Prepare encoders
encoder = BinaryEncoder(cols=['Generation', 'Genre'])
scaler = MinMaxScaler()

In [59]:
# Transform features
features = encoder.fit_transform(df[['Generation', 'Genre']])
features['Release_Year'] = scaler.fit_transform(df[['Release_Year']].values)

In [61]:
# Train model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(features)

In [63]:
# Age to generation mapping
def get_generation(age):
    age = int(age)
    if age <= 12: return 'Generation Alpha'
    elif age <= 28: return 'Generation Z'
    elif age <= 44: return 'Millennials'
    elif age <= 65: return 'Generation X'
    return 'Baby Boomers+'

In [65]:
def clean_year(year_str):
    """Convert year strings to single numeric values"""
    if pd.isna(year_str) or not isinstance(year_str, str):
        return None
    try:
        if 'Before' in year_str:
            return 1970  # Default for "Before" years
        elif '-' in year_str:
            # Take the first year of a range
            return int(year_str.split('-')[0].strip())
        else:
            return int(year_str)
    except (ValueError, AttributeError):
        return None

In [73]:
# user input
def get_user_input():
    print("\n=== Music Recommendation System ===")
    
    # Age input
    while True:
        age_input = input("Enter your age: ")
        try:
            age = int(age_input)
            if 1 <= age <= 120:
                generation = get_generation(age)
                break
            else:
                print("Please enter an age between 1 and 120")
        except ValueError:
            print("Please enter a valid number for age (e.g. 25)")
    
    # Genre input
    genre = input("Enter preferred genre (leave blank for any): ").strip().title()
    if not genre:
        genre = None
    
    # Year input 
    year = None
    while True:
        year_input = input("Enter preferred release year/range (e.g. 2020 or 1990-1999): ").strip()
        if not year_input:
            break
            
        try:
            if '-' in year_input:
                year = clean_year(year_input.split('-')[0])
            else:
                year = clean_year(year_input)
            
            if year is not None:
                break
            print("Invalid year format. Try like '2020' or '1990-1999'")
        except:
            print("Invalid input. Please enter a year or range")
    
    # Number of recommendations
    while True:
        num_recs = input("How many recommendations? (default 5): ").strip()
        if not num_recs:
            num_recs = 5
            break
        try:
            num_recs = int(num_recs)
            if num_recs > 0:
                break
            else:
                print("Please enter a positive number")
        except ValueError:
            print("Please enter a valid number")
    
    return generation, genre, year, num_recs

In [75]:
def recommend_songs(generation, genre=None, year=None, n=5):
    # Prepare query
    query = pd.DataFrame({
        'Generation': [generation],
        'Genre': [genre if genre else 'Unknown'],
        'Release_Year': [year if year else df['Release_Year'].median()]
    })
    
    # Transform query
    query_encoded = encoder.transform(query[['Generation', 'Genre']])
    query_encoded['Release_Year'] = scaler.transform([[query['Release_Year'].iloc[0]]])
    
    # Get recommendations
    distances, indices = knn.kneighbors(query_encoded, n_neighbors=n)
    
    # Prepare results
    recommendations = df.iloc[indices[0]].copy()
    recommendations['Match_Score'] = 1 - distances[0]
    
    # Apply genre filter if specified
    if genre:
        recommendations = recommendations[recommendations['Genre'].str.contains(genre, case=False)]
    
    return recommendations[['Favourite Song', 'Genre', 'Release_Year', 'Match_Score']]

In [77]:
def main():
    generation, genre, year, num_recs = get_user_input()
    recommendations = recommend_songs(generation, genre, year, n=num_recs)
    
    print(f"\nYour top {num_recs} Recommended Songs:")
    if len(recommendations) > 0:
        print(recommendations.sort_values('Match_Score', ascending=False)
              [['Favourite Song', 'Genre', 'Release_Year']].to_string(index=False))
    else:
        print("No matching songs found. Try broadening your search criteria.")

if __name__ == "__main__":
    main()


=== Music Recommendation System ===


Enter your age:  25
Enter preferred genre (leave blank for any):  afrobeats
Enter preferred release year/range (e.g. 2020 or 1990-1999):  
How many recommendations? (default 5):  



Your top 5 Recommended Songs:
      Favourite Song     Genre  Release_Year
   Laho - Shalipoppi Afrobeats        2020.0
Joy is coming - Fido Afrobeats        2020.0
       Kese - Wizkid Afrobeats        2020.0
     Awake by davido Afrobeats        2020.0
        Ozeba - Rema Afrobeats        2020.0
