# Spotify Data Extraction and Recommendation System

## Authentication

In [ ]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import spotipy
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth

# Spotify API credentials.
# Secrets do not belong in source control, so they are read from the
# environment instead of being written into the notebook.
# NOTE: any client secret that was ever committed in this file should be
# rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET')
REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI',
                              'http://localhost:8888/callback/')

# Fail early with an actionable message rather than deep inside the OAuth flow.
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables before running this notebook."
    )

# Authenticate with user permission; the scopes cover the three user
# endpoints called below (top tracks, saved tracks, recently played).
scope = 'user-top-read user-library-read user-read-recently-played'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=CLIENT_ID,
                                               client_secret=CLIENT_SECRET,
                                               redirect_uri=REDIRECT_URI,
                                               scope=scope))

## Function Definitions

In [ ]:
def get_user_top_tracks():
    """Collect every page of the current user's top tracks.

    Requests the first page from the module-level client ``sp`` with
    ``limit=50`` and keeps calling ``sp.next`` for as long as the latest
    page carries a truthy ``'next'`` entry.

    Returns:
        The ``'items'`` list of the first page, grown in place with the
        items of every following page.
    """
    page = sp.current_user_top_tracks(limit=50)
    collected = page['items']
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    return collected

def get_user_saved_tracks():
    """Collect every page of the current user's saved tracks.

    Requests the first page from the module-level client ``sp`` with
    ``limit=50`` and keeps calling ``sp.next`` for as long as the latest
    page carries a truthy ``'next'`` entry.

    Returns:
        The ``'items'`` list of the first page, grown in place with the
        items of every following page.
    """
    page = sp.current_user_saved_tracks(limit=50)
    collected = page['items']
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    return collected

def get_user_recently_played():
    """Collect every page of the current user's recently played tracks.

    Requests the first page from the module-level client ``sp`` with
    ``limit=50`` and keeps calling ``sp.next`` for as long as the latest
    page carries a truthy ``'next'`` entry.

    Returns:
        The ``'items'`` list of the first page, grown in place with the
        items of every following page.
    """
    page = sp.current_user_recently_played(limit=50)
    collected = page['items']
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    return collected

def get_playlist_tracks(username, playlist_id):
    """Collect every page of a playlist's track entries, best effort.

    Args:
        username: Passed through to ``sp.user_playlist_tracks`` as the
            playlist owner.
        playlist_id: ID of the playlist to read.

    Returns:
        The ``'items'`` list of the first page, grown in place with the
        items of every following page. If any request raises, the error is
        printed and an empty list is returned instead.
    """
    try:
        page = sp.user_playlist_tracks(username, playlist_id)
        collected = page['items']
        while page['next']:
            page = sp.next(page)
            collected += page['items']
    except Exception as e:
        # Deliberately broad: one failing playlist must not stop the run.
        print(f"Error fetching playlist {playlist_id}: {e}")
        return []
    return collected

def get_track_features(track_ids):
    """Fetch audio features for the given tracks in batches.

    Args:
        track_ids: Sequence of Spotify track IDs. Falsy entries (``None`` or
            empty strings, e.g. from entries that carry no Spotify ID) are
            skipped so they are never sent to the API.

    Returns:
        A list of audio-feature dicts. Tracks for which the API returned no
        features are left out, so the list may be shorter than ``track_ids``.
    """
    batch_size = 50  # kept at the original request size
    valid_ids = [track_id for track_id in track_ids if track_id]

    features = []
    for start in range(0, len(valid_ids), batch_size):
        batch = sp.audio_features(valid_ids[start:start + batch_size])
        # A missing analysis comes back as None; a None entry would break
        # the pd.DataFrame(...) construction done by the callers.
        features.extend(item for item in (batch or []) if item is not None)
    return features

## Fetching Playlist Tracks

In [ ]:
# Example playlists and user
username = '31e7ktmeqdqix4gwr6urknhntqwa'
playlist_ids = [
    '27A58P3HJSPProVdIZRWg2',
    '5VuDPSYYfepqqqxQkRiz8K',
    '3oJVKoOfuetT3SRAywKf3r',
    '1fyjAq2jAnE3QOE84XKlRH'
]

all_tracks = []
all_features = []

# Iterate through each playlist
for playlist_id in playlist_ids:
    tracks = get_playlist_tracks(username, playlist_id)
    if not tracks:  # Fetch failed or the playlist is empty
        continue

    # Keep only entries that really carry a track ID; an entry can be empty,
    # lack its 'track' payload, or have no ID at all.
    track_ids = [
        item['track']['id']
        for item in tracks
        if item and item.get('track') and item['track'].get('id')
    ]
    all_tracks.extend(track_ids)

    # Fetch features for each track; drop None entries because they would
    # break the DataFrame construction below.
    track_features = get_track_features(track_ids)
    all_features.extend(
        feature for feature in track_features if feature is not None
    )

# Convert to DataFrame
df_playlist_tracks = pd.DataFrame(all_features)

# Save to CSV
df_playlist_tracks.to_csv('playlist_tracks.csv', index=False)
print("Data saved to playlist_tracks.csv")

## Fetching User's Top, Saved, and Recently Played Tracks

In [ ]:
# Get user's top, saved, and recently played tracks
user_top_tracks = get_user_top_tracks()
user_saved_tracks = get_user_saved_tracks()
user_recently_played = get_user_recently_played()

# Extract track IDs, skipping entries that carry no usable ID.
# Top tracks are track objects; saved and recently played items wrap the
# track under a 'track' key.
user_top_track_ids = [
    track['id'] for track in user_top_tracks if track and track.get('id')
]
user_saved_track_ids = [
    item['track']['id']
    for item in user_saved_tracks
    if item and item.get('track') and item['track'].get('id')
]
user_recently_played_ids = [
    item['track']['id']
    for item in user_recently_played
    if item and item.get('track') and item['track'].get('id')
]

# The same track often appears in more than one source; de-duplicate while
# keeping first-seen order so each track is requested only once.
user_track_ids = list(dict.fromkeys(
    user_top_track_ids + user_saved_track_ids + user_recently_played_ids
))

# Fetch features for user's tracks; drop None entries because they would
# break the DataFrame construction below.
user_track_features = [
    feature
    for feature in get_track_features(user_track_ids)
    if feature is not None
]

# Convert to DataFrame
df_user_tracks = pd.DataFrame(user_track_features)

# Save to CSV
df_user_tracks.to_csv('user_tracks.csv', index=False)
print("Data saved to user_tracks.csv")

## Data Analysis and Visualization

In [ ]:
# Reload both datasets from the CSV files written by the earlier cells
df_playlist_tracks = pd.read_csv('playlist_tracks.csv')
df_user_tracks = pd.read_csv('user_tracks.csv')

# Report the per-column count of missing values for each dataset
for frame in (df_playlist_tracks, df_user_tracks):
    print(frame.isnull().sum())

# Replace every missing value with 0, modifying the DataFrames in place
for frame in (df_playlist_tracks, df_user_tracks):
    frame.fillna(0, inplace=True)

# Audio-feature columns that are plotted here and reused by later cells
features = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]

# One histogram (with KDE overlay) per feature on a 4x3 grid of subplots
plt.figure(figsize=(15, 10))
for position, column in enumerate(features, start=1):
    plt.subplot(4, 3, position)
    sns.histplot(df_playlist_tracks[column], kde=True)
    plt.title(f'Distribution of {column}')
plt.tight_layout()
plt.show()

## Normalizing Features

In [ ]:
# Standardize the audio features. The scaler learns its statistics from the
# playlist tracks only; the same fitted transform is then applied to both
# the playlist tracks and the user tracks.
scaler = StandardScaler()
scaler.fit(df_playlist_tracks[features])
df_playlist_tracks[features] = scaler.transform(df_playlist_tracks[features])
df_user_tracks[features] = scaler.transform(df_user_tracks[features])

## Training k-NN Model

In [ ]:
# Prepare data for training
X_playlist = df_playlist_tracks[features]
X_user = df_user_tracks[features]

# Split the data for evaluation.
# NOTE: X_test is held out but not used in this cell, so recommendations are
# drawn from the training split only.
X_train, X_test = train_test_split(X_playlist, test_size=0.2, random_state=42)

# Train a k-NN model. kneighbors() raises when asked for more neighbors than
# there are fitted samples, so cap k at the size of the training split.
n_neighbors = min(10, len(X_train))
knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto')
knn.fit(X_train)

# Find nearest neighbors for user's tracks
distances, indices = knn.kneighbors(X_user)
print("Distances:", distances)
print("Indices:", indices)

# `indices` holds positions within X_train; map them back to the row labels
# of df_playlist_tracks. Different user tracks frequently share neighbors,
# so repeated labels are dropped while keeping first-seen order.
recommended_track_ids = X_train.index[indices.flatten()].drop_duplicates()

# Fetch recommended track details from the playlist data
recommended_tracks = df_playlist_tracks.loc[recommended_track_ids]

# Display recommended tracks
print("Recommended Tracks:")
print(recommended_tracks[['id', 'uri', 'track_href', 'analysis_url', 'duration_ms']])

## Mood-Based Recommendation Engine

In [ ]:
def recommend_by_mood(mood, user_tracks, playlist_tracks):
    """Recommend the playlist tracks closest to the user's mood profile.

    The profile is the user's average danceability, energy and valence;
    candidates are ranked by their distance to it in that 3-feature space.

    Args:
        mood: Mood label selected by the user. NOTE: it is accepted for
            interface compatibility but does not influence the ranking.
        user_tracks: DataFrame with 'danceability', 'energy' and 'valence'
            columns and at least one row. Several rows are averaged into a
            single profile; a single row is used as is.
        playlist_tracks: DataFrame of candidate tracks with the same three
            columns.

    Returns:
        The rows of ``playlist_tracks`` for up to 10 nearest candidates,
        closest first. An empty ``playlist_tracks`` is returned unchanged.

    Raises:
        ValueError: If ``user_tracks`` has no rows.
    """
    mood_columns = ['danceability', 'energy', 'valence']

    if len(playlist_tracks) == 0:
        return playlist_tracks
    if len(user_tracks) == 0:
        raise ValueError("user_tracks must contain at least one row")

    # Collapse the user's rows into one query point. Flattening several rows
    # with reshape(1, -1) would give a vector with the wrong feature count.
    user_features = (
        user_tracks[mood_columns].mean(axis=0).to_numpy().reshape(1, -1)
    )
    playlist_features = playlist_tracks[mood_columns].to_numpy()

    # Simple distance-based recommendation; k cannot exceed the number of
    # candidates or kneighbors() raises.
    knn = NearestNeighbors(
        n_neighbors=min(10, len(playlist_tracks)), algorithm='auto'
    )
    knn.fit(playlist_features)
    distances, indices = knn.kneighbors(user_features)

    # Select by position: a label round-trip through .loc would return extra
    # rows whenever the candidates' index is not unique.
    return playlist_tracks.iloc[indices.flatten()]

# Example usage (replace with actual user and playlist data)
user_tracks = pd.DataFrame({'danceability': [0.5], 'energy': [0.8], 'valence': [0.7]})  # Example user tracks

# Candidate tracks. They are reloaded from the CSV so the features are on
# their original scale, like the example values above; the in-memory
# df_playlist_tracks has been standardized by the normalization cell.
# Missing values are filled with 0, as in the analysis cell.
playlist_tracks = pd.read_csv('playlist_tracks.csv').fillna(0)

# Get user's selected mood
user_mood = input("Enter your mood (e.g., happy, energetic, relaxed): ")

# Recommend songs based on mood
recommended_tracks = recommend_by_mood(user_mood, user_tracks, playlist_tracks)

# Display recommended tracks, limited to the columns that are actually
# present ('name' and 'artists' exist only if track metadata was merged in).
display_columns = [
    column for column in ['id', 'name', 'artists']
    if column in recommended_tracks.columns
]
print("Recommended songs for", user_mood, ":")
print(recommended_tracks[display_columns].head())