In [None]:
!pip install sdv




In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import TruncatedSVD

# Load the track metadata and the low-level audio features from CSV
tracks = pd.read_csv('./spotify_tracks.csv')
audio_features = pd.read_csv('./low_level_audio_features.csv')
print(tracks.columns)
# Print every track as "ID: <id>, Name: <name>". The track ID is read from the
# 'Unnamed: 0' column (changed from 'id'). The two columns are iterated
# directly instead of using iterrows(), which builds a Series object for every
# row and is very slow on a frame of this size; the printed text is the same.
for track_id, track_name in zip(tracks['Unnamed: 0'], tracks['name']):
    print(f"ID: {track_id}, Name: {track_name}")

# Columns of `tracks` that describe a track for the similarity search
features = ['popularity', 'acousticness', 'energy', 'valence', 'danceability']

# Standardise the selected columns
scaler = StandardScaler()
track_features = scaler.fit_transform(tracks.loc[:, features])

# Reduce the scaled features with SVD; 3 components are kept for clarity
svd = TruncatedSVD(n_components=3)
track_features_svd = svd.fit_transform(track_features)

# Fit a kNN model (5 neighbours, Euclidean distance) on the reduced features
# so that similar tracks can be looked up later
knn = NearestNeighbors(n_neighbors=5, metric='euclidean').fit(track_features_svd)

# Look up tracks similar to a given CSV track, based on the CSV features
def find_similar_tracks(track_id):
    """Return the rows of ``tracks`` that are nearest to the given track.

    The track is located by matching ``track_id`` against the
    ``'Unnamed: 0'`` column of the module-level ``tracks`` frame; its reduced
    feature vector is then queried against the module-level ``knn`` model.

    Args:
        track_id: Value to look for in ``tracks['Unnamed: 0']``. If several
            rows match, the first one is used.

    Returns:
        A DataFrame with up to ``knn.n_neighbors`` rows of ``tracks``, nearest
        first. The queried track itself is left out of the result.

    Raises:
        IndexError: If no row of ``tracks`` has the requested ID.
    """
    # Work with the row *position*: track_features_svd is a plain array, so an
    # index label would only be correct while `tracks` has a default index.
    matches = np.flatnonzero((tracks['Unnamed: 0'] == track_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"track id {track_id!r} not found in tracks['Unnamed: 0']")
    track_pos = int(matches[0])

    # The query point belongs to the fitted data and would come back as its
    # own nearest neighbour, so ask for one extra neighbour and drop it.
    wanted = knn.n_neighbors
    n_query = min(wanted + 1, len(track_features_svd))
    indices = knn.kneighbors(
        [track_features_svd[track_pos]],
        n_neighbors=n_query,
        return_distance=False,
    )
    neighbour_positions = [pos for pos in indices[0] if pos != track_pos][:wanted]
    return tracks.iloc[neighbour_positions]

# Draw the spectrogram of an audio file
def plot_spectrogram(audio_path):
    """Load ``audio_path`` at 16 kHz and display its spectrogram in dB.

    The STFT magnitude is converted to decibels and shown with a time x-axis,
    a logarithmic frequency y-axis and a colour bar. Nothing is returned; the
    figure is shown with ``plt.show()``.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=16000)
    magnitude = abs(librosa.stft(waveform))
    magnitude_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(magnitude_db, sr=sample_rate, x_axis='time', y_axis='log')
    plt.colorbar()
    plt.title('Spectrogram')
    plt.show()

# Load the pretrained VGGish model from TensorFlow Hub
VGGISH_MODEL_URL = 'https://tfhub.dev/google/vggish/1'
model_vggish = hub.load(VGGISH_MODEL_URL)

# Extract a feature vector from an audio file
def extract_features(audio_path):
    """Return the VGGish output for ``audio_path``, averaged over axis 0.

    The file is loaded with librosa at 16 kHz and passed to the module-level
    ``model_vggish``. The model output is then averaged over its first axis
    (presumably one row per audio frame - confirm against the model docs).
    """
    waveform, _ = librosa.load(audio_path, sr=16000)
    embeddings = model_vggish(waveform)
    return np.mean(embeddings, axis=0)

# Load the features of every song in the database folder
def load_database_features(database_path):
    """Extract features for each MP3 file directly inside ``database_path``.

    Args:
        database_path: Directory to scan (not recursive).

    Returns:
        A dict mapping each MP3 file name to the value returned by
        ``extract_features`` for that file. Entries are inserted in sorted
        file-name order. The dict is empty when the folder has no MP3 files.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            read (for example, it does not exist).
    """
    database_features = {}
    # os.listdir() returns names in arbitrary order; sorting keeps the
    # resulting dict (and everything derived from it) stable between runs.
    for song_file in sorted(os.listdir(database_path)):
        # Compare the extension case-insensitively so "SONG.MP3" is not skipped.
        if not song_file.lower().endswith('.mp3'):
            continue
        song_path = os.path.join(database_path, song_file)
        database_features[song_file] = extract_features(song_path)
    return database_features

# Apply SVD to the audio features of the whole database
def apply_svd_to_audio_features(database_features, n_components=3):
    """Reduce every feature vector in ``database_features`` with TruncatedSVD.

    Args:
        database_features: Dict mapping a song name to its feature vector.
        n_components: Number of SVD components to keep. Defaults to 3, which
            keeps the representation small for clarity.

    Returns:
        A dict with the same keys, in the same order, mapping each song name
        to its reduced feature vector. An empty input yields an empty dict.
    """
    # Nothing to decompose; fitting an SVD on an empty array would fail.
    if not database_features:
        return {}

    # Take the names once so that names and matrix rows stay aligned without
    # rebuilding the key list for every row.
    song_names = list(database_features)
    all_features = np.array([database_features[name] for name in song_names])
    svd = TruncatedSVD(n_components=n_components)
    reduced_features = svd.fit_transform(all_features)
    return dict(zip(song_names, reduced_features))

# Find the songs most similar to a given feature vector
def find_most_similar_songs(input_features, database_features, top_n=5):
    """Rank the songs in ``database_features`` by cosine similarity.

    Args:
        input_features: Feature vector of the query song.
        database_features: Dict mapping a song name to its feature vector.
        top_n: Maximum number of results to return.

    Returns:
        A list of ``(song_name, similarity)`` tuples sorted by descending
        similarity and cut to ``top_n`` entries. Songs with equal similarity
        keep the order they have in ``database_features``. An empty database
        yields an empty list.
    """
    if not database_features:
        return []

    song_names = list(database_features)
    song_matrix = [database_features[name] for name in song_names]
    # One batched call compares the query against all songs at once instead of
    # paying the per-call overhead of cosine_similarity for every song.
    scores = cosine_similarity([input_features], song_matrix)[0]

    ranked = sorted(zip(song_names, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

# Ask the user for song file names from the audio folder and/or track IDs from
# the CSV, separated by commas. Blank entries (e.g. from a trailing comma) are
# dropped so they are not reported as missing songs.
selected_songs = input("Введите названия песен из папки audio или ID из CSV через запятую: ").split(',')
selected_songs = [song.strip() for song in selected_songs if song.strip()]

# Load the features of every song in the audio folder
user_audio_path = 'audio/'  # Folder with the MP3 files
user_audio_features = load_database_features(user_audio_path)
user_audio_features_svd = apply_svd_to_audio_features(user_audio_features)

# Handle the tracks selected from the CSV. isdecimal() is used because it only
# accepts characters that int() can parse (isdigit() also accepts e.g. "²").
selected_csv_tracks = [int(song) for song in selected_songs if song.isdecimal()]
for track_id in selected_csv_tracks:
    try:
        similar_tracks = find_similar_tracks(track_id)
    except IndexError:
        # find_similar_tracks raises IndexError when the ID is not in the CSV;
        # report it and continue with the remaining selections.
        print(f"Трек с ID {track_id} не найден в CSV.")
    else:
        print(f"Похожие песни для ID {track_id} из CSV:")
        print(similar_tracks)

# Find similar songs for every selected track from the audio folder
selected_audio_tracks = [song for song in selected_songs if not song.isdecimal()]
for song in selected_audio_tracks:
    if song not in user_audio_features_svd:
        print(f"Песня {song} не найдена в папке audio.")
        continue

    print(f"Похожие песни для {song}:")
    # Compare against all other songs; the query song would otherwise always
    # be listed as its own best match.
    candidates = {
        name: vector
        for name, vector in user_audio_features_svd.items()
        if name != song
    }
    similar_songs = find_most_similar_songs(user_audio_features_svd[song], candidates)
    for similar_song, similarity in similar_songs:
        print(f"{similar_song} (Сходство: {similarity:.2f})")
    print()



Выходные данные были обрезаны до нескольких последних строк (5000).
ID: 97149, Name: Perfect
ID: 97150, Name: Therapy (feat. James Newman)
ID: 97151, Name: Mad World - Recorded at Metropolis Studios, London
ID: 97152, Name: Set Fire To The Rain
ID: 97153, Name: Playinwitme (feat. Kehlani)
ID: 97154, Name: Young Blood
ID: 97155, Name: Sexual - Recorded at Spotify Studios NYC
ID: 97156, Name: Me & Magdalena
ID: 97157, Name: I Could Be Wrong
ID: 97158, Name: Creep
ID: 97159, Name: String poetic: II. Nocturne
ID: 97160, Name: Concerto pour piano
ID: 97161, Name: Twelve Preludes For Piano (1953): I
ID: 97162, Name: Shakespeare
ID: 97163, Name: Leyendas, "An Andean Walkabout": Chasqui (version for string quartet)
ID: 97164, Name: P's and Q's
ID: 97165, Name: By the Streams of Babylon
ID: 97166, Name: Murda
ID: 97167, Name: At the Speed of Stillness
ID: 97168, Name: Booo! (feat. Ms Dynamite) - Original Dirty Mix
ID: 97169, Name: Violin Concerto: I. Allegro
ID: 97170, Name: Ina D