In [33]:
import utils as u
import discogs_api as d
import audio_processing as ap

import pandas as pd

In [34]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## there are two main issues with this process
### 1. conforming mp3 tags so they can query a single api
### 1. a. Releases that aren't on discogs (more of an issue with UTTU and its sublabels)
### 1. b. It's very clear that rules are hard to define for names, and I'm not great at fuzzy search: some titles include things like 'EP' and other titles don't in Discogs (or whatever database), even though they are frequently named as such
### 2. Basically every source lists genre as a per-release feature when I, at least, conceptualize genre as something that can be "per song"

In [118]:
# create the Discogs API client through the project's discogs_api module (imported as d)
dc = d.get_discogs_client()
# call identity() on the new client to verify it's working before any real queries
dc.identity()

In [4]:
# directory holding the Hessle mp3s; passed to u.process_files in a later cell
hessle_dir = "/mnt/d/MIR_music/Hessle/"
# alternate location: /mir_final_proj/Hessle (rough drive directory) - re-uploaded
# because some mp3s were retagged to work better with Discogs

In [85]:
# parse the mp3s by their tags into a DataFrame; the second argument autofills the
# 'label' column, which is safe here because this label has no sublabels
df_hessle = u.process_files(hessle_dir, "Hessle Audio")

In [125]:
# look up Hessle's Discogs label id, then download the label's whole discography in
# one go to avoid querying genre and style info for each release individually
lbl_id = d.find_label_id(dc, "Hessle Audio")
# store release information; later cells read each entry's 'title' and 'id'
discog = d.get_label_discography(dc, lbl_id)

In [128]:
# Add an integer column that relates each df entry to a Discogs release number.
# int('nan') raises ValueError (NaN has no integer form), so the column is created
# with pandas' nullable Int64 dtype and pd.NA as the "not matched yet" placeholder.
df_hessle['discogs_id'] = pd.array([pd.NA] * len(df_hessle), dtype="Int64")

In [141]:
# album title -> the Discogs release id chosen to represent that album
selected_ids = {}

# walk the discography in order and keep only the first release seen per title
# (first-seen is good enough for Hessle)
for release in discog:
    title = release['title']
    if title in selected_ids:
        continue
    selected_ids[title] = release['id']

# stamp the chosen release id onto every track row whose album matches the title
for title, release_id in selected_ids.items():
    same_album = df_hessle['album'] == title
    df_hessle.loc[same_album, 'discogs_id'] = release_id


In [142]:
# Test df against the downloaded discography from Discogs.
# Index the discography by title once (first entry per title wins, the same entry a
# front-to-back scan would find) instead of rescanning the whole list for every row.
first_entry_by_title = {}
for release_entry in discog:
    first_entry_by_title.setdefault(release_entry['title'], release_entry)

for index, row in df_hessle.iterrows():
    # find the discography entry whose title equals this row's album, if any
    discog_entry = first_entry_by_title.get(row['album'])

    # if there's a matching entry in discog
    if discog_entry:
        expected_id = discog_entry['id']
        actual_id = row['discogs_id']
        # A missing id (NaN / pd.NA) is reported as a mismatch; testing for it first
        # avoids evaluating a comparison against pd.NA, whose truth value is ambiguous.
        if pd.isna(actual_id) or actual_id != expected_id:
            print(f"Mismatch at index {index}: Album '{row['album']}' has discogs_id {row['discogs_id']}, expected {expected_id}")
    else:
        print(f"No discog entry found for album '{row['album']}' at index {index}")


In [143]:
# number of distinct album values; dropna=False keeps missing values in the count,
# matching what len(...unique()) reports
df_hessle['album'].nunique(dropna=False)

41

In [145]:
# use the helper in discogs_api to fetch info per release only once;
# Discogs and essentially every other source only provides genre info per release
# NOTE(review): presumably this is where the 'genres' and 'styles' columns come from - confirm in discogs_api
hessle_with_discogs_info = d.process_dataframe(df_hessle, dc)

In [147]:
# save a CSV checkpoint of the Discogs-annotated frame just in case, before the audio processing step
hessle_with_discogs_info.to_csv("good_hessle_df.csv")

In [None]:
# collect the file paths (in row order) to pass to pooling for processing
file_paths = hessle_with_discogs_info['file_path'].tolist()

# the next cell consumes one result per path: a dict of feature values, or None on failure
hessle_results = ap.process_files_parallel(file_paths)

In [163]:
# Update df with the analysis results for each track, or flag the track with an error.
# file_paths was built positionally from this frame, so the i-th result belongs to the
# i-th row. Pair each result with the frame's own index label instead of using the
# enumerate position as a .loc label: the two only coincide for a default 0..n-1
# index, and otherwise .loc would write to the wrong row or append a new one.
# Assumes process_files_parallel returns results in input order - TODO confirm.
for row_label, features in zip(hessle_with_discogs_info.index, hessle_results):
    if features is not None:
        # one column per extracted feature
        for key, value in features.items():
            hessle_with_discogs_info.loc[row_label, key] = value
    else:
        # no features came back for this track
        hessle_with_discogs_info.loc[row_label, 'processing_error'] = True

In [165]:
# inspect the tempo column
# NOTE(review): the recorded output shows exactly 135.999178 for many tracks - check
# whether the tempo estimate is snapping to a small set of values
hessle_with_discogs_info['tempo']

0      135.999178
1      135.999178
2       92.285156
3       92.285156
4      135.999178
          ...    
162    107.666016
163    135.999178
164     89.102909
165    135.999178
166     80.749512
Name: tempo, Length: 167, dtype: float64

In [170]:
# list every column now on the frame (tag fields, Discogs fields, audio features)
hessle_with_discogs_info.columns

Index(['title', 'artist', 'album', 'year', 'genre', 'label', 'file_path',
       'discogs_id', 'genres', 'styles', 'tempo', 'centroid_mean',
       'centroid_std', 'spread_mean', 'spread_std', 'rolloff_mean',
       'rolloff_std', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_std', 'harmony_mean', 'harmony_var', 'percu_mean', 'percu_var',
       'mfcc_1_mean', 'mfcc_2_mean', 'mfcc_3_mean', 'mfcc_4_mean',
       'mfcc_5_mean', 'mfcc_6_mean', 'mfcc_7_mean', 'mfcc_8_mean',
       'mfcc_9_mean', 'mfcc_10_mean', 'mfcc_11_mean', 'mfcc_12_mean',
       'mfcc_13_mean', 'mfcc_1_var', 'mfcc_2_var', 'mfcc_3_var', 'mfcc_4_var',
       'mfcc_5_var', 'mfcc_6_var', 'mfcc_7_var', 'mfcc_8_var', 'mfcc_9_var',
       'mfcc_10_var', 'mfcc_11_var', 'mfcc_12_var', 'mfcc_13_var'],
      dtype='object')

In [168]:
# write the frame, now including the audio feature columns, to CSV
hessle_with_discogs_info.to_csv('hessle_with_features.csv')

## UTTU data isn't quite there yet; everything below here isn't functional

In [90]:
# Look up Discogs label ids for UTTU and its sublabels. The client is passed as the
# first argument, matching the d.find_label_id(dc, ...) call used for Hessle Audio;
# the original calls here omitted it.
uttu_id = d.find_label_id(dc, 'Unknown To The Unknown')
hothaus_id = 565738 # hard-coded: can't name search via discogs api
dancetrax_id = d.find_label_id(dc, 'Dance Trax') # listed as "Dance Trax (4)" on discogs
soft_comp_id = d.find_label_id(dc, "Soft Computing")

In [106]:
# name search for Hot Haus under its 'Hot Haus Recs' spelling; dc is passed first,
# matching the d.find_label_id(dc, ...) call used for Hessle Audio
hhrecs = d.find_label_id(dc, 'Hot Haus Recs')

In [107]:
# display the label id returned by the 'Hot Haus Recs' name search
hhrecs

565738

In [116]:
# Download the discography for UTTU and two of its sublabels. dc is passed first,
# matching the d.get_label_discography(dc, lbl_id) call used for Hessle Audio;
# the original calls here omitted it.
# NOTE(review): the recorded output of this cell was a 404 response - confirm the label ids.
uttu_discog = d.get_label_discography(dc, uttu_id)
hothaus_discog = d.get_label_discography(dc, hothaus_id)
dancetrax_discog = d.get_label_discography(dc, dancetrax_id)

Request failed with status code 404. Response: {"message": "The requested resource was not found."}


In [167]:
# retry the Hot Haus discography download with the client passed explicitly
# NOTE(review): the recorded output is "HTTPError: 404", so the request for
# hothaus_id (565738) failed and hothaus_discog was not reassigned by this cell
hothaus_discog = d.get_label_discography(dc, hothaus_id)
len(hothaus_discog)

HTTPError: 404: The requested resource was not found.

In [102]:
# download the Soft Computing discography; dc is passed first, matching the
# d.get_label_discography(dc, lbl_id) call used for Hessle Audio
softcomp_discog = d.get_label_discography(dc, soft_comp_id)

In [98]:
# number of entries downloaded for the UTTU label
len(uttu_discog)

221

In [99]:
# number of entries downloaded for Hot Haus
# NOTE(review): recorded as 0; the Hot Haus requests elsewhere in this notebook show 404s
len(hothaus_discog)

0

In [100]:
# number of entries downloaded for Dance Trax
len(dancetrax_discog)

68

In [103]:
# number of entries downloaded for Soft Computing
len(softcomp_discog)

22

UTTU093
UTTU093
UTTU093
UTTU093
WeMe313.20
WeMe313.20
DATARMX
DRUIDSDREAM1
ETU002 
ETU002 
ETU002BONUS
MINDGAME
none
none
none
none
none
none
none
none
TRIBAL_DATA_
TRIBALTRAX001
UTTU 003
UTTU 005
UTTU 007
UTTU 012
UTTU 014B
UTTU 018
UTTU 028
UTTU 054
UTTU 059
UTTU 062
UTTU 072 
UTTU 074 
UTTU 074 
UTTU 075
UTTU 077
UTTU 078
UTTU 079
UTTU 079
UTTU 080 
UTTU 081
UTTU 083
UTTU 088
UTTU 090
UTTU 090
UTTU 096 
UTTU 096 
UTTU 38 
UTTU 38 
UTTU FACT
UTTU LEGO RAVE 
UTTU_004
UTTU_005
UTTU_005
UTTU_006
UTTU_008
UTTU_010
UTTU_011
UTTU_013
UTTU_016
UTTU_017
UTTU_018
UTTU_019
UTTU_020
UTTU_020
UTTU_021
UTTU_022
UTTU_023
UTTU_024
UTTU_025
UTTU_025
UTTU_025
UTTU_026
UTTU_030
UTTU_031
UTTU_032
UTTU_034
UTTU_034
UTTU_035
 UTTU_036
UTTU_037
UTTU_037
UTTU_039
UTTU_039
UTTU_040
UTTU_041
UTTU_042
UTTU_042
UTTU_043
UTTU_045
UTTU_046
UTTU_048
UTTU_049_RMX
UTTU_051
UTTU_053
UTTU_15_B
UTTU_1UP
UTTU_GOGGLE
UTTU_GOGGLE
UTTU_GOGGLE
UTTU_LEGO
UTTU_LEGO
UTTU_LEGO
UTTU_LEGO
UTTU_MK
UTTU.CLUB_002
UTTU007
UTTU009
ut