In [1]:
import pandas as pd
import os
from chord_critic import ChordsMetrics
from datasets import load_dataset
from datasets import concatenate_datasets
from datasets import DatasetDict
from datasets import Dataset
from collections import OrderedDict


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class SongsDataset(object):
    """Pickled songs table plus simple lookup helpers.

    The loaded table is kept on ``self.english_chords``. The helpers below read
    its ``genres``, ``artist_name`` and ``song_name`` columns.
    """

    def __init__(self, path='dataset/chords_and_lyrics_en.pkl'):
        """Load the songs table from the pickle file at ``path``.

        SECURITY NOTE: unpickling can execute arbitrary code, so ``path`` must
        only ever point at a file from a trusted source.
        """
        self.english_chords = pd.read_pickle(path)

    def get_songs_with_genre(self, genre_name):
        """Return the rows whose ``genres`` value contains ``genre_name``.

        Containment is evaluated with Python's ``in`` operator on each row's
        ``genres`` value. Rows whose value does not support ``in`` (for example
        a missing value stored as NaN) count as non-matching instead of
        aborting the whole lookup with a ``TypeError``.
        """
        def has_genre(genres):
            try:
                return genre_name in genres
            except TypeError:
                # Value is not a container (e.g. NaN for a missing genre list).
                return False

        # astype(bool) guarantees a boolean mask whatever dtype apply() infers.
        mask = self.english_chords['genres'].apply(has_genre).astype(bool)
        return self.english_chords[mask]

    def get_by_artist_and_song_name(self, artist_name_song_name):
        """Return the rows matching an ``(artist_name, song_name)`` pair.

        ``artist_name_song_name[0]`` is compared for equality with the
        ``artist_name`` column and ``artist_name_song_name[1]`` with the
        ``song_name`` column; rows matching both are returned. The result is
        empty when nothing matches.
        """
        index_artist = self.english_chords['artist_name'] == artist_name_song_name[0]
        index_song = self.english_chords['song_name'] == artist_name_song_name[1]
        return self.english_chords[index_artist & index_song]

# Shared dataset instance, loaded from SongsDataset's default pickle path.
songs_dataset = SongsDataset()

In [3]:
def songs_entry_to_chords(entry, bad_words=('intro', 'bridge', 'instrumental', '2x', 'solo', '(', ')', 'hide')):
    """Flatten the chords of a song entry into a list of chord tokens.

    Args:
        entry: Table-like object with a ``chords`` column. Only the first
            row's value is used; it is passed to ``OrderedDict`` and its
            values are expected to be strings of whitespace-separated tokens.
        bad_words: Lowercase substrings. A token is dropped when its
            lowercased form contains any of them. Defaults to the section
            markers and annotations that were previously hard-coded.

    Returns:
        The remaining tokens, in their original order.

    Raises:
        IndexError: If ``entry`` has no rows (e.g. the song lookup found
            nothing).

    NOTE(review): this is a substring blacklist, not a chord validator, so
    other non-chord words in a tab (e.g. 'this', 'tab') still pass through.
    """
    chord_rows = entry['chords'].values
    if len(chord_rows) == 0:
        # Same exception type as indexing an empty array, but with a message
        # that points at the real cause.
        raise IndexError('songs_entry_to_chords: entry has no rows, so there are no chords to extract')

    line_to_chords = OrderedDict(chord_rows[0])
    chords_str = ' '.join(v.strip() for v in line_to_chords.values())

    chords_filtered_list = []
    for token in chords_str.split():
        lowered = token.lower()  # lowercase once per token, not once per bad word
        if not any(bad_word in lowered for bad_word in bad_words):
            chords_filtered_list.append(token)
    return chords_filtered_list

class GenresBenchmarks(object):
    """Per-genre views of a songs dataset, with sampling and scoring helpers.

    On construction, ``songs_dataset.get_songs_with_genre`` is called once per
    name in ``genres_names`` and the results are stored in
    ``self.genres_datasets`` keyed by genre name.
    """

    def __init__(self, songs_dataset, genres_names):
        """Store the inputs and pre-compute the table of songs for each genre."""
        self.songs_dataset = songs_dataset
        self.genres_names = genres_names
        self.genres_datasets = {genre_name: songs_dataset.get_songs_with_genre(genre_name) for genre_name in genres_names}

    def print_stats(self):
        """Print how many songs were found for each genre."""
        for genre, dataset in self.genres_datasets.items():
            print(f'Found {len(dataset)} {genre} songs')

    def get_samples(self, sample_size=10, random_state=None):
        """Draw a random sample of songs for every genre.

        Args:
            sample_size: Maximum number of songs per genre. Genres holding
                fewer songs return everything they have instead of raising
                "Cannot take a larger sample than population".
            random_state: Optional seed forwarded to ``DataFrame.sample`` so
                a draw can be reproduced. ``None`` keeps the draw unseeded.

        Returns:
            Dict mapping each genre name to its sampled table.
        """
        sampled_genres = {}
        for genre_name in self.genres_names:
            dataset = self.genres_datasets[genre_name]
            if len(dataset) == 0:
                # Nothing to draw from; return an empty slice with the same columns.
                sampled_genres[genre_name] = dataset.iloc[:0]
                continue
            capped_size = min(sample_size, len(dataset))
            sampled_genres[genre_name] = dataset.sample(capped_size, random_state=random_state)
        return sampled_genres

    def sample_random_benchmarks(self):
        """Placeholder: not implemented yet, currently returns ``None``."""
        pass

    def _benchmarks_for_song(self, song):
        """Extract the chords of ``song``, print them, and return their scores.

        The scores are the ``scores`` attribute of ``ChordsMetrics`` built
        from the extracted chord list.
        """
        chords_list = songs_entry_to_chords(song)
        print(chords_list)
        metric = ChordsMetrics(chords_list)
        return metric.scores
    
class SongsBenchmarks(object):
    """Benchmarks for an explicit list of songs.

    Each entry of ``songs_names`` is passed as-is to
    ``songs_dataset.get_by_artist_and_song_name`` and also serves as the key
    under which the lookup result is stored in ``self.songs_data``.
    """

    def __init__(self, songs_dataset, songs_names):
        """Keep the inputs and look up the data for every requested song."""
        self.songs_dataset = songs_dataset
        self.songs_names = songs_names

        looked_up = {}
        for key in songs_names:
            looked_up[key] = songs_dataset.get_by_artist_and_song_name(key)
        self.songs_data = looked_up

    def get_benchmarks(self):
        """Return ``[song_key, scores]`` pairs in the order of ``songs_names``."""
        results = []
        for key in self.songs_names:
            scores = self._benchmarks_for_song(self.songs_data[key])
            results.append([key, scores])
        return results

    def _benchmarks_for_song(self, song):
        """Print the chords extracted from ``song`` and return their scores."""
        chords = songs_entry_to_chords(song)
        print(chords)
        return ChordsMetrics(chords).scores

In [4]:
# Driver cell: score a hand-picked list of songs and print the results.
# NOTE(review): the star import pulls every public name of chord_critic into
# the notebook namespace; only the commented-out lines at the bottom of this
# cell reference names that are not imported explicitly elsewhere.
from chord_critic import *
# The triple-quoted string below is a disabled genre-level experiment; it is
# evaluated as a bare string expression and has no effect.
"""
genres_names = ['progressive rock', 'pop', 'blues', 'indie', 'punk', 'jazz', 'electronic']
genres_benchmarks = GenresBenchmarks(songs_dataset, genres_names)
genres_benchmarks.print_stats()
s1 = songs_dataset.english_chords[songs_dataset.english_chords['song_name'] == 'Heart Of The Sunrise']
# print(genres_benchmarks._benchmarks_for_song(s1))
"""
# Each entry is an (artist_name, song_name) pair, the shape expected by
# SongsDataset.get_by_artist_and_song_name.
songs_names = [('The Beatles', 'Let It Be'), ('Radiohead', 'Creep'), ('The Fray', 'Never Say Never')]
sb = SongsBenchmarks(songs_dataset, songs_names)
benchmarks = sb.get_benchmarks()
# s is the (artist, song) pair, b is the scores object returned for it.
for s,b in benchmarks:
    print(f'Benchmark for {" - ".join(s)}: {b}')
# Disabled per-criteria debugging; relies on s1 from the disabled block above.
# m = ChordsMetrics(songs_entry_to_chords(s1))
# c = m._structure_criteria
# print(f'rep criteria: {RepetitionCoverageCriteria(c.scale, c.chords, c.cp).score}')
# print(f'prog criteria: {ChordsProgressionsCoverageCriteria(c.scale, c.chords, c.cp).score}')


['C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C/B', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C/B', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'F', 'Em', 'Dm', 'C', 'Bb', 'Am', 'G', 'F', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C', 'G', 'Am', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'C/B', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'Am', 'Am/G', 'F', 'C', 'G', 'F', 'Em', 'Dm', 'C', 'F', 'Em', 'Dm', 'C', 'Bb', 'Am', 'G', 'F', 'C']
['this', 'tab', 'G', 'B', 'C', 'Cm', 'G', 'B', 'C', 'Cm', 'G', 'B', 'C', 'Cm', 'G', 'B', 'C', 'Cm', 'G', 'B', 'C', 'Cm', 'G', 'B', 'C', 'Cm', 'G', 'B'