------------------------------------------------
This file is created by Tomer Gabay in February 2020.
-------------------------------------

----------
Functions necessary to import and filter the lyrics:
----

In [None]:
from collections import Counter

import lyricsgenius
import csv

def remove_invalid_lyrics(songs):
    """Return the songs whose ``lyrics`` attribute is not ``None``."""
    kept = []
    for candidate in songs:
        if candidate.lyrics is None:
            continue
        kept.append(candidate)
    return kept

def remove_featured_songs(songs):
    """Return only the songs that have no featured artists.

    A song is kept when its ``featured_artists`` attribute is falsy, so an
    empty list, an empty tuple and ``None`` all count as "no featured
    artists". Songs with at least one featured artist are dropped.
    """
    return [song for song in songs if not song.featured_artists]

def remove_too_short_lyrics(songs, min_words=250):
    """Return the songs whose lyrics contain at least ``min_words`` words.

    Words are counted by splitting the lyrics on whitespace. A song whose
    ``lyrics`` is ``None`` or empty is treated as having zero words.

    Args:
        songs: iterable of song objects exposing a ``lyrics`` attribute.
        min_words: minimum number of whitespace-separated words a song
            needs in order to be kept (default: 250).

    Returns:
        A new list with the songs that are long enough, in input order.
    """
    return [
        song
        for song in songs
        if len((song.lyrics or "").split()) >= min_words
    ]

def remove_no_album_songs(songs):
    """Return the songs that belong to an album.

    Entries whose ``album`` attribute is ``None`` are dropped; per the
    original author's note these usually are not real songs. The check uses
    identity (``is not None``) so it does not depend on how the album
    object implements comparison.
    """
    return [song for song in songs if song.album is not None]

def remove_versions(songs):
    """Keep a single version of each song.

    Two songs are considered versions of the same song when the part of the
    title before the first "(" is equal, ignoring case and trailing
    whitespace (e.g. "Goodmorning" and "Goodmorning (Acoustic)"). The first
    occurrence is kept. Songs whose title contains "remix" or "(live"
    (case-insensitive) are always dropped.

    Args:
        songs: iterable of song objects exposing a ``title`` attribute.

    Returns:
        A new list with one song per distinct base title, in input order.
    """
    seen_titles = set()  # set gives O(1) membership tests
    unique_songs = []
    for song in songs:
        title = song.title.lower()
        if 'remix' in title or '(live' in title:
            continue
        # When the title itself starts with "(" the text before it is empty;
        # fall back to the full title so that unrelated songs such as
        # "(A) X" and "(B) Y" are not mistaken for versions of each other.
        base_title = title.split('(')[0].rstrip() or title
        if base_title not in seen_titles:
            seen_titles.add(base_title)
            unique_songs.append(song)
    return unique_songs

def remove_spoken(songs):
    """Drop intros, outros, interludes and skits.

    A song is removed when its lowercased title contains any of the
    substrings "interlude", "outro", "intro" or "skit".
    """
    spoken_markers = ("interlude", "outro", "intro", "skit")
    return [
        track
        for track in songs
        if not any(marker in track.title.lower() for marker in spoken_markers)
    ]
    
def write_csv(songs, filename):
    """Write the songs to a CSV file with a header row.

    Each song becomes one row with the columns ``lyrics``, ``artist``,
    ``song_title`` and ``featuring``; ``featuring`` holds the names of the
    featured artists joined by ", " (empty when there are none).

    Args:
        songs: iterable of song objects exposing ``lyrics``, ``artist``,
            ``title`` and ``featured_artists`` (a sequence of dicts with a
            ``'name'`` key, or a falsy value).
        filename: path of the CSV file to create or overwrite.
    """
    fieldnames = ['lyrics', 'artist', 'song_title', 'featuring']
    # newline='' lets the csv module manage line endings itself, as the csv
    # documentation requires; otherwise extra blank rows can appear on some
    # platforms. UTF-8 keeps non-ASCII lyrics writable regardless of the
    # platform's default encoding.
    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for song in songs:
            featuring = ", ".join(
                featured['name'] for featured in (song.featured_artists or [])
            )
            writer.writerow({
                'lyrics': song.lyrics,
                'artist': song.artist,
                'song_title': song.title,
                'featuring': featuring,
            })

----------
Download all songs of a specific artist:
-----

In [None]:
# Create the API client; replace the placeholder with your own access token.
genius = lyricsgenius.Genius("GENIUS TOKEN ACCESS KEY") # https://genius.com/api-clients
# Look up the artist and download their songs, sorted by title.
artist = genius.search_artist("ARTIST",sort="title")
# NOTE(review): search_artist appears to return None when no artist matches;
# fail with a clear message instead of an AttributeError on the next line.
if artist is None:
    raise ValueError("Artist not found on Genius; check the artist name.")
songs = list(artist.songs)

----------
Run the cell below to remove songs with invalid lyrics
-----

In [None]:
# Filter out songs without lyrics, report the effect, then keep the result.
songs_new = remove_invalid_lyrics(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

----------
Run the cell below to remove songs with multiple artists
----

In [None]:
# Filter out songs with featured artists, report the effect, then keep the result.
songs_new = remove_featured_songs(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

----------
Run the cell below to remove songs with no related album
------------------

In [None]:
# Filter out songs that have no album, report the effect, then keep the result.
songs_new = remove_no_album_songs(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

----------
Run the cell below to remove songs with too short lyrics (fewer than 250 words)
------

In [None]:
# Filter out songs with too few words, report the effect, then keep the result.
songs_new = remove_too_short_lyrics(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

------------
Run the cell below to remove different versions of the same songs
------

In [None]:
# Filter out alternative versions of a song, report the effect, then keep the result.
songs_new = remove_versions(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

-------------
Run the cell below to remove spoken songs
-----

In [None]:
# Filter out intros, outros, interludes and skits, report the effect, then keep the result.
songs_new = remove_spoken(songs)
print(
    "{0:>3} songs removed.\n{1:>3} songs left.".format(
        len(songs) - len(songs_new),
        len(songs_new),
    )
)
songs = songs_new

--------------------------------------------
Run the cell below to see all remaining songs
------

In [None]:
# Print one line per remaining song: running index, title and album.
for i, song in enumerate(songs):
    print(
        "{0:<3} {1:<30} {2}".format(
            i,
            song.title,
            song.album,
        )
    )

-----------
Run the cell below to write the remaining songs to a csv file
------

In [None]:
# Save the remaining songs; replace the placeholder with the output file path.
write_csv(songs, filename="PATH")