In [1]:
### import and initialize

import csv
import json

import matplotlib.pyplot as plt

file_name_albums = 'data.csv'
file_name_songs = 'top-500-songs.txt'
file_name_json_data = 'track_data.json'


#{'number': '1', 'year': '1967', 'album': "Sgt. Pepper's Lonely Hearts Club Band", 
#'artist': 'The Beatles', 'genre': 'Rock', 'subgenre': 'Rock & Roll, Psychedelic Rock'}
def initialize_csv(file_name='data.csv'):
    """Load a CSV file into a list of plain dictionaries, one per data row.

    The first row of the file supplies the dictionary keys (csv.DictReader
    behaviour); every value is kept as the string read from the file.

    Args:
        file_name: path of the CSV file to read.

    Returns:
        A list of dicts in file order; empty if the file has no data rows.
    """
    # newline='' hands newline handling to the csv module, which is what the
    # csv documentation requires so that quoted fields containing line breaks
    # are parsed correctly on every platform.
    with open(file_name, newline='') as f:
        # dict(row) turns each DictReader row into a plain dictionary.
        return [dict(row) for row in csv.DictReader(f)]

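#songs is a list of dictionaries, one per song, with the keys 'rank', 'name', 'artist' and 'year'
# the raw lines read from the text file (before they are split into dictionaries) look something like this: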
#['1\tLike a Rolling Stone\tBob Dylan\t1965\n', '2\tSatisfaction\tThe Rolling Stones\t1965\n',
#'3\tImagine\tJohn Lennon\t1971\n', "4\tWhat's Going On\tMarvin Gaye\t1971\n", 
#'5\tRespect\tAretha Franklin\t1967\n', '6\tGood Vibrations\tThe Beach Boys\t1966\n', 
#'7\tJohnny B. Goode\tChuck Berry\t1958\n', '8\tHey Jude\tThe Beatles\t1968\n', ...]
def initialize_txt(file_name):
    """Parse a tab-separated song list into a list of dictionaries.

    Each non-blank line is split on tabs and its fields are paired, in order,
    with the keys 'rank', 'name', 'artist' and 'year'. Extra fields are
    dropped and missing trailing fields are simply absent (zip semantics).

    Args:
        file_name: path of the text file to read.

    Returns:
        A list of dicts in file order, with every value kept as a string.
    """
    keys = ['rank', 'name', 'artist', 'year']
    song_list = []
    # The context manager closes the file even if parsing raises.
    with open(file_name, 'r') as text_file:
        for line in text_file:
            line = line.rstrip('\n')
            # Blank lines (e.g. a trailing newline at the end of the file)
            # would otherwise become a bogus {'rank': ''} record.
            if not line.strip():
                continue
            song_list.append(dict(zip(keys, line.split('\t'))))
    return song_list


#
#[{'artist': 'The Beatles', '
#album': "Sgt. Pepper's Lonely Hearts Club Band",
# 'tracks': ["Sgt. Pepper's Lonely Hearts Club Band - Remix", 
#            'With A Little Help From My Friends - Remix', 'Lucy In The Sky With Diamonds - Remix', 'Getting Better - Remix', 'Fixing A Hole - Remix', "She's Leaving Home - Remix", 'Being For The Benefit Of Mr. Kite! - Remix', 'Within You Without You - Remix', "When I'm Sixty-Four - Remix", 'Lovely Rita - Remix', 'Good Morning Good Morning - Remix', "Sgt. Pepper's Lonely Hearts Club Band (Reprise) - Remix", 'A Day In The Life - Remix', "Sgt. Pepper's Lonely Hearts Club Band - Take 9 And Speech", 'With A Little Help From My Friends - Take 1 / False Start And Take 2 / Instrumental', 'Lucy In The Sky With Diamonds - Take 1', 'Getting Better - Take 1 / Instrumental And Speech At The End', 'Fixing A Hole - Speech And Take 3', "She's Leaving Home - Take 1 / Instrumental", 'Being For The Benefit Of Mr. Kite! - Take 4', 'Within You Without You - Take 1 / Indian Instruments', "When I'm Sixty-Four - Take 2", 'Lovely Rita - Speech And Take 9', 'Good Morning Good Morning - Take 8', "Sgt. Pepper's Lonely Hearts Club Band (Reprise) - Speech And Take 8", 'A Day In The Life - Take 1 With Hums', 'Strawberry Fields Forever - Take 7', 'Strawberry Fields Forever - Take 26', 'Strawberry Fields Forever - Stereo Mix 2015', 'Penny Lane - Take 6 / Instrumental', 
#             ...'Penny Lane - Stereo Mix 2017']},
def initialize_json(file_name):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_name: path of the JSON file to read.

    Returns:
        Whatever json.load produces for the file's contents.
    """
    # The context manager guarantees the handle is closed, even when the
    # file does not contain valid JSON and json.load raises.
    with open(file_name, 'r') as json_file:
        return json.load(json_file)


# Load the three data sets once, when this cell runs.
# `albums` is bound as the default `data` argument of the search/summary
# functions defined below; `songs` and `tracks` are read as globals by
# albumWithMostTopSongs.
albums = initialize_csv(file_name_albums)      # list of dicts, one per CSV row
songs = initialize_txt(file_name_songs)        # list of dicts with rank/name/artist/year
tracks = initialize_json(file_name_json_data)  # decoded contents of the JSON file


### SEARCHING FUNCTIONS

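#takes in a string (plus the name of the field to compare it against)
#returns a list of the album dictionaries whose field equals that string exactly (case-sensitive)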
def find_by_name(name, key='album', data=albums):
    """Return every entry of `data` whose `key` field equals `name`.

    The comparison is an exact, case-sensitive equality test.

    Args:
        name: value to look for.
        key: dictionary field to compare against.
        data: list of dictionaries to search.

    Returns:
        A list of the matching dictionaries (empty when nothing matches).
    """
    return [entry for entry in data if entry[key] == name]
#find_by_name('the beatles', 'artist')

#takes in a number for the rank
#returns a list of the album dictionaries with that rank
#returns None if no album is found
def find_by_rank(rank, key='number', data=albums):
    """Return the entries of `data` whose `key` field equals str(rank).

    Args:
        rank: rank to look for; it is converted to a string before comparing.
        key: dictionary field that holds the rank.
        data: list of dictionaries to search.

    Returns:
        A list of the matching dictionaries, or None when nothing matches.
    """
    matches = [entry for entry in data if entry[key] == str(rank)]
    # Callers get None rather than an empty list when there is no hit.
    return matches if matches else None

#given start rank and end rank
#return list of dictionary of albums between start and end
#return empty list if none found
def find_by_ranks(start, end, rank='number', data=albums):
    """Return the album dictionaries whose rank lies in [start, end).

    The result is one flat list of dictionaries, as the comment above this
    function describes; ranks with no match contribute nothing, so the
    result is an empty list when no album is found.

    Args:
        start: first rank to include.
        end: rank at which to stop; it is NOT included (range semantics).
        rank: dictionary field that holds the rank.
        data: list of dictionaries to search.

    Returns:
        A flat list of matching dictionaries, ordered by rank.
    """
    found = []
    for current in range(start, end):
        matches = find_by_rank(current, rank, data)
        # find_by_rank returns None (not []) when a rank has no album.
        if matches:
            found.extend(matches)
    return found


#
def find_by_year(year, key='album', data=albums):
    """Return the `key` field of every entry released in `year`.

    Args:
        year: the release year as an integer.
        key: dictionary field to collect from each matching entry.
        data: list of dictionaries, each with a string 'year' field.

    Returns:
        A list of the collected values (empty when nothing matches).

    Raises:
        TypeError: if `year` is not an integer.
    """
    # Raise instead of calling sys.exit(): `sys` is never imported in this
    # notebook, and an exception is something a caller can actually handle.
    # bool is a subclass of int, so it is rejected explicitly.
    if isinstance(year, bool) or not isinstance(year, int):
        raise TypeError("Please enter an integer year")

    # The 'year' field is compared as a string.
    year_str = str(year)
    return [album[key] for album in data if album['year'] == year_str]


def find_by_years(year1, year2, data=albums):
    """Return the album names released from `year1` through `year2`, inclusive.

    Args:
        year1: first year of the range, as an integer.
        year2: last year of the range (included), as an integer.
        data: list of dictionaries, each with string 'year' and 'album' fields.

    Returns:
        A list of album names ordered by year; empty when nothing matches
        or when year1 > year2.

    Raises:
        TypeError: if either bound is not an integer.
    """
    for bound in (year1, year2):
        # bool is a subclass of int, so it is rejected explicitly.
        if isinstance(bound, bool) or not isinstance(bound, int):
            raise TypeError("Please enter an integer year")

    albums_in_range = []
    for year in range(year1, year2 + 1):
        # `data` must be passed by keyword: the second positional parameter
        # of find_by_year is `key`, not `data`.
        albums_in_range.extend(find_by_year(year, data=data))
    return albums_in_range
#


### ALL FUNCTIONS
def all_titles(key='album', data=albums):
    """Collect the `key` field (the album title by default) of every entry.

    Args:
        key: dictionary field to collect.
        data: list of dictionaries to read.

    Returns:
        A list with one value per entry, in the order of `data`.
    """
    return list(map(lambda entry: entry[key], data))

def all_artists(key='artist', data=albums):
    """Collect the `key` field (the artist by default) of every entry.

    Duplicates are kept: an artist appears once per entry in `data`.

    Args:
        key: dictionary field to collect.
        data: list of dictionaries to read.

    Returns:
        A list with one value per entry, in the order of `data`.
    """
    names = []
    for entry in data:
        names.append(entry[key])
    return names

### Q & A FUNCTIONS
#
def artist_with_most_albums(key='artist',data=albums):
    """Return the artist(s) that appear most often in `data`.

    Args:
        key: dictionary field that holds the artist name.
        data: list of dictionaries, one per album.

    Returns:
        A list of artist names tied for the highest count, in order of
        first appearance in `data`; an empty list when `data` is empty.
    """
    # Single pass: count how many entries each artist has.
    album_counts = {}
    for album in data:
        artist = album[key]
        album_counts[artist] = album_counts.get(artist, 0) + 1

    # max() raises on an empty sequence, so handle "no data" explicitly.
    if not album_counts:
        return []

    highest_number_of_albums = max(album_counts.values())
    # Keep every artist tied for first place, not just one of them.
    return [artist for artist, count in album_counts.items()
            if count == highest_number_of_albums]

def most_popular_word(key='artist', data=albums):
    """Return the whitespace-separated word(s) used most often in a field.

    NOTE(review): the default `key` is 'artist', so by default this counts
    words in artist names; pass key='album' to count words in album titles.

    Words are compared exactly (case-sensitive, punctuation included).

    Args:
        key: dictionary field whose text is split into words.
        data: list of dictionaries to read.

    Returns:
        A list of the words tied for the highest count, in order of first
        appearance; an empty list when there are no words at all.
    """
    # Single pass over every word instead of calling list.count per word.
    word_counts = {}
    for entry in data:
        for word in entry[key].split():
            word_counts[word] = word_counts.get(word, 0) + 1

    # max() raises on an empty sequence, so handle "no words" explicitly.
    if not word_counts:
        return []

    highest_count = max(word_counts.values())
    return [word for word, count in word_counts.items() if count == highest_count]
#

#returns a histogram with each decade pointing to the 
#number of albums released
#imports needed, numpy, matplotlib

def histogram_albums_decade(data=albums):
    """Plot how many albums were released in each decade.

    Prints the per-decade counts (oldest decade first) and shows a bar
    chart with one bar per decade.

    Args:
        data: list of dictionaries, each with a 'year' field.
            Assumes 'year' is a four-digit string — TODO confirm for other data sets.

    Returns:
        None; the chart is displayed with plt.show().
    """
    # Count albums per decade. The key is the full decade ("1960s") rather
    # than only the tens digit, so e.g. the 1910s and 2010s stay separate.
    decades = {}
    for year in sorted(entry['year'] for entry in data):
        decade = year[:3] + '0s'
        decades[decade] = decades.get(decade, 0) + 1

    labels = list(decades.keys())
    quantities = list(decades.values())
    print(quantities)

    # Draw the counted values directly: equal-width histogram bins over the
    # individual years do not line up with decade boundaries.
    plt.bar(labels, quantities, edgecolor='black')
    plt.xlabel('Decade')
    plt.ylabel('Quantity')
    plt.title('Quantity of Albums per Decade')
    plt.show()
# Draw the per-decade chart for the albums loaded at the top of the notebook.
histogram_albums_decade(albums)

# Histogram by genre - Returns a histogram with each genre pointing to the number of 
# albums that are categorized as being in that genre.
def histogram_by_genre(data=None):
    """Plot how many albums are categorized under each genre.

    A 'genre' value may list several genres separated by commas; such an
    album is counted once for every genre it lists (e.g. 'Rock, Blues'
    adds one to both 'Rock' and 'Blues').

    Args:
        data: list of dictionaries with a 'genre' string field. Defaults to
            the module-level `albums` list when omitted.

    Returns:
        None; the chart is displayed with plt.show().
    """
    if data is None:
        data = albums

    genre_counts = {}
    for entry in data:
        # Splitting on ',' and then stripping covers both ", " and ","
        # separators; stripping '&' cleans leftovers such as "& Country".
        for piece in entry['genre'].split(','):
            genre = piece.strip(' &')
            genre_counts[genre] = genre_counts.get(genre, 0) + 1

    # Plotting the data
    plt.bar(x=list(genre_counts.keys()), height=list(genre_counts.values()))
    plt.xticks(rotation=300)
    plt.xlabel('Genre')
    plt.ylabel('Quantity')
    # The bars count albums, not songs.
    plt.title('Quantity of Albums in Genres')
    plt.show()



### After JSON Import functions

#top songs format ['rank', 'name', 'artist', 'year']
#returns name of the artist and album with the most songs featured on top 500
def albumWithMostTopSongs(album_tracks=None, top_songs=None):
    """Return the name of the album with the most tracks on the top-songs list.

    A track counts when its name is exactly equal (case-sensitive) to the
    'name' of an entry in the top-songs list. Entries that share an album
    name are counted together. On a tie, the album that appears first wins.

    Args:
        album_tracks: list of dicts with 'album' and 'tracks' (a list of
            track names). Defaults to the module-level `tracks`.
        top_songs: list of dicts with a 'name' field. Defaults to the
            module-level `songs`.

    Returns:
        The album name with the highest count, or None when there are no
        albums at all.
    """
    if album_tracks is None:
        album_tracks = tracks
    if top_songs is None:
        top_songs = songs

    # A set makes each "is this track a top song?" lookup constant time.
    # Entries without a 'name' (e.g. from malformed lines) are ignored.
    top_song_names = {song['name'] for song in top_songs if 'name' in song}

    # Add up the top-song hits for each album name.
    counter = {}
    for album in album_tracks:
        hits = sum(1 for track in album['tracks'] if track in top_song_names)
        counter[album['album']] = counter.get(album['album'], 0) + hits

    # max() raises on an empty sequence, so handle "no albums" explicitly.
    if not counter:
        return None

    # max() returns the first maximal item, so ties go to the earliest album.
    best_album, _ = max(counter.items(), key=lambda item: item[1])
    return best_album


#returns a list with the name of only the albums that have tracks featured
#on the list of top 500 songs
def albumsWithTopSongs():
    """Placeholder, not implemented yet: currently always returns None.

    Per the comment above, it is meant to return the names of the albums
    that have tracks on the top 500 songs list.
    """
    return None
#returns a list of names of only the songs featured on the list of top albums
def songsThatAreOnTopAlbums():
    """Placeholder, not implemented yet: currently always returns None.

    Per the comment above, it is meant to return the names of the songs
    that are featured on the list of top albums.
    """
    return None
#returns a histogram with the 10 albums that have the most songs that appear in
#the top songs list
#album names should point to the number of songs that appear on the top 500 songs list
def top10AlbumsByTopSongs():
    """Placeholder, not implemented yet: currently always returns None.

    Per the comment above, it is meant to show a histogram of the 10 albums
    with the most songs on the top 500 songs list.
    """
    return None
#artist featured with the most songs and albums on the two lists
def topOverallArtist():
    """Placeholder, not implemented yet: currently always returns None.

    Per the comment above, it is meant to find the artist featured with the
    most songs and albums across the two lists.
    """
    return None








[9, 99, 181, 83, 65, 39, 2]


<Figure size 640x480 with 1 Axes>

In [44]:
def albumWithMostTopSongs(album_tracks=None, top_songs=None):
    """Return the name of the album with the most tracks on the top-songs list.

    A track counts when its name is exactly equal (case-sensitive) to the
    'name' of an entry in the top-songs list. Entries that share an album
    name are counted together. On a tie, the album that appears first wins.

    Args:
        album_tracks: list of dicts with 'album' and 'tracks' (a list of
            track names). Defaults to the module-level `tracks`.
        top_songs: list of dicts with a 'name' field. Defaults to the
            module-level `songs`.

    Returns:
        The album name with the highest count, or None when there are no
        albums at all.
    """
    if album_tracks is None:
        album_tracks = tracks
    if top_songs is None:
        top_songs = songs

    # A set makes each "is this track a top song?" lookup constant time.
    # Entries without a 'name' (e.g. from malformed lines) are ignored.
    top_song_names = {song['name'] for song in top_songs if 'name' in song}

    # Add up the top-song hits for each album name.
    counter = {}
    for album in album_tracks:
        hits = sum(1 for track in album['tracks'] if track in top_song_names)
        counter[album['album']] = counter.get(album['album'], 0) + hits

    # max() raises on an empty sequence, so handle "no albums" explicitly.
    if not counter:
        return None

    # max() returns the first maximal item, so ties go to the earliest album.
    best_album, _ = max(counter.items(), key=lambda item: item[1])
    return best_album


# The result is computed inside the function and returned, so this cell no
# longer depends on a `counter` variable left over in the kernel.
print(albumWithMostTopSongs())


Elvis Presley
