In [1]:
#importing necessary packages
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import json

In [2]:
#importing the Yacht Rock DataFrame
df = pd.read_csv("YR_v4.csv")

In [3]:
df

Unnamed: 0,Personnel,Artist,Album,Personnel Role,Number of Tracks,Recording Period,Date of Release,Label,Runtime,Singles,Link
0,Al Schmitt,Steely Dan,Aja (1977),Engineer,7,Late 1976 - Jul 1977,"September 23, 1977",ABC,0:39:56,"Peg (Nov. 1977), Deacon Blues (Mar. 1978), Jos...",https://www.discogs.com/master/16921-Steely-Da...
1,Bernard Purdie,Steely Dan,Aja (1977),Drums,7,Late 1976 - Jul 1977,"September 23, 1977",ABC,0:39:56,"Peg (Nov. 1977), Deacon Blues (Mar. 1978), Jos...",https://www.discogs.com/master/16921-Steely-Da...
2,Bernie Grundman,Steely Dan,Aja (1977),Mastering Engineer,7,Late 1976 - Jul 1977,"September 23, 1977",ABC,0:39:56,"Peg (Nov. 1977), Deacon Blues (Mar. 1978), Jos...",https://www.discogs.com/master/16921-Steely-Da...
3,Bill Schnee,Steely Dan,Aja (1977),Engineer,7,Late 1976 - Jul 1977,"September 23, 1977",ABC,0:39:56,"Peg (Nov. 1977), Deacon Blues (Mar. 1978), Jos...",https://www.discogs.com/master/16921-Steely-Da...
4,Chuck Findley,Steely Dan,Aja (1977),Brass,7,Late 1976 - Jul 1977,"September 23, 1977",ABC,0:39:56,"Peg (Nov. 1977), Deacon Blues (Mar. 1978), Jos...",https://www.discogs.com/master/16921-Steely-Da...
...,...,...,...,...,...,...,...,...,...,...,...
185,Marty Paich,Toto,Toto IV (1982),Orchestral Arrangements,10,June 1981 - January 1982,"April 8, 1982",Columbia,0:41:58,"Rosanna (Mar. 31, 1982), Africa (Oct. 1982), M...",https://www.discogs.com/master/30712-Toto-Toto-IV
186,Mike Porcaro,Toto,Toto IV (1982),Cello,10,June 1981 - January 1982,"April 8, 1982",Columbia,0:41:58,"Rosanna (Mar. 31, 1982), Africa (Oct. 1982), M...",https://www.discogs.com/master/30712-Toto-Toto-IV
187,Steve Lukather,Toto,Toto IV (1982),"Writer, Guitar, Lead Vocals, Backing Vocals, P...",10,June 1981 - January 1982,"April 8, 1982",Columbia,0:41:58,"Rosanna (Mar. 31, 1982), Africa (Oct. 1982), M...",https://www.discogs.com/master/30712-Toto-Toto-IV
188,Timothy B. Schmit,Toto,Toto IV (1982),Backing Vocals,10,June 1981 - January 1982,"April 8, 1982",Columbia,0:41:58,"Rosanna (Mar. 31, 1982), Africa (Oct. 1982), M...",https://www.discogs.com/master/30712-Toto-Toto-IV


In [4]:
def populate_dict(info):
    """
    This function takes in the dataframe and extracts the information for all of the albums.
    It loops through and appends to the Personnel list to include all of the people
    who worked on the album. 

    Parameters
    ----------
    info : DataFrame
        The DataFrame containing the albums information. 
    
    Returns
    -------
        Nested Dictionary
            A nested dictionary with all of the album's attributes and its personnel.
    """
    albums_dict = {}
    for _, row in info.iterrows():
        album = row['Album']
        if album not in albums_dict:
            albums_dict[album] = {
                'Artist': row['Artist'],
                'Label': row['Label'],
                'Recording Period': row['Recording Period'],
                'Release Date': row['Date of Release'],
                'Number of Tracks': row['Number of Tracks'],
                'Runtime': row['Runtime'],
                'Singles': row['Singles'],
                'Link':row['Link'],
                'Personnel': []
            }
        albums_dict[album]['Personnel'].append((row['Personnel'], {'role': row['Personnel Role']}))
    return albums_dict
album_data = populate_dict(df)

In [5]:
#writing album_data to a JSON file to cache
with open('YR.json', 'w') as f:
     json.dump(album_data, f, sort_keys = True, indent = 4,
               ensure_ascii = False)

In [6]:
#reading in the cached file + creating network
f = open('YR.json')
data = json.load(f)

G = nx.Graph()

for album, info in data.items():
    G.add_node(album, type='album',info=info)
    for personnel in info['Personnel']:
        G.add_node(personnel[0], type='person', role=personnel[1])
        G.add_edge(album, personnel[0],role=personnel[1]['role'])

In [7]:
#separating the nodes into albums and people and finding the personnel and the album with the highest degrees
albums=[]
people=[]

for node in G.degree():
    if "(" in node:
        albums.append(node)
    else:
        people.append(node)

people_degrees = [node for node in G.degree() if '(' not in node[0]] 
album_degrees = [node for node in G.degree() if '(' in node[0]] 

people_degrees = {person: degree for person, degree in people_degrees}
album_degrees = {album: degree for album, degree in album_degrees}

most_connected_person = max(people_degrees, key=people_degrees.get)
max_connections = people_degrees[most_connected_person]

print(f"The person who worked on the most albums is: {most_connected_person} with {max_connections} connections.")

most_connected_album = max(album_degrees, key=album_degrees.get)
max_connections = album_degrees[most_connected_album]

print(f"The album that has the most personnel: {most_connected_album} with {max_connections} connections.")

The person who worked on the most albums is: Tom Scott with 8 connections.
The album that has the most personnel: Aja (1977) with 34 connections.


In [8]:
people_degrees

{'Al Schmitt': 3,
 'Bernard Purdie': 2,
 'Bernie Grundman': 4,
 'Bill Schnee': 3,
 'Chuck Findley': 4,
 'Chuck Rainey': 3,
 'Clydie King': 2,
 'Dean Parks': 4,
 'Don Grolnick': 2,
 'Donald Fagen': 3,
 'Ed Greene': 2,
 'Elliot Scheiner': 2,
 'Gary Coleman': 2,
 'Gary Katz': 3,
 'Jackie Kelso': 2,
 'Jay Graydon': 3,
 'Jim Horn': 5,
 'Jim Keltner': 2,
 'Joe Sample': 2,
 'Larry Carlton': 4,
 'Lenise Bent': 2,
 'Linda Tyler': 2,
 'Michael McDonald': 6,
 'Michael Omartian': 5,
 'Paul Griffin': 2,
 'Plas Johnson': 2,
 'Rick Marotta': 2,
 'Roger Nichols': 3,
 'Steve Gadd': 4,
 'Steve Khan': 2,
 'Timothy B. Schmit': 2,
 'Tom Scott': 8,
 'Victor Feldman': 3,
 'Walter Becker': 2,
 'James Newton Howard': 4,
 'Abraham Laboriel': 2,
 'David Foster': 3,
 'Jeff Porcaro': 5,
 'Jerry Hey': 4,
 'Michael Boddicker': 2,
 'Richard Page': 3,
 'Steve George': 3,
 'Steve Lukather': 5,
 'David Leonard': 2,
 'Nathan East': 2,
 'Bobby Hata': 2,
 'Christopher Cross': 2,
 'JD Souther': 2,
 'Lenny Castro': 5,
 'Mart

In [9]:
#creating a new album list (without degrees) and prompting the user to choose two albums to view their 
#number of common personnel (common neighbors)

albums = [node for node in G.nodes if isinstance(node, str) and '(' in node]

print("Albums to choose from:")
for index, album in enumerate(albums, 1):
    print(f"{index}. {album}")

try:
    firstChoice = int(input("Enter the number of your first choice: "))
    secondChoice = int(input("Enter the number of your second choice: "))

    if 1 <= firstChoice <= len(albums) and 1 <= secondChoice <= len(albums) and firstChoice != secondChoice:
        firstAlbum = albums[firstChoice - 1]
        secondAlbum = albums[secondChoice - 1]

        firstNeighbors = set(G.neighbors(firstAlbum))
        secondNeighbors = set(G.neighbors(secondAlbum))

        common_neighbors = firstNeighbors & secondNeighbors
        print(f"\nThe common personnel between '{firstAlbum}' and '{secondAlbum}' are:")
        if common_neighbors:
            for neighbor in common_neighbors:
                print(f"- {neighbor}")
            print(f"\nNumber of common personnel: {len(common_neighbors)}")
        else:
            print("No common personnel.")
    else:
        print("Invalid, please enter valid album numbers!")
except ValueError:
    print("Invalid, please enter numbers only.")

Albums to choose from:
1. Aja (1977)
2. Ambrosia (1975)
3. Breakin' Away (1981)
4. Can't Hold Back (1986)
5. Christopher Cross (1979)
6. Gaucho (1980)
7. High Adventure (1982)
8. If That's What It Takes (1982)
9. Love Will Keep Us Together (1975)
10. Mannequin (1978)
11. Minute by Minute (1978)
12. Nights Are Forever (1976)
13. No Secrets (1972)
14. Partners in Crime (1979)
15. Phoenix (1979)
16. Player (1977)
17. Silk Degrees (1976)
18. Toto IV (1982)
Invalid, please enter numbers only.


In [10]:
#using a similar method to the previous cell, prompting the user to choose an album to return the link
#to its Discogs listing
print("Albums to choose from:")
for index, album in enumerate(albums, 1):
    print(f"{index}. {album}")

try:
    choice = int(input("Enter number for Discogs link: "))

    if 1 <= choice <= len(albums):
        selectedAlbum = albums[choice - 1]
        albumLink = G.nodes[selectedAlbum]['info'].get('Link', 'No link available')
        print(f"\nYou selected '{selectedAlbum}'.")
        print(f"Link: {albumLink}")
    else:
        print("Invalid, please enter valid album numbers!")
except ValueError:
    print("Invalid, please enter numbers only.")

Albums to choose from:
1. Aja (1977)
2. Ambrosia (1975)
3. Breakin' Away (1981)
4. Can't Hold Back (1986)
5. Christopher Cross (1979)
6. Gaucho (1980)
7. High Adventure (1982)
8. If That's What It Takes (1982)
9. Love Will Keep Us Together (1975)
10. Mannequin (1978)
11. Minute by Minute (1978)
12. Nights Are Forever (1976)
13. No Secrets (1972)
14. Partners in Crime (1979)
15. Phoenix (1979)
16. Player (1977)
17. Silk Degrees (1976)
18. Toto IV (1982)
Invalid, please enter numbers only.


In [11]:
#creating the interactive visualization of the network using Pyvis & adding the annotations for the nodes and edges
yr_net = Network(bgcolor="#283044", font_color="white",filter_menu=True) 

yr_net.barnes_hut(gravity=-30000, central_gravity=0.2, spring_length=200, spring_strength=0.05, damping=0.9)

for node, data in G.nodes(data=True):
    degree = G.degree[node]
    if data['type'] == 'album':
        album_info = data['info']
        title_text = (
            f"Album: {node}\n"
            f"Artist: {album_info['Artist']}\n"
            f"Label: {album_info['Label']}\n"
            f"Recording Period: {album_info['Recording Period']}\n"
            f"Release Date: {album_info['Release Date']}\n"
            f"Number of Tracks: {album_info['Number of Tracks']}\n"
            f"Runtime: {album_info['Runtime']}\n"
            f"Singles: {album_info['Singles']}")
        yr_net.add_node(node, label=node, title=title_text, color="#6A97B4", size=40)
    else:
        yr_net.add_node(node, label=node, title=f"{node}\nNumber of albums: {degree}", color="#FE5F55", size=30)

for source, target, data in G.edges(data=True):
    yr_net.add_edge(source, target, title=data['role'], color="#FFFBDB",width=1)

yr_net.show("yr_network.html",notebook=False)

yr_network.html
