Pip Install Commands

In [None]:
# Install shapely into the running kernel's environment; the notebook imports it below.
%pip install shapely

Libraries

In [None]:
import json
import requests
import numpy as np
import pandas as pd
import networkx as nx
from shapely.prepared import prep
from shapely.geometry import mapping, shape, Point

Constant Values

In [None]:
# Column names used by the cells below, defined once so they are not repeated as string literals.
YEAR_COLUMN_NAME = "year"
DECADE_COLUMN_NAME = "decade"  # added by this notebook, derived from the year column
SONG_TITLE_COLUMN_NAME = "song_title"
COUNTRY_COLUMN_NAME = "country"  # added by this notebook, derived from the artist coordinates
ARTIST_LONGITUDE_COLUMN_NAME = "artist_longitude"
ARTIST_LATITUDE_COLUMN_NAME = "artist_latitude"
ARTIST_LOCATION_COLUMN_NAME = "artist_location"

Loading Songs Dataset

In [None]:
# Read the songs table from the CSV file (path is relative to the notebook's working directory).
raw_songs_dataset = pd.read_csv("../Data/songs_dataset.csv")

In [None]:
# Number of missing values in each column of the raw dataset.
raw_songs_dataset.isna().sum()

In [None]:
# Total number of missing values across all columns of the raw dataset.
raw_songs_dataset.isna().sum().sum()

Shartil: For now, I am going to delete all rows with missing data.<br>
This is an initial approach; let's discuss it together with Elisa.

In [None]:
# Keep only the rows that have no missing value in any column; the raw frame is left untouched.
songs_dataset = raw_songs_dataset.dropna(axis="index", how="any")

In [None]:
# Number of rows currently in songs_dataset.
len(songs_dataset)

Shartil: Adding a decade column (derived from the year column) to the dataset

In [None]:
# Derive each song's release decade by flooring its year to a multiple of 10
# (e.g. 1987 -> 1980). The new column is named through DECADE_COLUMN_NAME so it
# stays in sync with the cells that read the column via that constant.
# The callable passed to assign receives the whole DataFrame, not a single row.
songs_dataset = songs_dataset.assign(
    **{DECADE_COLUMN_NAME: lambda frame: (frame[YEAR_COLUMN_NAME].astype(int) // 10) * 10}
)

In [None]:
# Earliest and latest decade present in the dataset.
min_decade = songs_dataset[DECADE_COLUMN_NAME].min()
max_decade = songs_dataset[DECADE_COLUMN_NAME].max()

# Every decade from the earliest to the latest (inclusive), in steps of 10 years.
# A fixed step is used instead of a fixed number of points: evenly spacing
# 10 values between min and max only lands on decade boundaries when the data
# spans exactly 90 years, and would otherwise produce non-decade values.
decade_array = np.arange(min_decade, max_decade + 10, 10, dtype=int)

Najeeb: Introducing a new column "country" based on Latitude and Longitude.

In [None]:
# Load the country boundaries from the local GeoJSON file.
# Forward slashes keep the relative path portable (a backslash is a path
# separator only on Windows) and match the path style used for the songs CSV.
# The encoding is explicit so that decoding does not depend on the platform default.
with open("../Data/countries.geojson.json", "r", encoding="utf-8") as file:
    geojson_data = json.load(file)

# Map each feature's "ADMIN" property (used here as the country name) to a
# prepared shapely geometry built from that feature's "geometry" entry.
countries = {
    feature["properties"]["ADMIN"]: prep(shape(feature["geometry"]))
    for feature in geojson_data["features"]
}

# Function to get country name from latitude and longitude
def get_country(lon, lat):
    """Return the name of the first country whose geometry contains the point.

    Args:
        lon: Longitude of the location (x-coordinate of the point).
        lat: Latitude of the location (y-coordinate of the point).

    Returns:
        The key in ``countries`` of the first geometry, in dictionary order,
        that contains the point, or "unknown" when no geometry contains it.
    """
    location = Point(lon, lat)
    matches = (name for name, area in countries.items() if area.contains(location))
    return next(matches, "unknown")

# Look up the country of every song from its artist coordinates.
# get_country takes the longitude first, then the latitude.
song_countries = songs_dataset.apply(
    lambda song: get_country(
        song[ARTIST_LONGITUDE_COLUMN_NAME],
        song[ARTIST_LATITUDE_COLUMN_NAME],
    ),
    axis=1,
)

# Store the result as the new country column.
songs_dataset[COUNTRY_COLUMN_NAME] = song_countries

Shartil: Deleting redundant columns 

In [None]:
# Remove the artist coordinate and location columns from the dataset.
redundant_columns = [
    ARTIST_LATITUDE_COLUMN_NAME,
    ARTIST_LONGITUDE_COLUMN_NAME,
    ARTIST_LOCATION_COLUMN_NAME,
]
songs_dataset = songs_dataset.drop(columns=redundant_columns)

# Preview the first rows of the reduced dataset.
songs_dataset.head()

Shartil: Now I am going to create the graph

In [None]:
# Empty directed graph that the following cells fill with decade and song-title nodes.
decade_graph = nx.DiGraph()

In [None]:
# Decade nodes: one per entry of decade_array, converted to plain Python values.
decade_nodes = decade_array.tolist()
decade_graph.add_nodes_from(decade_nodes)

# Song nodes: one per song title.
# NOTE(review): graph nodes are identified by their value, so songs that share
# a title end up as a single node - confirm this is intended.
song_title_nodes = songs_dataset[SONG_TITLE_COLUMN_NAME].tolist()
decade_graph.add_nodes_from(song_title_nodes)

In [None]:
# One edge per song, directed from its release decade to its title and
# labelled "release_decade". Iterating the two columns side by side avoids
# building a Series for every row, as DataFrame.iterrows() would.
relationships = [
    (release_decade, song_title, {"label": "release_decade"})
    for release_decade, song_title in zip(
        songs_dataset[DECADE_COLUMN_NAME],
        songs_dataset[SONG_TITLE_COLUMN_NAME],
    )
]

decade_graph.add_edges_from(relationships)

In [None]:
# Print the graph's string representation as a quick sanity check.
print(decade_graph)

In [None]:
# Decade whose songs should be listed.
decade_input = 1920

# The neighbours of a decade node are the song titles it has edges to.
songs_from_given_decade = list(decade_graph[decade_input])
songs_from_given_decade