I'll build a proper data set for Tableau Analysis in this notebook.

In [1]:
# import packages
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator
from matplotlib import font_manager
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import warnings
from wordcloud import WordCloud
from matplotlib.colors import to_rgba

In [2]:
ts = pd.read_csv('data/TheStreets.csv')
atw = pd.read_csv('data/AllThemWitches.csv')
rs = pd.read_csv('data/RivalSons.csv')
# add band names column
ts['band_name'] = 'The Streets'
atw['band_name'] = 'All Them Witches'
rs['band_name'] = 'Rival Sons'

In [3]:
music = pd.concat([ts, atw, rs], axis=0)

In [4]:
music.head(5)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,analysis_url,duration_ms,time_signature,name,popularity,explicit,uri_y,album_name,album_release_date,band_name
0,0.649,0.526,8,-6.933,1,0.315,0.536,0.0,0.0965,0.355,...,https://api.spotify.com/v1/audio-analysis/2BhQ...,221143,4,Too Much Yayo,38,False,spotify:track:2BhQL5O24XJWjIjR9LRMxZ,The Darker The Shadow The Brighter The Light,2023-10-13,The Streets
1,0.825,0.552,5,-6.82,0,0.0892,0.0697,0.0,0.181,0.509,...,https://api.spotify.com/v1/audio-analysis/37OM...,176000,4,Money Isn’t Everything (feat. Teef),37,True,spotify:track:37OMb2IjwLPJT7iDpYpowk,The Darker The Shadow The Brighter The Light,2023-10-13,The Streets
2,0.5,0.536,2,-7.119,1,0.292,0.2,0.0,0.172,0.236,...,https://api.spotify.com/v1/audio-analysis/7Dsh...,176571,4,Walk of Shame,35,True,spotify:track:7DshwYInfWfxHpZZOxFIjj,The Darker The Shadow The Brighter The Light,2023-10-13,The Streets
3,0.875,0.515,9,-7.789,1,0.302,0.117,0.0,0.0761,0.327,...,https://api.spotify.com/v1/audio-analysis/0HEq...,193846,4,Something to Hide,34,True,spotify:track:0HEqIGeJleguoiStN2EHSS,The Darker The Shadow The Brighter The Light,2023-10-13,The Streets
4,0.681,0.683,2,-8.479,1,0.335,0.0694,0.0,0.115,0.0874,...,https://api.spotify.com/v1/audio-analysis/14Fk...,206263,4,Shake Hands With Shadows,33,False,spotify:track:14Fk6QB4PfXUPrYhPukMCb,The Darker The Shadow The Brighter The Light,2023-10-13,The Streets


In [5]:
music = music[['name', 'band_name', 'album_name', 'album_release_date', 'popularity', 'explicit',  'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'duration_ms', 'time_signature']]

In [6]:
music = music[~music['album_name'].isin([
    'Live In Brussels', 'Live On The Internet', 'Great Western Valkyrie (Tour Edition)', 
    'Live from the Haybale Studio at The Bonnaroo Music & Arts Festival', 
    'Too Bad (Acoustic) [Live from the Haybale Studio at The Bonnaroo Music & Arts Festival]',
    'One Live in Nottingham, 31-10-02', 'Remixes & B-Sides Too', 'Remixes & B-Sides',
    'Each Day Gives', 'Troubled Waters (Zed Bias Remixes)', 'Turn Your Face Into the Sun',
    'Too Much Yayo', 'Troubled Waters', 'Mike (desert island duvet)', "Brexit at Tiffany's",
    'Wrong Answers Only', 'Free My People (The Streets Remix)', "Who's Got The Bag (The Remixes)", 'Mercy',
    'Sweet Life', 'Guillotine', 'Bird in the Hand', 'Rapture', 'Nobody Wants to Die',
    'Shooting Stars (in the Woods)', 'Back In The Woods', 'Do Your Worst', 'Tied Up', 'At the Garage'
])]

In [7]:
music.loc[music['album_name'] == "A Grand Don't Come for Free", 'album_release_date'] = '2004-05-17'
music.loc[music['album_name'] == 'Everything Is Borrowed', 'album_release_date'] = '2008-09-15'
music.loc[music['album_name'] == 'Computers and Blues', 'album_release_date'] = '2011-02-07'

In [8]:
music['album_release_date'] = pd.to_datetime(music['album_release_date'])

In [9]:
music.loc[(music['album_release_date'].dt.year == 2022) & (music['band_name'] == 'All Them Witches'), 'album_name'] = "Baker's Dozen"

In [10]:
albums = music.groupby('band_name')['album_name'].unique()
for band, albums_list in albums.items():
    print(f"{band}:")
    for album in albums_list:
        print(album)
    print("\n")

All Them Witches:
Nothing as the Ideal
ATW
Sleeping Through The War
Dying Surfer Meets His Maker
Lightning At The Door
Our Mother Electricity
Baker's Dozen


Rival Sons:
LIGHTBRINGER
DARKFIGHTER
Feral Roots
Hollow Bones
Head Down
Pressure & Time
Before the Fire


The Streets:
The Darker The Shadow The Brighter The Light
None Of Us Are Getting Out Of This Life Alive
Computers and Blues
Everything Is Borrowed
The Hardest Way to Make an Easy Living
A Grand Don't Come for Free
Original Pirate Material




In [11]:
music.info()

<class 'pandas.core.frame.DataFrame'>
Index: 224 entries, 0 to 83
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   name                224 non-null    object        
 1   band_name           224 non-null    object        
 2   album_name          224 non-null    object        
 3   album_release_date  224 non-null    datetime64[ns]
 4   popularity          224 non-null    int64         
 5   explicit            224 non-null    bool          
 6   danceability        224 non-null    float64       
 7   energy              224 non-null    float64       
 8   key                 224 non-null    int64         
 9   loudness            224 non-null    float64       
 10  mode                224 non-null    int64         
 11  speechiness         224 non-null    float64       
 12  acousticness        224 non-null    float64       
 13  instrumentalness    224 non-null    float64       
 14  

In [12]:
music.to_csv('data/music_data.csv', index=False)