In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Read the track-level dataset from the working directory and preview it.
# NOTE(review): the preview shows 'Unnamed: 0' and 'Unnamed: 0.1' columns, which look
# like index columns that were written into the CSV — confirm whether they are needed.
df = pd.read_csv('dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,...,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,...,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,...,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,...,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,...,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [3]:
# Average liveness per genre, sorted highest-first so that head(n) yields the top n.
liveness_by_genre = (
    df.groupby('track_genre')['liveness']
    .mean()
    .reset_index()
    .sort_values('liveness', ascending=False)
)

# Axis labels shared by every chart in this cell
liveness_labels = {'track_genre': 'Genre', 'liveness': 'Average Liveness'}

# Vertical bar chart of every genre
fig = px.bar(liveness_by_genre,
             x='track_genre',
             y='liveness',
             title='Average Liveness by Genre',
             labels=liveness_labels)

# Rotate x-axis labels for readability (there are many genres)
fig.update_layout(xaxis_tickangle=-45)

fig.show()

# Alternative: horizontal bar chart (easier to read with many genres).
# Assigned to `fig` and shown explicitly so that this chart is actually rendered.
fig = px.bar(liveness_by_genre,
             x='liveness',
             y='track_genre',
             orientation='h',
             title='Average Liveness by Genre',
             labels=liveness_labels)

fig.show()

# Top 20 genres by average liveness (the frame is already sorted descending)
top_genres = liveness_by_genre.head(20)

fig = px.bar(top_genres,
             x='liveness',
             y='track_genre',
             orientation='h',
             title='Top 20 Genres by Average Liveness',
             labels=liveness_labels)

fig.show()

In [8]:
# Broad category -> list of `track_genre` labels it absorbs.
# Used to collapse the fine-grained genres into a handful of readable groups.
genre_dict = {
    "Rock & Alternative": [
        "alt-rock", "alternative", "grunge", "hard-rock",
        "indie", "psych-rock", "punk-rock", "punk",
        "rock", "rock-n-roll", "guitar", "goth",
        "emo", "garage", "industrial", "grindcore",
        "hardcore",
    ],
    "Metal": [
        "black-metal", "death-metal", "heavy-metal",
        "metal", "metalcore", "power-pop",
    ],
    "Electronic & Dance": [
        "breakbeat", "chicago-house", "club", "dance",
        "deep-house", "detroit-techno", "disco", "drum-and-bass",
        "dubstep", "edm", "electro", "electronic",
        "hardstyle", "house", "idm", "minimal-techno",
        "progressive-house", "techno", "trance", "synth-pop",
        "groove",
    ],
    "Hip-Hop & Urban": [
        "hip-hop", "r-n-b", "soul",
        "funk", "dub", "trip-hop",
    ],
    "Pop": ["pop", "indie-pop", "pop-film", "party"],
    "Country & Folk Americana": [
        "country", "folk", "bluegrass", "honky-tonk",
        "singer-songwriter", "songwriter", "acoustic",
    ],
    "Jazz & Blues": ["jazz", "blues", "gospel"],
    "Latin & Caribbean": [
        "latin", "latino", "reggae", "reggaeton",
        "dancehall", "salsa", "samba", "tango",
        "brazil", "forro", "pagode",
    ],
    "Brazilian": ["mpb", "sertanejo"],
    "Asian Pop & Regional": [
        "j-pop", "j-rock", "j-dance", "j-idol",
        "k-pop", "cantopop", "mandopop", "indian",
        "malay",
    ],
    "European & Regional": [
        "french", "german", "british", "swedish",
        "turkish", "iranian", "spanish",
    ],
    "Classical & Sophisticated": ["classical", "opera", "new-age", "piano"],
    "Mood & Ambient": [
        "ambient", "chill", "happy",
        "sad", "sleep", "study",
    ],
    "World & Traditional": ["world-music", "afrobeat", "ska"],
    "Specialty & Novelty": [
        "anime", "children", "kids", "comedy",
        "disney", "show-tunes", "romance", "rockabilly",
    ],
}

In [9]:
# Redo visualization using the broader genre categories

# Invert genre_dict into a flat {genre: category} lookup
genre_to_category = {
    genre: category
    for category, genres in genre_dict.items()
    for genre in genres
}

# Add a new column with the broader category.
# Genres missing from genre_dict map to NaN, and groupby drops NaN keys by default,
# so label them explicitly instead of letting those tracks vanish from the chart.
df['genre_category'] = df['track_genre'].map(genre_to_category).fillna('Uncategorized')

# Average liveness per category, highest first
liveness_by_category = (
    df.groupby('genre_category')['liveness']
    .mean()
    .reset_index()
    .sort_values('liveness', ascending=False)
)

# Axis labels shared by both charts below
category_labels = {'genre_category': 'Genre Category', 'liveness': 'Average Liveness'}

# Vertical bar chart with categories
fig = px.bar(liveness_by_category,
             x='genre_category',
             y='liveness',
             title='Average Liveness by Genre Category',
             labels=category_labels)

# Rotate x-axis labels for readability
fig.update_layout(xaxis_tickangle=-45)

fig.show()

# Alternative: horizontal bar chart (easier to read)
fig = px.bar(liveness_by_category,
             x='liveness',
             y='genre_category',
             orientation='h',
             title='Average Liveness by Genre Category',
             labels=category_labels)

fig.show()

In [4]:
# Distinct genre labels present in the dataset
pd.unique(df['track_genre'])

array(['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
       'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
       'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
       'chill', 'classical', 'club', 'comedy', 'country', 'dance',
       'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
       'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm',
       'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk',
       'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove',
       'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
       'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm', 'indian',
       'indie-pop', 'indie', 'industrial', 'iranian', 'j-dance', 'j-idol',
       'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino',
       'malay', 'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
       'new-age', 'opera', 'pagode', 'party', 'piano', 'pop-film', 'pop',
       'pow

Here's a consolidated grouping of these 114 genres into 15 broader categories:
1. Rock & Alternative (17 genres)
Heavy guitar-driven music spanning from classic to modern alternative styles.

alt-rock, alternative, grunge, hard-rock, indie, psych-rock, punk-rock, punk, rock, rock-n-roll, guitar, goth, emo, garage, industrial, grindcore, hardcore

2. Metal (6 genres)
Aggressive, distorted guitar music with various subgenres from melodic to extreme.

black-metal, death-metal, heavy-metal, metal, metalcore, power-pop (misplaced but power-metal adjacent)

3. Electronic & Dance (21 genres)
Electronic production-based music for clubs, festivals, and home listening.

breakbeat, chicago-house, club, dance, deep-house, detroit-techno, disco, drum-and-bass, dubstep, edm, electro, electronic, hardstyle, house, idm, minimal-techno, progressive-house, techno, trance, synth-pop, groove

4. Hip-Hop & Urban (6 genres)
Rap, beats, and urban music traditions.

hip-hop, r-n-b, soul, funk, dub, trip-hop

5. Pop (4 genres)
Mainstream, accessible, melodic music designed for broad appeal.

pop, indie-pop, pop-film, party

6. Country & Folk Americana (7 genres)
Acoustic storytelling traditions from American roots music.

country, folk, bluegrass, honky-tonk, singer-songwriter, songwriter, acoustic

7. Jazz & Blues (3 genres)
American improvisational and expressive musical traditions.

jazz, blues, gospel

8. Latin & Caribbean (11 genres)
Rhythmic music from Latin America and the Caribbean islands.

latin, latino, reggae, reggaeton, dancehall, salsa, samba, tango, brazil, forro, pagode

9. Brazilian (2 genres)
Specific Brazilian styles (beyond samba/forro already in Latin).

mpb, sertanejo

10. Asian Pop & Regional (9 genres)
Popular music from East and Southeast Asian markets.

j-pop, j-rock, j-dance, j-idol, k-pop, cantopop, mandopop, indian, malay

11. European & Regional (7 genres)
Music identified by European national/regional origins.

french, german, british, swedish, turkish, iranian, spanish

12. Classical & Sophisticated (4 genres)
Formal, composed music from Western classical traditions.

classical, opera, new-age, piano

13. Mood & Ambient (6 genres)
Atmospheric music defined by emotional tone or function.

ambient, chill, happy, sad, sleep, study

14. World & Traditional (3 genres)
Global traditional and folk music outside Western categories.

world-music, afrobeat, ska

15. Specialty & Novelty (8 genres)
Music for specific contexts or audiences.

anime, children, kids, comedy, disney, show-tunes, romance, rockabilly

This reduces 114 genres to 15 manageable categories while preserving the essential distinctions between musical styles. Some genres could fit multiple categories (like "acoustic" or "british"), but I placed them where they're most commonly understood.