In [40]:
import os
from collections import Counter
from itertools import chain

import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
from sqlalchemy import create_engine

# 1. Connect to PostgreSQL and load the dataset.
# Connection settings are read from the environment so credentials are not
# baked into the notebook; each fallback reproduces the previous hard-coded
# local-development value, so behaviour is unchanged when nothing is set.
DB_NAME = os.getenv("DB_NAME", "postgres")
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")

engine = create_engine(f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
df = pd.read_sql("SELECT * FROM music_dataset", engine)

# 2. Preprocess genres: missing values become 'unknown', everything is
# lower-cased, and comma-separated genre strings are split into lists.
df['track_genre'] = df['track_genre'].fillna('unknown').str.lower()
df['genre_list'] = df['track_genre'].str.split(r',\s*')

# 3. Initialise the app
app = Dash(__name__)
app.title = "🎵 Music Analytics Dashboard"

# 4. Layout
# Slider bounds and tick marks are derived from the years present in the data.
year_min = df['year'].min()
year_max = df['year'].max()
year_marks = {int(year): str(int(year)) for year in sorted(df['year'].dropna().unique())}

# Two-handle slider; its value is the only input of the dashboard callback.
year_slider = dcc.RangeSlider(
    id='year-filter',
    min=year_min,
    max=year_max,
    step=1,
    value=[year_min, year_max],
    marks=year_marks,
    allowCross=False,
    tooltip={"placement": "bottom", "always_visible": True},
)

# Filter control, the two percentage read-outs and the three graphs.
dashboard_body = html.Div(
    [
        html.Label("📅 Select Year Range:", style={'fontWeight': 'bold', 'marginLeft': '20px'}),
        year_slider,
        html.Br(),
        html.Div(
            id="billboard-percentage",
            style={'fontSize': '18px', 'paddingBottom': '10px', 'marginLeft': '20px'},
        ),
        html.Div(
            id="grammy-percentage",
            style={'fontSize': '18px', 'paddingBottom': '20px', 'marginLeft': '20px'},
        ),
        dcc.Graph(id='danceability-boxplot'),
        dcc.Graph(id='duration-vs-danceability'),
        dcc.Graph(id='top-genre-combinations'),
    ],
    style={'padding': '20px'},
)

app.layout = html.Div([
    html.H1(
        "🎶 Spotify x Billboard x Grammy Dashboard",
        style={'textAlign': 'center', 'color': '#2c3e50'},
    ),
    html.Hr(),
    dashboard_body,
])

# 5. Main callback
@app.callback(
    Output("billboard-percentage", "children"),
    Output("grammy-percentage", "children"),
    Output("danceability-boxplot", "figure"),
    Output("duration-vs-danceability", "figure"),
    Output("top-genre-combinations", "figure"),
    Input("year-filter", "value")
)
def update_dashboard(selected_years):
    """Rebuild every dashboard element for the selected year range.

    Args:
        selected_years: Two-element sequence ``[start, end]`` coming from the
            ``year-filter`` range slider. Both bounds are inclusive.

    Returns:
        A 5-tuple matching the callback outputs in order: the Billboard
        percentage text, the Grammy percentage text, the danceability box
        plot, the duration-vs-danceability scatter plot and the top genre
        combinations bar chart.
    """
    start_year, end_year = selected_years[0], selected_years[1]
    filtered = df[(df['year'] >= start_year) & (df['year'] <= end_year)]
    total = len(filtered)
    billboard = filtered['billboard_peak'].notna().sum()
    grammy = filtered['category'].notna().sum()

    # Guard against division by zero when the selection contains no rows.
    billboard_pct = f"🎯 {billboard / total * 100:.2f}% of songs also appear on Billboard charts." if total else "No data in selection."
    grammy_pct = f"🏆 {grammy / total * 100:.2f}% of songs were nominated for the Grammys." if total else ""

    # Danceability box plot, genres ordered by descending median.
    genre_order = (
        filtered.groupby('track_genre')['danceability']
        .median()
        .sort_values(ascending=False)
        .index.tolist()
    )

    boxplot = px.box(
        filtered,
        x="track_genre",
        y="danceability",
        category_orders={"track_genre": genre_order},
        title="🎵 Danceability by Genre (Sorted by Median)",
        labels={"track_genre": "Genre", "danceability": "Danceability"},
        template="plotly_white"
    )

    # Scatter plot of duration vs. danceability.
    scatter = px.scatter(
        filtered.sort_values("danceability", ascending=False),
        x="duration_minutes",
        y="danceability",
        color="track_genre",
        title="🎚️ Duration vs. Danceability",
        labels={"duration_minutes": "Duration (min)", "danceability": "Danceability"},
        template="plotly_white"
    )

    # Top 10 genre combinations. This must be computed from the *filtered*
    # rows: using the full dataset made the chart ignore the year slider.
    # Each genre list is sorted so that the same set of genres always maps to
    # the same key regardless of the order it was written in.
    combo_counts = Counter(
        tuple(sorted(genres)) for genres in filtered['genre_list'].dropna()
    ).most_common(10)
    combo_df = pd.DataFrame(combo_counts, columns=['Genre Combo', 'Count'])
    combo_df['Genre Combo'] = combo_df['Genre Combo'].apply(lambda x: ', '.join(x))

    combo_bar = px.bar(
        combo_df.sort_values("Count", ascending=False),
        x='Count',
        y='Genre Combo',
        orientation='h',
        title="🔥 Top 10 Genre Combinations (Most Common First)",
        template="plotly_white"
    )
    # Horizontal bars are laid out bottom-to-top, so without this the most
    # common combination would sit at the bottom, contradicting the title.
    combo_bar.update_layout(yaxis={'categoryorder': 'total ascending'})

    return billboard_pct, grammy_pct, boxplot, scatter, combo_bar

# 6. Run the app: start the Dash server in debug mode, but only when this
# code is executed as the main module.
if __name__ == "__main__":
    app.run(debug=True)


In [27]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html
from sqlalchemy import create_engine

# Database connection settings. Each value is taken from the environment and
# falls back to the local default when the variable is not set.
DB_NAME = os.environ.get("DB_NAME", "postgres")
DB_USER = os.environ.get("DB_USER", "postgres")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "postgres")
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = os.environ.get("DB_PORT", "5432")

# Assemble the SQLAlchemy URL, open the engine and pull the whole table.
connection_url = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
engine = create_engine(connection_url)
df = pd.read_sql("SELECT * FROM music_dataset", engine)

# Dashboard palette. COLOR_SEQ is the list Plotly cycles through when it
# colours discrete categories. With a single entry every category received
# the same green, which made the pie slices below indistinguishable. The
# first entry is unchanged, so single-trace charts look exactly as before;
# the extra shades are the ones already used by the correlation heatmap.
COLOR_GREEN = "#57b660"
COLOR_SEQ = [COLOR_GREEN, "#2f8e50", "#b1e5c4"]

# Share of explicit vs. non-explicit songs. Falls back to an empty figure
# when the dataset has no 'explicit' column so later code can still use
# `pie_chart`.
if 'explicit' in df.columns:
    explicit_counts = df['explicit'].value_counts()
    pie_chart = px.pie(
        # Truthy index values are labelled 'Explicit', everything else
        # 'Non-Explicit'.
        names=explicit_counts.index.map(lambda x: 'Explicit' if x else 'Non-Explicit'),
        values=explicit_counts.values,
        title="Explicit vs Non-Explicit Songs",
        color_discrete_sequence=COLOR_SEQ
    )
    pie_chart.update_layout(template="plotly_dark", paper_bgcolor="#0C0C0C", plot_bgcolor="#0C0C0C", font_color="white")
else:
    pie_chart = go.Figure()

# Distribution of the 'popularity' column in 20 bins, rendered on the
# dashboard's near-black background.
popularity_hist = px.histogram(
    df,
    x="popularity",
    nbins=20,
    title="Popularity Distribution",
    labels={"popularity": "Popularity", "count": "Number of Songs"},
    color_discrete_sequence=COLOR_SEQ,
    template="plotly_dark",
).update_layout(
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# The 20 songs with the highest total_weeks_on_chart. Rows without a value
# or with 500+ weeks are excluded.
# NOTE(review): the 500-week cut-off looks like an outlier filter - confirm.
top_billboard = (
    df.loc[lambda frame: frame['total_weeks_on_chart'].notna() & (frame['total_weeks_on_chart'] < 500)]
    .sort_values('total_weeks_on_chart', ascending=False)
    .head(20)
)
# Re-sorted ascending for the horizontal bar chart.
top_billboard_sorted = top_billboard.sort_values("total_weeks_on_chart", ascending=True)

top_billboard_chart = px.bar(
    top_billboard_sorted,
    y="song_name",
    x="total_weeks_on_chart",
    orientation='h',
    title="Songs with Most Weeks on Billboard",
    labels={"total_weeks_on_chart": "Weeks on Billboard", "song_name": "Song"},
    color_discrete_sequence=COLOR_SEQ,
    template="plotly_dark",
).update_layout(
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# Songs whose Billboard peak position was 1, ranked by their total weeks on
# the chart (top 10). Rows without a value or with 500+ weeks are excluded.
top_number_one = (
    df[(df['billboard_peak'] == 1) & df['total_weeks_on_chart'].notna() & (df['total_weeks_on_chart'] < 500)]
    .sort_values('total_weeks_on_chart', ascending=False)
    .head(10)
)
# Re-sorted ascending for the horizontal bar chart.
top_number_one_sorted = top_number_one.sort_values("total_weeks_on_chart", ascending=True)

# The plotted value is total_weeks_on_chart, i.e. time spent anywhere on the
# chart, not time spent at position 1. The title and axis label previously
# claimed "weeks at #1", which misdescribed the data.
# NOTE(review): this reading is based on the column names - confirm against
# the dataset definition.
top_number_one_chart = px.bar(
    top_number_one_sorted,
    x="total_weeks_on_chart",
    y="song_name",
    orientation='h',
    title="#1 Hits with the Most Weeks on the Billboard Chart",
    labels={"total_weeks_on_chart": "Weeks on Chart", "song_name": "Song"},
    template="plotly_dark",
    color_discrete_sequence=COLOR_SEQ
)
top_number_one_chart.update_layout(paper_bgcolor="#0C0C0C", plot_bgcolor="#0C0C0C")

# Ten artists with the most rows that carry a Grammy category.
# NOTE(review): the count is "rows with a non-null category"; whether each
# such row is a win or only a nomination cannot be told from here - confirm
# before trusting the 'grammy_wins' name and the chart title.
top_grammy_artists = (
    df.loc[lambda frame: frame['category'].notna()]
    .groupby('artist')
    .size()
    .reset_index(name='grammy_wins')
    .sort_values('grammy_wins', ascending=False)
    .head(10)
)
# Re-sorted ascending for the horizontal bar chart.
top_grammy_sorted = top_grammy_artists.sort_values("grammy_wins", ascending=True)

top_grammy_chart = px.bar(
    top_grammy_sorted,
    y='artist',
    x='grammy_wins',
    orientation='h',
    title="Grammy-Winning Artists",
    color_discrete_sequence=COLOR_SEQ,
    template="plotly_dark",
).update_layout(
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# Treemap of the top Billboard songs grouped by artist. Tile size and colour
# both encode total_weeks_on_chart, with the colour scale centred on the
# average of that column.
treemap = px.treemap(
    top_billboard,
    path=[px.Constant("Songs"), 'artist', 'song_name'],
    values='total_weeks_on_chart',
    color='total_weeks_on_chart',
    color_continuous_midpoint=np.average(top_billboard['total_weeks_on_chart']),
    color_continuous_scale=px.colors.sequential.Greens,
    title="Treemap of Songs by Billboard Weeks",
    template="plotly_dark",
).update_layout(
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# Mean danceability per genre, highest first. Rows missing either the genre
# or the danceability value are left out of the average.
danceability_by_genre_mean = (
    df.dropna(subset=['danceability', 'track_genre'])
    .groupby('track_genre')['danceability']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

danceability_mean_chart = px.bar(
    danceability_by_genre_mean,
    y='track_genre',
    x='danceability',
    orientation='h',
    title='Average Danceability by Genre',
    color='danceability',
    color_continuous_scale=px.colors.sequential.Greens,
    template='plotly_dark',
).update_layout(
    # Order the bars by their value on the category axis.
    yaxis={'categoryorder': 'total ascending'},
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# Numeric columns compared pairwise in the heatmap.
correlation_features = [
    'popularity',
    'tempo',
    'valence',
    'energy',
    'danceability',
    'acousticness',
    'duration_minutes',
]
# Only rows with every feature present take part in the correlation.
correlation_df = df[correlation_features].dropna()
correlation_matrix = correlation_df.corr()

correlation_heatmap = px.imshow(
    correlation_matrix,
    text_auto=True,
    title='Feature Correlation Heatmap',
    color_continuous_scale=[[0, "#b1e5c4"], [0.5, "#57b660"], [1, "#2f8e50"]],
    template='plotly_dark',
).update_layout(
    plot_bgcolor="#0C0C0C",
    paper_bgcolor="#0C0C0C",
)

# Label every song with the sources it appears in. A non-null 'category'
# marks a Grammy entry and a non-null 'billboard_peak' marks a Billboard
# entry. The labels are computed column-wise with np.select instead of a
# row-by-row df.apply: the result is the same, it is much faster on large
# frames, and it also works when the frame is empty.
has_grammy = df['category'].notna()
has_billboard = df['billboard_peak'].notna()
df['source_coverage'] = np.select(
    # Conditions are evaluated in order; the first match wins.
    [has_grammy & has_billboard, has_grammy, has_billboard],
    ['Spotify + Grammy + Billboard', 'Spotify + Grammy', 'Spotify + Billboard'],
    default='Spotify only',
)

# Number of songs per coverage label, as a two-column frame for plotting.
source_counts = df['source_coverage'].value_counts().reset_index()
source_counts.columns = ['Source', 'Count']

source_coverage_chart = px.bar(
    source_counts,
    x='Source',
    y='Count',
    text='Count',
    title='Songs by Source Coverage',
    template='plotly_dark',
    color_discrete_sequence=COLOR_SEQ
)
source_coverage_chart.update_layout(paper_bgcolor="#0C0C0C", plot_bgcolor="#0C0C0C")
# Print each count above its bar.
source_coverage_chart.update_traces(textposition='outside')

# Songs with a Grammy category, counted per year and source-coverage label.
grammy_by_year = (
    df[df['category'].notna()]
    .groupby(['year', 'source_coverage'])
    .size()
    .reset_index(name='count')
)

# The bars are coloured by source_coverage. Relying only on the shared colour
# sequence gave every label the same green when that sequence holds a single
# colour, so the stacked segments could not be told apart. The two labels
# that can occur for rows with a Grammy category get explicit colours here;
# any other label still falls back to COLOR_SEQ.
grammy_year_chart = px.bar(
    grammy_by_year,
    x='year',
    y='count',
    color='source_coverage',
    title='Grammy-Nominated Songs per Year by Source',
    template='plotly_dark',
    color_discrete_map={
        'Spotify + Grammy + Billboard': "#2f8e50",
        'Spotify + Grammy': COLOR_GREEN,
    },
    color_discrete_sequence=COLOR_SEQ
)
grammy_year_chart.update_layout(paper_bgcolor="#0C0C0C", plot_bgcolor="#0C0C0C")

def styled_container(graph):
    """Wrap a figure in a half-width, bordered dark card.

    Args:
        graph: Figure passed to ``dcc.Graph`` as its ``figure``.

    Returns:
        An ``html.Div`` containing the graph, styled as an inline-block at
        48% width so two cards fit side by side.
    """
    card_style = {
        "width": "48%",
        "display": "inline-block",
        "padding": "10px",
        "backgroundColor": "#0C0C0C",
        "border": "1px solid gray",
        "borderRadius": "8px",
    }
    chart = dcc.Graph(figure=graph)
    return html.Div(chart, style=card_style)

def full_width_container(graph):
    """Wrap a figure in a centred, bordered dark card spanning the full row.

    Args:
        graph: Figure passed to ``dcc.Graph`` as its ``figure``.

    Returns:
        An ``html.Div`` containing the graph, 96% wide with automatic
        horizontal margins.
    """
    card_style = {
        "width": "96%",
        "margin": "20px auto",
        "padding": "10px",
        "backgroundColor": "#0C0C0C",
        "border": "1px solid gray",
        "borderRadius": "8px",
    }
    chart = dcc.Graph(figure=graph)
    return html.Div(chart, style=card_style)

# Static dashboard: every figure is pre-built above, so no callbacks are
# registered on this app.
app = Dash(__name__)
app.title = "Music Insights"

# Figures shown two per row, in display order.
chart_rows = [
    (pie_chart, popularity_hist),
    (top_number_one_chart, top_grammy_chart),
    (treemap, danceability_mean_chart),
    (source_coverage_chart, grammy_year_chart),
]

app.layout = html.Div(
    [
        html.H1("Music Industry Dashboard", style={"textAlign": "center", "color": "white"}),
        *[
            html.Div([styled_container(figure) for figure in row_figures])
            for row_figures in chart_rows
        ],
        # The heatmap gets a row of its own.
        full_width_container(correlation_heatmap),
    ],
    style={"backgroundColor": "#0C0C0C", "padding": "20px"},
)

# Start the Dash server in debug mode only when run as the main module.
if __name__ == '__main__':
    app.run(debug=True)


In [26]:
import os
from PIL import Image

os.makedirs("charts", exist_ok=True)

# Figures to export, keyed by destination path, in dashboard order.
chart_exports = {
    "charts/pie_chart.png": pie_chart,
    "charts/popularity_hist.png": popularity_hist,
    "charts/top_number_one.png": top_number_one_chart,
    "charts/top_grammy.png": top_grammy_chart,
    "charts/treemap.png": treemap,
    "charts/danceability.png": danceability_mean_chart,
    "charts/source_coverage.png": source_coverage_chart,
    "charts/grammy_year.png": grammy_year_chart,
    "charts/correlation.png": correlation_heatmap,
}

# Export each chart as a PNG image.
for png_path, figure in chart_exports.items():
    figure.write_image(png_path)

# List of files, in order.
image_files = list(chart_exports)

# Load the images.
images = [Image.open(png_path).convert("RGB") for png_path in image_files]

# Save as a multi-page PDF: the first image is the base document and the
# remaining ones are appended as further pages.
first_page, *remaining_pages = images
first_page.save("dashboard.pdf", save_all=True, append_images=remaining_pages)

# Build one tall PNG with all images stacked vertically. The canvas is as
# wide as the widest image and as tall as all images together.
widths = tuple(picture.size[0] for picture in images)
heights = tuple(picture.size[1] for picture in images)
max_width = max(widths)
total_height = sum(heights)

combined_image = Image.new("RGB", (max_width, total_height), color="#0C0C0C")
y_offset = 0
for picture, picture_height in zip(images, heights):
    # Paste at the left edge, directly below the previous image.
    combined_image.paste(picture, (0, y_offset))
    y_offset += picture_height

combined_image.save("dashboard.png")


ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido
