In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
def creating_visualisation(data: pd.DataFrame, nom_commune: str) -> go.Figure:
    # Prepare data for visualization
    df = data[data["nom_commune"] == nom_commune].copy()
    
    # Create a summary dataframe for visualization
    viz_data = df.groupby(['categorie', 'type_infrastructure']).size().reset_index(name='Nombre_categorie')
    
    # Sunburst Chart
    fig_sunburst = px.sunburst(
        data_frame=viz_data,
        path=["categorie", "Type_equipement"],
        values=(viz_data['Nombre_categorie'] / viz_data['Nombre_categorie'].sum()) * 100,
        color="Type_equipement",  
        color_discrete_sequence=px.colors.qualitative.Pastel,
        hover_data={'Nombre_categorie': False}
    )

    fig_sunburst.update_traces(
        texttemplate="%{label}<br>%{value:,.1f}%"

    )

    # Bar Chart
    fig_bar = px.bar(viz_data.sort_values(by=['Nombre_categorie', 'categorie'], ascending=[False, True]), 
                    x='Type_equipement', 
                    y='Nombre_categorie', 
                    color='categorie', 
                    labels={'Nombre_categorie': 'Event Count', 'Type_equipement': 'Equipment Type'},
                    color_discrete_sequence=px.colors.qualitative.Pastel)

    fig_bar.update_traces(text=viz_data.sort_values(by=['Nombre_categorie', 'categorie'], ascending=[False, True])['Nombre_categorie'], 
                        textposition='outside', insidetextfont=dict(size=20))

    # Create a subplot with two rows and one column
    fig = make_subplots(
        rows=2, cols=1, 
        row_heights=[0.7, 0.3],  # Adjust size ratio for sunburst and bar chart
        shared_xaxes=True,
        vertical_spacing=0.1,
        specs=[[{"type": "sunburst"}], [{"type": "bar"}]]
    )

    # Add sunburst and bar chart traces to the subplot
    for trace in fig_sunburst['data']:
        fig.add_trace(trace, row=1, col=1)

    for trace in fig_bar['data']:
        fig.add_trace(trace, row=2, col=1)

    # Add Commune title annotation
    commune_title = f"<b>Commune:</b> {viz_data['Commune'].iloc[0]}"
    fig.add_annotation(
        text=commune_title,  
        x=0.5,  
        y=1.15,  
        showarrow=False,
        font=dict(size=22, color="black", family="Arial"), 
        align="center", 
        xref="paper", 
        yref="paper"  
    )

    # Add Population annotation below the Commune title
    population_number = int(viz_data['Population_Totale'].iloc[0]) 
    fig.add_annotation(
        text=f"<b>Population:</b> {population_number:,}",  
        x=0.5,  
        y=1.09,  
        showarrow=False,
        font=dict(size=22, color="black", family="Arial"), 
        align="center", 
        xref="paper", 
        yref="paper"  
    )

    # Update layout for the entire figure
    fig.update_layout(
        margin=dict(t=80, l=50, r=50, b=50),
        title_font=dict(
            family="Arial, sans-serif",  
            size=22,  
            color="black"  
        ),
        font=dict(
            family="Arial, sans-serif",  
            size=14,  
            color="black" 
        ),
        height=600,
        width=500, 
        showlegend=True, 
        legend=dict(
            orientation="h",  
            yanchor="top",  
            y=-0.2,  
            xanchor="center", 
            x=0.5 
        )
    )
    
    return fig

In [2]:
df = pd.read_csv('../data/cultural_data.csv', sep=",")

In [3]:
df.head()

Unnamed: 0,nom_commune,code_postal,type_infrastructure,nom_infrastructure,latitude,longitude,population,categorie,cultural_density
0,Abancourt,60001.0,Bibliothèque,Médiathèque,49.69658,1.764826,606.0,vivant,1.650165
1,Abbans-Dessous,25001.0,Monument,Prieuré de Lieu-Dieu,47.134924,5.873069,251.0,patrimonial,3.984064
2,Abbans-Dessus,25002.0,Monument,Château de Jouffroy-D'abbans,47.118458,5.878254,295.0,patrimonial,3.389831
3,Abbaretz,44001.0,Bibliothèque,Bibliothèque La Mine Du Livre,47.551352,-1.532295,2083.0,vivant,0.960154
4,Abbaretz,44001.0,Monument,Anciennes Forges de La Jahotière,47.556401,-1.493338,2083.0,patrimonial,0.960154


In [4]:
#type of the data in columns

df.dtypes

Commune                     object
PTOT                       float64
REG                        float64
DEP                         object
Région                      object
latitude                   float64
longitude                  float64
code_insee                  object
Type équipement ou lieu     object
count                        int64
categorie                   object
dtype: object

In [13]:
df.head()

Unnamed: 0,nom_commune,code_postal,type_infrastructure,nom_infrastructure,latitude,longitude,population,categorie,cultural_density
0,Abancourt,60001.0,Bibliothèque,Médiathèque,49.69658,1.764826,606.0,vivant,1.650165
1,Abbans-Dessous,25001.0,Monument,Prieuré de Lieu-Dieu,47.134924,5.873069,251.0,patrimonial,3.984064
2,Abbans-Dessus,25002.0,Monument,Château de Jouffroy-D'abbans,47.118458,5.878254,295.0,patrimonial,3.389831
3,Abbaretz,44001.0,Bibliothèque,Bibliothèque La Mine Du Livre,47.551352,-1.532295,2083.0,vivant,0.960154
4,Abbaretz,44001.0,Monument,Anciennes Forges de La Jahotière,47.556401,-1.493338,2083.0,patrimonial,0.960154


In [18]:
# filter df by Nom commune principale = 80485.0

df_filtered= df[df['code_postal'] == '13056]

SyntaxError: unterminated string literal (detected at line 3) (1329029573.py, line 3)

In [None]:
df_filtered

Unnamed: 0,nom_commune,code_postal,type_infrastructure,nom_infrastructure,latitude,longitude,population,categorie,cultural_density
39985,Martigues,13056.0,Cinéma,Cinéma La Cascade,43.402944,5.054684,49131.0,vivant,0.529197
39986,Martigues,13056.0,Bibliothèque,Bibliothèque De Canto Perdrix,43.419337,5.045934,49131.0,vivant,0.529197
39987,Martigues,13056.0,Bibliothèque,Bibliothèque de Jonquières:centre de Ressource...,43.39885,5.056137,49131.0,vivant,0.529197
39988,Martigues,13056.0,Bibliothèque,Bibliothèque du service archéologique,43.378374,5.053392,49131.0,vivant,0.529197
39989,Martigues,13056.0,Bibliothèque,Médiabus,43.404807,5.052823,49131.0,vivant,0.529197
39990,Martigues,13056.0,Bibliothèque,Médiathèque Louis Aragon,43.404807,5.052823,49131.0,vivant,0.529197
39991,Martigues,13056.0,Bibliothèque,Site Picasso,43.40433,5.051482,49131.0,vivant,0.529197
39992,Martigues,13056.0,Cinéma,Jean Renoir,43.410526,5.052683,49131.0,vivant,0.529197
39993,Martigues,13056.0,Cinéma,Majestic Palace,43.43362,5.050227,49131.0,vivant,0.529197
39994,Martigues,13056.0,Conservatoire,Conservatoire,43.397846,5.040233,49131.0,vivant,0.529197


In [8]:
fig = px.sunburst(
    data_frame=df_filtered,
    path=["categorie", "Type équipement ou lieu"],
    values="count",
    title=f"<b>Commune:</b> {df_filtered['Commune'].iloc[0]}",
    color="Type équipement ou lieu",  
    color_discrete_sequence=px.colors.qualitative.Pastel
)

fig.update_traces(
    texttemplate="%{label}<br>%{value:,}"
)

population_number = int(df_filtered['PTOT'].iloc[0]) 
fig.add_annotation(
    text=f"<b>Population:</b>{population_number:,}",  
    x=0.5,  
    y=1.10,  
    showarrow=False,
    font=dict(size=22, color="black", family="Arial"), 
    align="center", 
    xref="paper", 
    yref="paper"  
)


fig.update_layout(
    margin=dict(t=100, l=0, r=0, b=0),
    title_font=dict(
        family="Arial, sans-serif",  
        size=22,  
        color="black"  
    ),
    font=dict(
        family="Arial, sans-serif",  
        size=14,  
        color="black" 
    )
)

fig.show()


In [9]:
fig = px.bar(df_filtered.sort_values(by=['count', 'categorie'], ascending=[False, True]), 
             x='Type équipement ou lieu', 
             y='count', 
             color='categorie', 
             title="Equipment Type Distribution by Category",
             labels={'count': 'Event Count', 'Type équipement ou lieu': 'Equipment Type'},
             color_discrete_sequence=px.colors.qualitative.Pastel)

fig.update_traces(text=df_filtered.sort_values(by=['count', 'categorie'], ascending=[False, True])['count'], textposition='inside', insidetextanchor='middle')


fig.show()


In [None]:
total_count = df_filtered['count'].sum()

In [19]:
df_filtered

Unnamed: 0,Commune,Population_Totale,Num_Region,Num_Dep,Region,latitude,longitude,code_insee,Type_equipement,Nombre_categorie,categorie
4612,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Bibliothèque,6,vivant
4613,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Cinéma,3,vivant
4614,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Conservatoire,1,vivant
4615,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Librairie,1,vivant
4616,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Lieu archéologique,1,patrimoine
4617,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Monument,11,patrimoine
4618,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Musée,1,vivant
4619,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Scène,1,vivant
4620,Martigues,49131.0,93.0,13.0,Provence-Alpes-Côte d'Azur,43.379892,5.049454,13056.0,Service d'archives,1,patrimoine


In [22]:
# Sunburst Chart
fig_sunburst = px.sunburst(
    data_frame=df_filtered,
    path=["categorie", "Type_equipement"],
    values=(df_filtered['Nombre_categorie'] / df_filtered['Nombre_categorie'].sum()) * 100,
    color="Type_equipement",  
    color_discrete_sequence=px.colors.qualitative.Pastel,
    hover_data={'Nombre_categorie': False}
)

fig_sunburst.update_traces(
    texttemplate="%{label}<br>%{value:,.1f}%"

)

# Bar Chart
fig_bar = px.bar(df_filtered.sort_values(by=['Nombre_categorie', 'categorie'], ascending=[False, True]), 
                 x='Type_equipement', 
                 y='Nombre_categorie', 
                 color='categorie', 
                 labels={'Nombre_categorie': 'Event Count', 'Type_equipement': 'Equipment Type'},
                 color_discrete_sequence=px.colors.qualitative.Pastel)

fig_bar.update_traces(text=df_filtered.sort_values(by=['Nombre_categorie', 'categorie'], ascending=[False, True])['Nombre_categorie'], 
                      textposition='outside', insidetextfont=dict(size=20))

# Create a subplot with two rows and one column
fig = make_subplots(
    rows=2, cols=1, 
    row_heights=[0.7, 0.3],  # Adjust size ratio for sunburst and bar chart
    shared_xaxes=True,
    vertical_spacing=0.1,
    specs=[[{"type": "sunburst"}], [{"type": "bar"}]]
)

# Add sunburst and bar chart traces to the subplot
for trace in fig_sunburst['data']:
    fig.add_trace(trace, row=1, col=1)

for trace in fig_bar['data']:
    fig.add_trace(trace, row=2, col=1)

# Add Commune title annotation
commune_title = f"<b>Commune:</b> {df_filtered['Commune'].iloc[0]}"
fig.add_annotation(
    text=commune_title,  
    x=0.5,  
    y=1.15,  
    showarrow=False,
    font=dict(size=22, color="black", family="Arial"), 
    align="center", 
    xref="paper", 
    yref="paper"  
)

# Add Population annotation below the Commune title
population_number = int(df_filtered['Population_Totale'].iloc[0]) 
fig.add_annotation(
    text=f"<b>Population:</b> {population_number:,}",  
    x=0.5,  
    y=1.09,  
    showarrow=False,
    font=dict(size=22, color="black", family="Arial"), 
    align="center", 
    xref="paper", 
    yref="paper"  
)

# Update layout for the entire figure
fig.update_layout(
    margin=dict(t=80, l=50, r=50, b=50),
    title_font=dict(
        family="Arial, sans-serif",  
        size=22,  
        color="black"  
    ),
    font=dict(
        family="Arial, sans-serif",  
        size=14,  
        color="black" 
    ),
    height=600,
    width=500, 
    showlegend=True, 
    legend=dict(
        orientation="h",  
        yanchor="top",  
        y=-0.2,  
        xanchor="center", 
        x=0.5 
    )
)

fig.show()


In [12]:
total_count = df_filtered['count'].sum()

In [13]:
total_count

np.int64(26)

In [14]:
fig = px.treemap(df_filtered, 
                 path=['categorie', 'Type équipement ou lieu'], 
                 values='count', 
                 labels={'count': 'Event Count'},
                 color='count',
                 color_continuous_scale='Purp',
                 hover_data={'count': True})

fig.update_traces(textinfo="label+value", textfont_size=18)

fig.update_layout(
    coloraxis_colorbar=dict(
        title="Event Count",
        thicknessmode="pixels", thickness=20,
        lenmode="pixels", len=300,
        yanchor="top", y=1,
        ticks="outside", ticksuffix=" events",
        dtick=5
    )
)

fig.show()

In [8]:
import plotly.express as px
from plotly.subplots import make_subplots

# Sunburst Chart
fig_sunburst = px.sunburst(
    data_frame=df_filtered,
    path=["categorie", "Type équipement ou lieu"],
    values=(df_filtered['count'] / df_filtered['count'].sum()) * 100,
    color="Type équipement ou lieu",  
    color_discrete_sequence=px.colors.qualitative.Pastel,
    hover_data={'count': False}
)

fig_sunburst.update_traces(
    texttemplate="%{label}<br>%{value:,.1f}%"
)

# Treemap Chart
fig_treemap = px.treemap(df_filtered, 
                         path=['categorie', 'Type équipement ou lieu'], 
                         values='count', 
                         labels={'count': 'Event Count'},
                         color='count',
                         color_continuous_scale='Purp',
                         hover_data={'count': True})

fig_treemap.update_traces(textinfo="label+value", textfont_size=18)

# Create a subplot with two rows and one column
fig = make_subplots(
    rows=2, cols=1, 
    row_heights=[0.7, 0.3],  # Adjust size ratio for sunburst and treemap
    shared_xaxes=True,
    vertical_spacing=0.1,
    specs=[[{"type": "sunburst"}], [{"type": "treemap"}]]
)

# Add sunburst and treemap chart traces to the subplot
for trace in fig_sunburst['data']:
    fig.add_trace(trace, row=1, col=1)

for trace in fig_treemap['data']:
    fig.add_trace(trace, row=2, col=1)

# Add Commune title annotation
commune_title = f"<b>Commune:</b> {df_filtered['Commune'].iloc[0]}"
fig.add_annotation(
    text=commune_title,  
    x=0.5,  
    y=1.15,  
    showarrow=False,
    font=dict(size=22, color="black", family="Arial"), 
    align="center", 
    xref="paper", 
    yref="paper"  
)

# Add Population annotation below the Commune title
population_number = int(df_filtered['PTOT'].iloc[0]) 
fig.add_annotation(
    text=f"<b>Population:</b> {population_number:,}",  
    x=0.5,  
    y=1.09,  
    showarrow=False,
    font=dict(size=22, color="black", family="Arial"), 
    align="center", 
    xref="paper", 
    yref="paper"  
)

# Update layout for the entire figure
fig.update_layout(
    margin=dict(t=80, l=50, r=50, b=50),
    title_font=dict(
        family="Arial, sans-serif",  
        size=22,  
        color="black"  
    ),
    font=dict(
        family="Arial, sans-serif",  
        size=14,  
        color="black" 
    ),
    height=800,  # Adjust height for two charts
    width=500, 
    showlegend=True, 
    legend=dict(
        orientation="h",  
        yanchor="top",  
        y=-0.2,  
        xanchor="center", 
        x=0.5 
    ),
    coloraxis_colorbar=dict(
        title="Event Count",
        thicknessmode="pixels", thickness=20,
        lenmode="pixels", len=300,
        yanchor="top", y=1,
        ticks="outside", ticksuffix=" events",
        dtick=5
    )
)

# Show the figure with both sunburst and treemap charts
fig.show()


NameError: name 'df_filtered' is not defined

In [32]:
def creating_visualisation(data: pd.DataFrame, nom_commune: str = None) -> go.Figure:
    
    # Define the TYPE_STYLES
    TYPE_STYLES = {
        "Monument": {"color": "#e41a1c", "radius": 8},  # Red
        "Musée": {"color": "#377eb8", "radius": 8},  # Blue
        "Bibliothèque": {"color": "#4daf4a", "radius": 8},  # Green
        "Théâtre": {"color": "#984ea3", "radius": 8},  # Purple
        "Cinéma": {"color": "#ff7f00", "radius": 8},  # Orange
        "Conservatoire": {"color": "#ffff33", "radius": 8},  # Yellow
        "Scène": {"color": "#a65628", "radius": 8},  # Brown
        "Musique": {"color": "#f781bf", "radius": 8},  # Pink
        "Lieu archéologique": {"color": "#999999", "radius": 8},  # Gray
        "Service d'archives": {"color": "#a6cee3", "radius": 8},  # Light blue
        "Parc et jardin": {"color": "#b2df8a", "radius": 8},  # Light green
        "Espace protégé": {"color": "#fb9a99", "radius": 8},  # Light red
        "Spectacle vivant": {"color": "#fdbf6f", "radius": 8},  # Light orange
        "Pluridisciplinaire": {"color": "#cab2d6", "radius": 8},  # Light purple
        "Cinéma, audiovisuel": {"color": "#ff7f00", "radius": 8},  # Orange
        "Livre, littérature": {"color": "#4daf4a", "radius": 8},  # Green
    }
    
    # Prepare data for visualization
    if nom_commune:
        df = data[data["nom_commune"] == nom_commune].copy()
    else:
        df = data.copy()  # Use all data if no commune is selected
    
    # Create a summary dataframe for visualization
    viz_data = df.groupby(['categorie', 'type_infrastructure']).size().reset_index(name='count')
    
    type_colors = {key: TYPE_STYLES[key]["color"] for key in TYPE_STYLES}
    viz_data['color'] = viz_data['type_infrastructure'].map(type_colors)

    # Sunburst Chart
    fig_sunburst = px.sunburst(
        data_frame=viz_data,
        path=["categorie", "type_infrastructure"],
        values=(viz_data['count'] / viz_data['count'].sum()) * 100,
        color="type_infrastructure",
        color_discrete_map=type_colors,
        hover_data={"count": True},
    )

    fig_sunburst.update_traces(
        texttemplate="%{label}<br>%{value:,.1f}%"
    )

    # If a commune is selected, add a bar chart and detailed information
    if nom_commune:
        # Bar Chart
        fig_bar = px.bar(
            viz_data.sort_values(by=["count", "categorie"], ascending=[False, True]),
            x="type_infrastructure",
            y="count",
            color="categorie",
            labels={"count": "Nombre d'équipements", "type_infrastructure": "Type d'équipement"},
            color_discrete_sequence=px.colors.qualitative.Pastel,
        )

        fig_bar.update_traces(
            text=viz_data.sort_values(by=["count", "categorie"], ascending=[False, True])["count"],
            textposition="outside",
            insidetextfont=dict(size=20),
        )

        # Create a subplot with two rows and one column
        fig = make_subplots(
            rows=2,
            cols=1,
            row_heights=[0.7, 0.3],  # Adjust size ratio for sunburst and bar chart
            shared_xaxes=True,
            vertical_spacing=0.1,
            specs=[[{"type": "sunburst"}], [{"type": "bar"}]],
        )

        # Add traces to the subplots
        for trace in fig_sunburst.data:
            fig.add_trace(trace, row=1, col=1)
        for trace in fig_bar.data:
            fig.add_trace(trace, row=2, col=1)

        # Add Commune title annotation
        commune_title = f"<b>Commune:</b> {df['nom_commune'].iloc[0]}"
        fig.add_annotation(
            text=commune_title,  
            x=0.5,  
            y=1.15,  
            showarrow=False,
            font=dict(size=22, color="black", family="Arial"), 
            align="center", 
            xref="paper", 
            yref="paper"  
        )

        # Add Population annotation below the Commune title
        population_number = int(df['population'].iloc[0]) 
        fig.add_annotation(
            text=f"<b>Population:</b> {population_number:,}",  
            x=0.5,  
            y=1.09,  
            showarrow=False,
            font=dict(size=22, color="black", family="Arial"), 
            align="center", 
            xref="paper", 
            yref="paper"  
        )
    
    # If no specific commune is selected, show only the sunburst and summary for all communes
    else:
        fig = make_subplots(
            rows=1,
            cols=1,
            row_heights=[1],  
            vertical_spacing=0.1,
            specs=[[{"type": "sunburst"}]],
        )

        # Add Sunburst trace
        for trace in fig_sunburst.data:
            fig.add_trace(trace, row=1, col=1)

        # Add title for "Toute la France"
        fig.add_annotation(
            text="<b>Commune:</b> Toute la France",
            x=0.5,
            y=1.15,
            showarrow=False,
            font=dict(size=22, color="black", family="Arial"),
            align="center",
            xref="paper",
            yref="paper"
        )

        # Sum the population across all communes
        df_grouped = df.groupby('code_postal')['population'].unique().reset_index()
        total_population = int(df_grouped['population'].apply(lambda x: x[0]).sum())

        fig.add_annotation(
            text=f"<b>Population:</b> {total_population:,}",  
            x=0.5,  
            y=1.09,  
            showarrow=False,
            font=dict(size=22, color="black", family="Arial"), 
            align="center", 
            xref="paper", 
            yref="paper"  
        )

    # Update layout for the entire figure
    fig.update_layout(
        margin=dict(t=80, l=50, r=50, b=50),
        title_font=dict(
            family="Arial, sans-serif",  
            size=22,  
            color="black"  
        ),
        font=dict(
            family="Arial, sans-serif",  
            size=14,  
            color="black" 
        ),
        height=600,
        width=500, 
        showlegend=True, 
        legend=dict(
            orientation="h",  
            yanchor="top",  
            y=-0.2,  
            xanchor="center", 
            x=0.5 
        )
    )

    return fig

In [34]:
creating_visualisation(df, 'Martigues')

In [13]:
total_population = int(df['population'].sum())
total_population

1887218025

In [29]:
df_grouped = df.groupby('code_postal')['population'].unique().reset_index()
total_population = df_grouped['population'].apply(lambda x: x[0]).sum()
total_population

np.float64(60177968.0)

In [24]:
df.groupby('code_postal')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x72f17c063e60>