In [1]:
import time
start = time.time()

In [2]:
import polars as pl
import plotly.express as px
import numpy as np

![img](https://inforiver.com/wp-content/uploads/paired-column-charts-vs-marimekko-charts.png "https://inforiver.com/wp-content/uploads/paired-column-charts-vs-marimekko-charts.png")

In [180]:
# source data based on the above image
df0 = pl.DataFrame({
                    "Age Group": ["18-24 years", "25-29 years", "30-49 years", "50-64 years", "65+ years"],
                    "YouTube": [90, 93, 87, 70, 38],
                    "Facebook": [76, 84, 79, 68, 46],
                    "Instagram": [75, 73, 57, 23, 8],
                    "Snapchat": [73, 47, 25, 9, 3],
                    "Twitter": [44, 31, 26, 17, 7]
                })
df0

Age Group,YouTube,Facebook,Instagram,Snapchat,Twitter
str,i64,i64,i64,i64,i64
"""18-24 years""",90,76,75,73,44
"""25-29 years""",93,84,73,47,31
"""30-49 years""",87,79,57,25,26
"""50-64 years""",70,68,23,9,17
"""65+ years""",38,46,8,3,7


In [181]:
# format data as a list of numbers
data = []
for i in list(df0['Age Group']):
    for j in df0.columns[1:]:
        # append dict to a list
        data.append({"Age Group" : i,
                     "Platform" : j,
                     "Percentage" : df0.filter(pl.col('Age Group')==i)[j].item()})
data

[{'Age Group': '18-24 years', 'Platform': 'YouTube', 'Percentage': 90},
 {'Age Group': '18-24 years', 'Platform': 'Facebook', 'Percentage': 76},
 {'Age Group': '18-24 years', 'Platform': 'Instagram', 'Percentage': 75},
 {'Age Group': '18-24 years', 'Platform': 'Snapchat', 'Percentage': 73},
 {'Age Group': '18-24 years', 'Platform': 'Twitter', 'Percentage': 44},
 {'Age Group': '25-29 years', 'Platform': 'YouTube', 'Percentage': 93},
 {'Age Group': '25-29 years', 'Platform': 'Facebook', 'Percentage': 84},
 {'Age Group': '25-29 years', 'Platform': 'Instagram', 'Percentage': 73},
 {'Age Group': '25-29 years', 'Platform': 'Snapchat', 'Percentage': 47},
 {'Age Group': '25-29 years', 'Platform': 'Twitter', 'Percentage': 31},
 {'Age Group': '30-49 years', 'Platform': 'YouTube', 'Percentage': 87},
 {'Age Group': '30-49 years', 'Platform': 'Facebook', 'Percentage': 79},
 {'Age Group': '30-49 years', 'Platform': 'Instagram', 'Percentage': 57},
 {'Age Group': '30-49 years', 'Platform': 'Snapchat',

In [183]:
df0 = pl.DataFrame(data)
df0

Age Group,Platform,Percentage
str,str,i64
"""18-24 years""","""YouTube""",90
"""18-24 years""","""Facebook""",76
"""18-24 years""","""Instagram""",75
"""18-24 years""","""Snapchat""",73
"""18-24 years""","""Twitter""",44
…,…,…
"""65+ years""","""YouTube""",38
"""65+ years""","""Facebook""",46
"""65+ years""","""Instagram""",8
"""65+ years""","""Snapchat""",3


In [290]:
# get average across all ages for each Platform and add a new column
df0 = df0.join(df0.group_by('Platform').agg(pl.col('Percentage').mean().alias('Avg')),on=['Platform'],how='left')
df0

Age Group,Platform,Percentage,Avg,Avg_right
str,str,i64,f64,f64
"""18-24 years""","""YouTube""",90,75.6,75.6
"""18-24 years""","""Facebook""",76,70.6,70.6
"""18-24 years""","""Instagram""",75,47.2,47.2
"""18-24 years""","""Snapchat""",73,31.4,31.4
"""18-24 years""","""Twitter""",44,25.0,25.0
…,…,…,…,…
"""65+ years""","""YouTube""",38,75.6,75.6
"""65+ years""","""Facebook""",46,70.6,70.6
"""65+ years""","""Instagram""",8,47.2,47.2
"""65+ years""","""Snapchat""",3,31.4,31.4


In [345]:
bar_gap = 0.2
fig = px.bar(df0,x='Platform',y='Percentage',color='Age Group',barmode='group', text_auto=True)
fig.update_traces(textfont_size=10, textangle=0, textposition="outside", texttemplate='%{y}%' ,cliponaxis=False)
fig.update_layout({'plot_bgcolor': 'rgba(255, 255, 255, 255)','paper_bgcolor': 'rgba(255, 255, 255, 255)'},
                  yaxis_visible=False,xaxis_showticklabels=True,xaxis_title='',xaxis=dict(tickfont=dict(size=16)),bargap=bar_gap)
fig.show()

In [346]:
x_count = df0['Platform'].n_unique()
total_width = 1/x_count
# The relative starting position of each bar (i.e., considering the width of the chart as 1)
bar_starts = np.cumsum([total_width] * x_count) - 1 / x_count - 0.01


In [347]:
dict_avg = dict(df0.group_by('Platform').agg(pl.col('Percentage').mean().alias('Avg')).sort(by='Avg',descending=True).iter_rows())
dict_avg

{'YouTube': 75.6,
 'Facebook': 70.6,
 'Instagram': 47.2,
 'Snapchat': 31.4,
 'Twitter': 25.0}

In [348]:
list_x1 = []
for bs, ave in zip(bar_starts, list(dict_avg.values())):
    line_st = bs + total_width * bar_gap / 2
    fig.add_shape(
        type='line',
        x0=line_st,
        y0=ave,
        x1=line_st + total_width * (1 - bar_gap) + 0.02,
        y1=ave,
        line=dict(color="grey", width=2),
        xref='paper',  # relative x position
        yref='y',  # exact y position as shown on the y-axis
    )
    list_x1.append(line_st + total_width * (1 - bar_gap))

fig.add_trace(go.Scatter(
    #x=list(dict_avg.keys()),
    x0='YouTube', dx=1,
    y=[i+2 for i in dict_avg.values()],
    text=list(dict_avg.values()),
    mode="text",showlegend=False,textposition="top right"
))
fig.show()
fig.data = []