<h1>Preparing Data</h1>

In [4]:
import json
import os


# Load the raw records and keep one `statistics` entry per distinct
# `policy_hash`, in first-seen order.
hashes = []           # distinct policy hashes, in the order they were first accepted
unique_stats = []     # `statistics` value of the first accepted record for each hash
_seen_hashes = set()  # O(1) membership mirror of `hashes` (avoids rescanning the list)
# NOTE(review): the set requires `policy_hash` values to be hashable
# (presumably strings) — confirm against the dataset.

with open(os.path.abspath("resources/datasets/plain.json"), "r", encoding="utf-8") as f:
    stats = json.load(f)

for st in stats:
    policy_hash = st["policy_hash"]
    # A record whose statistics is None is skipped WITHOUT marking its hash as
    # seen, so a later record with the same hash can still contribute.
    if policy_hash in _seen_hashes or st["statistics"] is None:
        continue
    _seen_hashes.add(policy_hash)
    hashes.append(policy_hash)
    unique_stats.append(st["statistics"])

# Display label for each structure-element key read from a statistics record.
map_names = {
    "ordered lists": "Ordered lists",
    "unordered lists": "Unordered lists",
    "tables": "Tables",
    "paragraphs": "Paragraphs",
    "headings": "Headings",
}

# Per-element totals accumulated over every de-duplicated statistics record.
# The keys (and their order) are exactly those of `map_names`.
s = {
    element: sum(record[element] for record in unique_stats)
    for element in map_names
}

# Grand total across all element kinds.
elements_sum = sum(s.values())

<h1>Chart</h1>

In [16]:
import plotly.express as px
import plotly.graph_objects as go


# Bar chart showing each structure element's share (in percent) of all
# elements counted in `s`.
fig = go.Figure()

fig.update_layout(
    font=dict(
        size=23,
        color="#000000",
    ),
    colorway=px.colors.qualitative.Dark24,
    # The legend is switched off here; the placement settings below only take
    # effect if `showlegend` is turned back on.
    showlegend=False,
    legend=dict(
        orientation="h",
        yanchor="top",
        xanchor="left",
        x=1,
        y=1
    ),
    barmode="stack",
    width=900,
    height=900
)

# Convert counts to percentages. When nothing was counted (`elements_sum` is 0)
# every share is reported as 0 instead of raising ZeroDivisionError.
percentages = [
    v / elements_sum * 100 if elements_sum else 0.0
    for v in s.values()
]

fig.add_bar(
    x=[map_names[k] for k in s],
    y=percentages,
)

fig.update_xaxes(showgrid=True, title_text='Structure element')
# The trailing semicolon stops the notebook from echoing the value returned by
# the last call, so the figure is not rendered by this cell.
fig.update_yaxes(showgrid=True, title_text='Percent of element in corpus');

In [17]:
# Export the chart to an image file, then show the figure as the cell output.
image_path = "pictures/StructureBars.png"
# Create the target folder first so the export does not fail when it is
# missing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(image_path), exist_ok=True)
fig.write_image(image_path)
fig