# Plot the percentage of sequences versus the percentage of cases linked to the Huanan Seafood Market, according to the [joint WHO-China report](https://www.who.int/publications/i/item/who-convened-global-study-of-origins-of-sars-cov-2-china-part)

In [1]:
import altair as alt

import pandas as pd

# Row labels for the two statistics being compared; each appears once per
# exposure category ("No" / "Yes").
sequence_label = "sequences from cases with onset before Dec-31-2019"
overall_label = "overall cases with Dec-2019 onset"

# Page 76 of the report: 12 of the 13 cases with onset before Dec-31-2019
# were from the Market, so 1 of 13 was not.
sequence_rows = [
    (sequence_label, "No", 1 / 13 * 100),
    (sequence_label, "Yes", 12 / 13 * 100),
]

# Page 44 of the report: 33% of all cases in Dec 2019 had exposure to the
# Huanan Market, leaving 67% without.
overall_rows = [
    (overall_label, "No", 67),
    (overall_label, "Yes", 33),
]

# Long-format table: one row per (statistic, exposure) pair with its percentage.
df = pd.DataFrame(
    sequence_rows + overall_rows,
    columns=["statistic", "exposure to Huanan Market", "percent of total"],
)

# Show the table in the notebook output before plotting it.
display(df)

# x channel: the percentage column, with its axis title and tick labels sized explicitly.
x_channel = alt.X(
    "percent of total",
    axis=alt.Axis(titleFontSize=15, titleFontWeight="normal", labelFontSize=12),
)

# y channel: one band per statistic; the axis title is suppressed and
# labelLimit is raised to 500 to leave room for the long statistic names.
y_channel = alt.Y(
    "statistic",
    title=None,
    axis=alt.Axis(labelLimit=500, labelFontSize=15),
    scale=alt.Scale(padding=0.25),
)

# Legend for the exposure colours, placed above the plot with its title on the left.
exposure_legend = alt.Legend(
    orient="top",
    titleLimit=500,
    labelFontSize=15,
    titleFontSize=15,
    titleFontWeight="normal",
    titleOrient="left",
)

# color channel: exposure category, drawn with a fixed two-colour palette.
color_channel = alt.Color(
    "exposure to Huanan Market",
    legend=exposure_legend,
    scale=alt.Scale(range=["#619cff", "#f8766d"]),
)

# Black-outlined bars, no grid lines, fixed width and a 32-unit step per band.
chart = (
    alt.Chart(df)
    .mark_bar(stroke="black")
    .encode(x=x_channel, y=y_channel, color=color_channel)
    .configure_axis(grid=False)
    .properties(height=alt.Step(32), width=390)
)

# Write the figure to disk as a PNG (ppi=350) and as an SVG.
chart.save("WHO_case_vs_sequence_plot.png", ppi=350)
chart.save("WHO_case_vs_sequence_plot.svg")

# Trailing bare expression so the notebook renders the chart as the cell output.
chart

Unnamed: 0,statistic,exposure to Huanan Market,percent of total
0,sequences from cases with onset before Dec-31-...,No,7.692308
1,sequences from cases with onset before Dec-31-...,Yes,92.307692
2,overall cases with Dec-2019 onset,No,67.0
3,overall cases with Dec-2019 onset,Yes,33.0
