In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Concentration of the KIO3 titrant (presumably mol/L: it is multiplied by the
# titrated volume converted from mL to L to obtain moles of KIO3).
KIO3_MOLARITY = 0.00148
# Vitamin C recommendation in milligrams; later cells divide it by 1000 to work in grams.
# NOTE(review): presumably a daily recommended intake — the origin of 82.5 mg is not recorded here; confirm.
VITAMIN_C_RECC_MILLIGRAMS = 82.5

In [3]:
# Raw titration measurements. Every list is positionally aligned: entry i of each
# list describes the same sample.
data = {
    'Sample': [
        "Cheerios",
        "Red Pepper Juice",
        "Orange Juice",
        "Tea",
    ],
    'Sample Units': [
        "g",
        "mL",
        "mL",
        "mL",
    ],
    'Volume (mL) / Mass (g) of Sample': [
        1.4896,
        10,
        5.2,
        10,
    ],
    'KIO3 Titrated (mL)': [
        8.5,
        6.0,
        3.5,
        0.4,
    ],
}

# Key the frame by sample name. The names are passed as an explicit (unnamed) index,
# which makes the "Sample" column redundant, so it is dropped in the same expression.
df = pd.DataFrame(data, index=data["Sample"]).drop(columns="Sample")
df.head()

Unnamed: 0,Sample Units,Volume (mL) / Mass (g) of Sample,KIO3 Titrated (mL)
Cheerios,g,1.4896,8.5
Red Pepper Juice,mL,10.0,6.0
Orange Juice,mL,5.2,3.5
Tea,mL,10.0,0.4


In [4]:
# Stoichiometric factor applied to the moles of KIO3 before converting to grams.
# NOTE(review): 3/5 is the I2:KI ratio in IO3- + 5 I- + 6 H+ -> 3 I2 + 3 H2O, yet it
# is multiplied by moles of KIO3 below (the I2:KIO3 ratio in that equation is 3:1).
# Confirm against the lab procedure which factor is intended.
mols_I2_to_mols_KI = 3 / 5
# Molar mass used to turn moles into grams (presumably g/mol of ascorbic acid — confirm).
molar_mass_vit_c = 176.12


def determine_mass_of_Vitamin_C(KIO3_titrated_mL: float) -> float:
    """Convert a titrated KIO3 volume into a mass of vitamin C.

    The volume is converted from millilitres to litres, multiplied by
    ``KIO3_MOLARITY`` to get moles of KIO3, scaled by ``mols_I2_to_mols_KI``
    and finally multiplied by ``molar_mass_vit_c``.

    Args:
        KIO3_titrated_mL: Volume of KIO3 solution titrated, in millilitres.

    Returns:
        The computed mass of vitamin C (grams, given a molar mass in g/mol).
    """
    litres_titrated = KIO3_titrated_mL / 1000
    return KIO3_MOLARITY * litres_titrated * mols_I2_to_mols_KI * molar_mass_vit_c

In [5]:
# Mass of vitamin C (g) found in each titrated sample.
df["Mass Vitamin C (g)"] = df["KIO3 Titrated (mL)"].apply(determine_mass_of_Vitamin_C)

# Normalise by the amount of sample titrated (g for solids, mL for liquids).
df["Mass Vitamin C (g) per Sample Unit"] = df["Mass Vitamin C (g)"].div(
    df["Volume (mL) / Mass (g) of Sample"]
)

# How many sample units supply the recommended amount: the recommendation is
# converted from mg to g, then divided by the per-unit content.
df["Sample Units Needed to Consume Recommendation"] = df[
    "Mass Vitamin C (g) per Sample Unit"
].rdiv(VITAMIN_C_RECC_MILLIGRAMS / 1000)

# Persist the derived table, then preview it.
df.to_csv("base.csv")
df.head()

Unnamed: 0,Sample Units,Volume (mL) / Mass (g) of Sample,KIO3 Titrated (mL),Mass Vitamin C (g),Mass Vitamin C (g) per Sample Unit,Sample Units Needed to Consume Recommendation
Cheerios,g,1.4896,8.5,0.001329,0.000892,92.444918
Red Pepper Juice,mL,10.0,6.0,0.000938,9.4e-05,879.186591
Orange Juice,mL,5.2,3.5,0.000547,0.000105,783.732047
Tea,mL,10.0,0.4,6.3e-05,6e-06,13187.798859


In [6]:
# Rank the samples from highest to lowest vitamin C content per sample unit and
# add a mg column for plotting (the stored column is in grams).
df_fig1 = df.sort_values(by="Mass Vitamin C (g) per Sample Unit", ascending=False)
df_fig1["Mass Vitamin C (mg) per Sample Unit"] = df_fig1["Mass Vitamin C (g) per Sample Unit"] * 1000

fig = px.bar(
    df_fig1,
    # Take the labels from the *sorted* frame. Plotly Express pairs array-like
    # arguments with rows by position, so using the unsorted df.index here would
    # attach sample names to the wrong bars.
    x=df_fig1.index,
    y="Mass Vitamin C (mg) per Sample Unit",
    color=df_fig1.index,
    # The values span more than two orders of magnitude; a log axis keeps the
    # smallest bar visible.
    log_y=True,
)

fig.update_layout(
    height=750,
    width=1000,
    title="Comparison of Vitamin C Content in Various Food Types",
    xaxis_title="Samples",
    yaxis_title="Observed Mass Vitamin C (mg) per Sample Unit (Log<sub>10</sub> Scale)",
    # Each bar is already labelled on the x axis, so a colour legend is redundant.
    showlegend=False,
)

fig

In [7]:
# Reported vitamin C content in mg per sample unit, listed in the same row order as df.
# NOTE(review): the denominators are presumably serving/reference sizes taken from the
# linked sources (39, 240, 100) — confirm units (g vs. mL) against each source.
reported_mg_per_unit = [
    (VITAMIN_C_RECC_MILLIGRAMS * 0.1) / 39, #https://www.cheerios.com/products/original-cheerios
    15 / 240, #https://www.carbmanager.com/food-detail/md:2172b3032605265c2ae66438d08a25de/tomato-red-bell-pepper-juice
    50 / 100, #https://fdc.nal.usda.gov/food-details/169098/nutrients
    0, #https://fdc.nal.usda.gov/food-details/174873/nutrients
]

# Store in grams so the column is directly comparable with the observed per-unit column.
df["Reported Vitamin C (g) Per Sample Unit"] = [mg / 1000 for mg in reported_mg_per_unit]
df.head()

Unnamed: 0,Sample Units,Volume (mL) / Mass (g) of Sample,KIO3 Titrated (mL),Mass Vitamin C (g),Mass Vitamin C (g) per Sample Unit,Sample Units Needed to Consume Recommendation,Reported Vitamin C (g) Per Sample Unit
Cheerios,g,1.4896,8.5,0.001329,0.000892,92.444918,0.000212
Red Pepper Juice,mL,10.0,6.0,0.000938,9.4e-05,879.186591,6.3e-05
Orange Juice,mL,5.2,3.5,0.000547,0.000105,783.732047,0.0005
Tea,mL,10.0,0.4,6.3e-05,6e-06,13187.798859,0.0


In [8]:
# Observed (titrated) and reported vitamin C content, both converted from g to mg
# per sample unit so the two axes share units.
x = (df["Mass Vitamin C (g) per Sample Unit"] * 1000).to_list()
y = (df["Reported Vitamin C (g) Per Sample Unit"] * 1000).to_list()

fig2 = px.scatter(
    df,
    x=x,
    y=y,
)

# Reference line y = x: points on it have identical observed and reported values.
# It spans at least 0..1 and extends further if any data point lies beyond 1, so the
# line always covers the plotted data.
line_end = max([1, *x, *y])
fig2.add_shape(
    type="line",
    x0=0,
    y0=0,
    x1=line_end,
    y1=line_end,
    line=dict(color="red", width=2)
)

fig2.add_annotation(
    x=0.475,  # Placed slightly above the line (y > x) so the text does not sit on it
    y=0.50,
    text="y=x",
    showarrow=False,
    font=dict(color="red", size=12),
    align="center",
    textangle=-40
)

# Draw and label the residual (reported - observed) for each sample.
for xi, yi in zip(x, y):
    # Vertical distance from the point to the y=x line at the same x.
    y_line = xi
    residual = yi - y_line

    # Residuals that round to 0.000 would only add clutter; skip them.
    if abs(residual) < 0.001:
        continue

    # Dotted vertical segment from the y=x line to the data point.
    fig2.add_shape(
        type="line",
        x0=xi, y0=y_line,
        x1=xi, y1=yi,
        line=dict(color="red", width=1, dash="dot")
    )

    # Text-only trace carrying the residual value, centred along the segment.
    fig2.add_trace(
        go.Scatter(
            x=[xi],
            y=[(yi + y_line) / 2],
            text=[f"{residual:.3f}"],
            mode="text",
            textposition="middle right",
            textfont=dict(color="red", size=10)
        )
    )

fig2.update_layout(
    title="Comparison between Reported and Observed Vitamin C Content, with Residuals",
    # Values are milligrams of vitamin C per sample unit (g for solids, mL for liquids).
    xaxis_title="Observed Mass of Vitamin C per Sample Unit (mg per g or mL)",
    yaxis_title="Reported Mass of Vitamin C per Sample Unit (mg per g or mL)",
    width=800,
    height=600,
    showlegend=False
)
fig2.show()