In [7]:


import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
import plotly.graph_objects as go

def plot_calibration(
    fileName,
    sheet_name="Results",
    x_col="Concentration_(g/L)",
    y_cols=("Conductivity_A_(mS)", "Conductivity_B_(mS)", "Conductivity_C_(mS)"),
):
    """Fit and plot a linear calibration curve from replicate trials in an Excel sheet.

    Every trial column is pooled into one long (x, y) dataset and a single
    straight line is fitted with statsmodels OLS; the same data is also fitted
    with ``scipy.stats.linregress`` and drawn as a dashed line for comparison.
    The 95% confidence intervals of the statsmodels intercept and slope are
    printed, and an interactive Plotly figure is shown.

    Note: the pooled fit treats each replicate reading as a separate
    observation in one regression.

    Parameters
    ----------
    fileName : str or path-like
        Excel workbook to read; also used verbatim in the figure title.
    sheet_name : str, default "Results"
        Worksheet holding the calibration table.
    x_col : str, default "Concentration_(g/L)"
        Column with the independent variable.
    y_cols : str or sequence of str
        Column(s) with the dependent variable, one per trial. Defaults to the
        three conductivity columns A, B and C.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``y_cols`` is empty, or if fewer than two finite (x, y) pairs remain
        or all remaining x values are identical (no line can be fitted).
    KeyError
        If a requested column is missing from the sheet.
    """
    # Load data from Excel
    df = pd.read_excel(fileName, sheet_name=sheet_name)

    # Accept a single column name as well as a sequence of names
    if isinstance(y_cols, str):
        y_cols = [y_cols]
    y_cols = list(y_cols)
    if not y_cols:
        raise ValueError("y_cols must name at least one trial column")

    # Extract x values (independent variable) and one y series per trial
    x_original = df[x_col]
    trials = [df[col] for col in y_cols]

    # Pool all trials into one dataset: x is repeated once per trial so that
    # position i of all_x pairs with position i of all_y.
    all_x = np.tile(x_original.to_numpy(dtype=float), len(trials))
    all_y = np.concatenate([trial.to_numpy(dtype=float) for trial in trials])

    # Blank spreadsheet cells arrive as NaN and would either raise inside
    # statsmodels or poison both fits; drop only the affected (x, y) pairs so
    # the remaining readings of that row are still used.
    keep = np.isfinite(all_x) & np.isfinite(all_y)
    all_x, all_y = all_x[keep], all_y[keep]

    # A constant x column makes add_constant skip the intercept column (its
    # default has_constant='skip'), so fail early with a readable message.
    if all_x.size < 2 or all_x.min() == all_x.max():
        raise ValueError(
            f"{fileName}: need at least two valid points with distinct "
            f"'{x_col}' values to fit a calibration line"
        )

    # Fit Ordinary Least Squares (OLS) model; add_constant prepends the
    # intercept column, so params are ordered (intercept, slope).
    model = sm.OLS(all_y, sm.add_constant(all_x)).fit()
    intercept, slope = np.asarray(model.params)
    equation_text = f"y = {slope:.4f}x + {intercept:.4f}"  # Equation text for display

    # Compute and print 95% confidence intervals (row 0: intercept, row 1: slope)
    conf_int = np.asarray(model.conf_int(alpha=0.05))
    print("95% Confidence Intervals for Regression Coefficients:")
    print(f"Intercept: {conf_int[0, 0]:.5f} to {conf_int[0, 1]:.5f}")
    print(f"Slope: {conf_int[1, 0]:.5f} to {conf_int[1, 1]:.5f}")

    # Compute linear regression using SciPy (for comparison)
    scipy_fit = stats.linregress(all_x, all_y)

    # x values for drawing the fitted lines. A straight line does not need
    # thousands of samples; a few hundred keeps hover usable along the line
    # without bloating the figure stored in the notebook.
    x_line = np.linspace(x_original.min(), x_original.max(), 200)

    # Create Plotly figure
    fig = go.Figure()

    # Scatter plots for individual trials; styles repeat if there are more
    # trials than styles.
    marker_styles = [
        dict(color='black', symbol='x', size=6),
        dict(color='red', symbol='circle', size=6),
        dict(color='blue', size=6),
    ]
    for i, trial in enumerate(trials):
        fig.add_trace(go.Scatter(
            x=x_original,
            y=trial,
            mode='markers',
            marker=marker_styles[i % len(marker_styles)],
            name=f"Trial {chr(ord('A') + i)}",  # Trial A, Trial B, ...
        ))

    # Plot regression lines
    fig.add_trace(go.Scatter(x=x_line, y=slope * x_line + intercept, mode='lines',
                             line=dict(color='blue'), name="Statsmodels Fit"))
    fig.add_trace(go.Scatter(x=x_line, y=scipy_fit.slope * x_line + scipy_fit.intercept,
                             mode='lines', line=dict(color='red', dash='dash'),
                             name="SciPy Fit"))

    # Add equation annotation, anchored on the fitted line at the largest x
    x_max = x_original.max()
    fig.add_annotation(
        x=x_max,  # Place annotation towards the right side of the graph
        y=slope * x_max + intercept,
        text=f"<b>{equation_text}</b>",
        showarrow=False,
        font=dict(size=14, color="black"),
        align="right",
        xanchor="right",
        yanchor="bottom",
        bgcolor="white"
    )

    # Layout customization
    fig.update_layout(
        title=f"Calibration Curve - {fileName}",
        xaxis_title="Concentration (g/L)",
        yaxis_title="Conductivity (mS)",
        legend=dict(x=0, y=1),
        template="plotly_white",
        autosize=False,  # Disable autosizing
        width=600,  # Fixed width/height give a square plot
        height=600
    )

    # Show interactive plot
    fig.show()

# Produce one calibration report (printed confidence intervals + figure) per weekly workbook.
for workbook in ("RO_Week_1_Data.xlsx", "RO_Week_2_Data.xlsx"):
    plot_calibration(workbook)



95% Confidence Intervals for Regression Coefficients:
Intercept: 0.05302 to 0.17128
Slope: 1.78515 to 1.83261


95% Confidence Intervals for Regression Coefficients:
Intercept: 0.12023 to 0.30762
Slope: 1.86871 to 1.94391
