In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Notebook-wide plot defaults: seaborn's default theme first, then the default
# figure size is overridden to a square 8 x 8.
sns.set_theme()
plt.rcParams.update({"figure.figsize": [8, 8]})

In [None]:
# Read the concrete dataset from the sibling datasets directory and list the
# available columns.
concrete = pd.read_csv(filepath_or_buffer="../datasets/concrete.csv")
concrete.columns

In [None]:
# Keep only the rows with Age == 28 and the three columns used below, then
# report the resulting (rows, columns) shape.
# NOTE: this rebinds `concrete` and drops the "Age" column, so the cell cannot
# be re-run on its own -- re-run the loading cell first.
concrete = (
    concrete
    .query("Age == 28")
    .filter(items=["Cement", "Water", "Strength"])
)
concrete.shape

In [None]:
# Add a "Ratio" column holding Water divided by Cement and preview the result.
concrete = concrete.assign(Ratio=lambda frame: frame["Water"] / frame["Cement"])
concrete.head()

In [None]:
# Scatter of Strength against Ratio with a fitted regression line drawn in
# black; ci=None turns the confidence band off.
sns.regplot(
    data=concrete,
    x="Ratio",
    y="Strength",
    ci=None,
    line_kws={"color": "black"},
).set_title("Compressive strength versus water:cement ratio")
# plt.savefig("concrete_wc.png")

In [None]:
# Ordinary least squares fit of Strength on Ratio; the cell displays the
# estimated parameters.
concrete_model = smf.ols(formula="Strength ~ Ratio", data=concrete)
concrete_fit = concrete_model.fit()
concrete_fit.params

In [None]:
# Square root of the fit's `scale` attribute. For an OLS fit statsmodels
# documents `scale` as the residual variance estimate, so this is the residual
# standard error of the current model.
np.sqrt( concrete_fit.scale )

In [None]:
# Correlation between Strength and the current Ratio column
# (pandas' default method, Pearson).
concrete["Strength"].corr(concrete["Ratio"])

In [None]:
# R-squared of the current fit, shown right after the Strength/Ratio
# correlation so the two numbers can be compared.
concrete_fit.rsquared

In [None]:
# Residuals of the Strength-on-Ratio regression plotted against Ratio.
sns.residplot(
    data=concrete,
    x="Ratio",
    y="Strength",
    line_kws={"color": "black"},
).set_title("Residuals: Strength vs water:cement ratio")
# plt.savefig("concrete_wc_res.png")

In [None]:
# Replace "Ratio" with the inverse quantity, Cement divided by Water.
# NOTE: the column name is reused, so from here on "Ratio" means cement:water;
# re-running any earlier cell after this one will silently use the new values.
concrete = concrete.assign(Ratio=lambda frame: frame["Cement"] / frame["Water"])
concrete.head()

In [None]:
# Same scatter-plus-line plot as before, now against the cement:water ratio;
# ci=None turns the confidence band off.
sns.regplot(
    data=concrete,
    x="Ratio",
    y="Strength",
    ci=None,
    line_kws={"color": "black"},
).set_title("Compressive strength versus cement:water ratio")
# plt.savefig("concrete_cw.png")

In [None]:
# Correlation between Strength and the redefined (cement:water) Ratio column.
concrete["Strength"].corr(concrete["Ratio"])

In [None]:
# Refit the regression against the redefined Ratio column. This rebinds
# `concrete_model` and `concrete_fit`, so later cells refer to this fit.
concrete_model = smf.ols(formula="Strength ~ Ratio", data=concrete)
concrete_fit = concrete_model.fit()
concrete_fit.params

In [None]:
# Residual standard error of the refitted model: the square root of the fit's
# `scale` (statsmodels documents `scale` for OLS as the residual variance
# estimate). Stored because the residual plot below draws lines at +/- 2 of it.
concrete_rse = np.sqrt( concrete_fit.scale )
concrete_rse

In [None]:
# R-squared of the refitted (cement:water) model.
concrete_fit.rsquared

In [None]:
# Residual plot for the cement:water fit, with dotted reference lines at
# plus and minus two residual standard errors.
sns.residplot(
    data=concrete,
    x="Ratio",
    y="Strength",
    line_kws={"color": "black"},
)
plt.axhline(y=2 * concrete_rse, color="black", linestyle=":")
plt.axhline(y=-2 * concrete_rse, color="black", linestyle=":")
plt.title("Residuals: Strength vs cement:water ratio")
# plt.savefig("concrete_cw_res.png")

In [None]:
# Histogram of the Strength column.
sns.histplot(concrete, x="Strength")
# plt.savefig("concrete_strength.png")

In [None]:
# Print the plain-text rendering of the regression summary table.
print(concrete_fit.summary().as_text())

In [None]:
# Confidence intervals for the fitted parameters; alpha=0.05 is the
# statsmodels default, written out here to make the 95% level explicit.
concrete_fit.conf_int(alpha=0.05)

In [None]:
# Scatter of Strength against Ratio with the fitted line and seaborn's default
# confidence band (ci is left at its default).
# seaborn estimates that band by bootstrap resampling, so without a seed the
# shaded region changes slightly on every run; a fixed seed makes the figure
# reproducible under Restart & Run All.
sns.regplot(
    data=concrete,
    x="Ratio",
    y="Strength",
    seed=0,
    line_kws={"color": "black"},
)
plt.title("Compressive strength versus ratio with confidence bands")
# plt.savefig("concrete_cw_with_ci.png")