In [1]:
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import plotly.express as px

In [2]:
# Load the Anscombe quartet observations and preview the first rows.
data = pd.read_csv('anscombes.csv')
display(data.head(5))

# Report how many distinct dataset labels exist and what they are.
dataset_labels = data["dataset"]
print(
    "Hay",
    dataset_labels.nunique(),
    "datasets diferentes: ",
    dataset_labels.unique(),
)

Unnamed: 0,id,dataset,x,y
0,0,I,10.0,8.04
1,1,I,8.0,6.95
2,2,I,13.0,7.58
3,3,I,9.0,8.81
4,4,I,11.0,8.33


Hay 4 datasets diferentes:  ['I' 'II' 'III' 'IV']


In [3]:
def _summarize_dataset(label, frame):
    """Return the rounded summary statistics of one dataset as a dict.

    Means and the x/y correlation are rounded to 3 decimals, variances
    (pandas default, i.e. sample variance) to 2 decimals.
    """
    xs = frame['x']
    ys = frame['y']
    xy_corr = frame[['x', 'y']].corr().loc['x', 'y']
    return {
        'Dataset': label,
        'Mean X': round(xs.mean(), 3),
        'Mean Y': round(ys.mean(), 3),
        'Variance X': round(xs.var(), 2),
        'Variance Y': round(ys.var(), 2),
        'Correlation': round(xy_corr, 3),
    }


# One summary record per dataset label, in order of first appearance.
metrics = [
    _summarize_dataset(label, data[data['dataset'] == label])
    for label in data['dataset'].unique()
]

metrics_df = pd.DataFrame(metrics)
display(HTML('<h3>Métricas Principales por Dataset</h3>'))
display(metrics_df)


Unnamed: 0,Dataset,Mean X,Mean Y,Variance X,Variance Y,Correlation
0,I,9.0,7.501,11.0,4.13,0.816
1,II,9.0,7.501,11.0,4.13,0.816
2,III,9.0,7.5,11.0,4.12,0.816
3,IV,9.0,7.501,11.0,4.12,0.817


In [4]:
# One scatter panel per dataset, laid out on a 2-column grid.
fig = px.scatter(
    data,
    x="x",
    y="y",
    color="dataset",
    facet_col="dataset",
    facet_col_wrap=2,
    title="Cuarteto de Anscombe",
)
fig.update_traces(marker=dict(size=10, opacity=0.9))
fig.update_layout(height=500, width=1000)

# Blank out the paper-referenced annotations (the automatic facet titles);
# the legend already identifies each dataset.
for ann in fig.layout.annotations:
    if ann['xref'] == 'paper':
        ann.text = ""

# HTML snippet with the summary statistics of each dataset, keyed by label.
metrics_text = {
    row['Dataset']: (
        f"Mean X: {row['Mean X']}<br>"
        f"Mean Y: {row['Mean Y']}<br>"
        f"Var X: {row['Variance X']}<br>"
        f"Var Y: {row['Variance Y']}<br>"
        f"Corr: {row['Correlation']}"
    )
    for _, row in metrics_df.iterrows()
}

# Place each summary in the bottom-right corner of its own panel.
# The subplot axes are read from the trace itself instead of being hard-coded,
# so the labels stay correct if the facet layout or dataset order changes.
for trace in fig.data:
    x_axis = trace.xaxis or "x"
    y_axis = trace.yaxis or "y"
    fig.add_annotation(
        x=0.95,
        y=0.05,
        xref=f"{x_axis} domain",
        yref=f"{y_axis} domain",
        text=metrics_text[trace.name],
        showarrow=False,
        align="right",
        font=dict(size=10),
    )

# Drop the repeated per-panel axis titles.
fig.for_each_xaxis(lambda axis: axis.update(title=""))
fig.for_each_yaxis(lambda axis: axis.update(title=""))

fig.show()
