# <span style='color:#00f2ff'>Classical</span> vs <span style='color:#ff0099'>Random</span> Correlation

Welcome to an interactive exploration of statistical correlation. This notebook is designed to visually contrast **true relationships** against **spurious patterns** that emerge from pure noise.

### <span style='color:#b3ff00'>Interactive Controls</span>
Use the dashboard below to adjust the sample size, the number of random features, and the true correlation strength in real time.

In [1]:

import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import ipywidgets as widgets
from scipy.stats import pearsonr
from IPython.display import display, HTML, clear_output

# CUSTOM THEME
# Shared palette: the Matplotlib, Plotly and CSS settings below, as well as the
# plotting functions later in the notebook, all read these constants.
BACKGROUND_COLOR = '#0b0f19'
TEXT_COLOR = '#e0e0e0'
ACCENT_CYAN = '#00f2ff'
ACCENT_PINK = '#ff0099'
ACCENT_LIME = '#b3ff00'
GRID_COLOR = '#1f293a'

# Matplotlib/Seaborn Configuration
# Start from the built-in dark style, then override individual colours with the
# palette above so static figures match the Plotly ones.
plt.style.use('dark_background')
sns.set_context("notebook", font_scale=1.2)
plt.rcParams.update({
    'figure.facecolor': BACKGROUND_COLOR,
    'axes.facecolor': BACKGROUND_COLOR,
    'axes.edgecolor': GRID_COLOR,
    'axes.labelcolor': TEXT_COLOR,
    'xtick.color': TEXT_COLOR,
    'ytick.color': TEXT_COLOR,
    'grid.color': GRID_COLOR,
    'text.color': TEXT_COLOR,
    'figure.dpi': 120
})

# Plotly Custom Template
# Registered under the name "cyberpunk" and installed as the default template,
# so figures created after this cell pick it up without passing `template=`.
pio.templates["cyberpunk"] = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor=BACKGROUND_COLOR,
        plot_bgcolor=BACKGROUND_COLOR,
        font={'color': TEXT_COLOR},
        xaxis={'gridcolor': GRID_COLOR, 'zerolinecolor': GRID_COLOR},
        yaxis={'gridcolor': GRID_COLOR, 'zerolinecolor': GRID_COLOR},
        colorway=[ACCENT_CYAN, ACCENT_PINK, ACCENT_LIME, '#9d00ff', '#ffae00']
    )
)
pio.templates.default = "cyberpunk"

# Custom CSS for Jupyter Interface
# Injects a <style> tag that recolours widget labels/readouts and <pre> text in
# output areas with TEXT_COLOR. Doubled braces are literal CSS braces inside the
# f-string; single braces interpolate the palette constant.
display(HTML(f"""
<style>
    .widget-label {{ color: {TEXT_COLOR} !important; }}
    .widget-readout {{ color: {TEXT_COLOR} !important; }}
    .jupyter-widgets {{ color: {TEXT_COLOR}; }}
    div.output_area pre {{ color: {TEXT_COLOR}; }}
</style>
"""))

In [2]:

def generate_classical_data(n_samples, correlation):
    """Draw samples from a zero-mean, unit-variance bivariate normal.

    Parameters
    ----------
    n_samples : int
        Number of (X, Y) pairs to draw.
    correlation : float
        Target correlation between X and Y. Must lie in [-1, 1]; any other
        value (including NaN) triggers a ``RuntimeWarning`` and the data is
        drawn with zero correlation instead.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``'X'`` and ``'Y'`` and ``n_samples`` rows.

    Notes
    -----
    Samples come from NumPy's global random state (``np.random``), so call
    ``np.random.seed(...)`` beforehand for reproducible output.
    """
    # The covariance [[1, r], [r, 1]] is positive semi-definite only for
    # |r| <= 1. NumPy's default `check_valid='warn'` does not raise for an
    # invalid matrix, so catching ValueError around the draw never produced the
    # intended uncorrelated fallback. Validate explicitly and say so.
    if not -1.0 <= correlation <= 1.0:
        warnings.warn(
            f"correlation={correlation!r} is outside [-1, 1]; "
            "falling back to uncorrelated data",
            RuntimeWarning,
            stacklevel=2,
        )
        correlation = 0.0

    mean = [0.0, 0.0]
    cov = [[1.0, correlation], [correlation, 1.0]]
    data = np.random.multivariate_normal(mean, cov, n_samples)
    return pd.DataFrame(data, columns=['X', 'Y'])

def generate_random_noise(n_samples, n_features):
    """Build a frame of independent standard-normal noise.

    Returns a DataFrame with ``n_samples`` rows and ``n_features`` columns
    named ``Feat_0`` .. ``Feat_{n_features - 1}``. Values are drawn from
    NumPy's global random state via ``np.random.randn``.
    """
    feature_names = [f'Feat_{i}' for i in range(n_features)]
    noise = np.random.randn(n_samples, n_features)
    return pd.DataFrame(noise, columns=feature_names)

In [3]:
def plot_classical(df, correlation, n_samples):
    """Scatter plot of the correlated sample with an OLS trendline.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric columns ``'X'`` and ``'Y'``.
    correlation : float
        Target correlation the data was generated with (shown in the title).
    n_samples : int
        Sample size (shown in the title).

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Sample Pearson r of the data actually drawn; the p-value is not used.
    r, _ = pearsonr(df['X'], df['Y'])

    # Format the target with 3 significant digits: clean values keep their
    # short form (0.7 -> "0.7") while un-rounded floats such as
    # 0.15000000000000002 no longer spill their full repr into the title.
    subtitle = f"Target: {correlation:.3g} | Actual: {r:.3f} | N: {n_samples}"

    fig = px.scatter(
        df, x='X', y='Y',
        # NOTE(review): Plotly Express OLS trendlines rely on the optional
        # `statsmodels` package being installed - confirm in the environment.
        trendline='ols',
        title=f"<b>Classical Correlation</b><br><span style='font-size:14px; color:{ACCENT_CYAN}'>{subtitle}</span>",
        opacity=0.7,
        color_discrete_sequence=[ACCENT_CYAN]
    )
    # Thin background-coloured outline keeps overlapping markers distinguishable.
    fig.update_traces(marker=dict(size=8, line=dict(width=1, color=BACKGROUND_COLOR)))
    fig.update_layout(height=450, margin=dict(l=20, r=20, t=60, b=20))
    return fig

def plot_random_heatmap(df_noise):
    """Heatmap of the pairwise correlation matrix of ``df_noise``.

    Only the strictly lower triangle is drawn: the diagonal and everything
    above it are replaced by NaN before plotting, so each feature pair appears
    once and the trivial self-correlations are hidden.

    Parameters
    ----------
    df_noise : pandas.DataFrame
        Numeric frame whose columns are the features to correlate.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    corr = df_noise.corr()

    # True on and above the diagonal; `where(~mask)` keeps only cells below it.
    mask = np.triu(np.ones_like(corr, dtype=bool))

    fig = go.Figure(data=go.Heatmap(
        z=corr.where(~mask).values,
        x=corr.columns,
        y=corr.columns,
        colorscale='RdBu',
        # Fixed symmetric range centred on 0 so colours are comparable across redraws.
        zmid=0,
        zmin=-1, zmax=1,
        showscale=True,
        colorbar=dict(title='Corr')
    ))

    fig.update_layout(
        # Must be an f-string: without the prefix the literal text
        # "{ACCENT_PINK}" was emitted as the CSS colour instead of the palette value.
        title=f"<b>Random Noise Correlation Matrix</b><br><span style='font-size:14px; color:{ACCENT_PINK}'>Searching for patterns in chaos</span>",
        height=450,
        margin=dict(l=20, r=20, t=60, b=20),
        xaxis_showgrid=False,
        yaxis_showgrid=False
    )
    return fig

def plot_distribution_comparison(df_noise):
    """Histogram of all distinct pairwise correlations in ``df_noise``.

    The subtitle reports the largest absolute correlation found among the
    feature pairs, or ``n/a`` when the frame has fewer than two columns and
    therefore no pairs.

    Parameters
    ----------
    df_noise : pandas.DataFrame
        Numeric frame whose columns are the features to correlate.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    corr_matrix = df_noise.corr()
    values = corr_matrix.values
    # k=1 selects the strict upper triangle: one entry per unordered pair,
    # excluding the diagonal of self-correlations.
    values = values[np.triu_indices_from(values, k=1)]

    # With fewer than two features there are no pairs; np.max on an empty
    # array would raise ValueError, so report "n/a" instead.
    if values.size:
        max_label = f"{np.max(np.abs(values)):.3f}"
    else:
        max_label = "n/a"

    fig = px.histogram(
        x=values,
        nbins=30,
        title=f"<b>Distribution of Random Correlations</b><br><span style='font-size:14px; color:{ACCENT_LIME}'>Max spurious correlation found: {max_label}</span>",
        color_discrete_sequence=[ACCENT_PINK],
        opacity=0.8
    )
    # Dashed reference line at zero correlation.
    fig.add_vline(x=0, line_dash="dash", line_color="white", opacity=0.5)
    fig.update_layout(height=400, showlegend=False, xaxis_title="Correlation Coefficient", yaxis_title="Count")
    return fig

def plot_constrained_analysis(df_continuous, df_binary, true_corr, n_samples):
    """Plot the sample's (continuous r, binary r) point against the arcsin curve.

    Parameters
    ----------
    df_continuous : pandas.DataFrame
        Two-column frame; the correlation between its first and second column
        becomes the x coordinate of the marker.
    df_binary : pandas.DataFrame
        Two-column frame; the correlation between its first and second column
        becomes the y coordinate of the marker.
    true_corr, n_samples
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    # Reference curve y = (2/pi) * arcsin(x) on [-1, 1].
    # NOTE(review): presumably the expected correlation of sign-binarised
    # Gaussian variables as a function of the continuous correlation - confirm.
    rho_grid = np.linspace(-1, 1, 100)
    arcsin_curve = (2/np.pi) * np.arcsin(rho_grid)

    # Off-diagonal entry of each 2x2 correlation matrix.
    sample_r_cont = df_continuous.corr().iloc[0, 1]
    sample_r_bin = df_binary.corr().iloc[0, 1]

    reference_trace = go.Scatter(
        x=rho_grid, y=arcsin_curve,
        mode='lines',
        name='Classical Limit (Arcsin Law)',
        line=dict(color=ACCENT_LIME, width=2, dash='dash')
    )
    sample_trace = go.Scatter(
        x=[sample_r_cont], y=[sample_r_bin],
        mode='markers+text',
        name='Your Sample',
        marker=dict(size=15, color=ACCENT_PINK, symbol='star'),
        text=[f"Continuous: {sample_r_cont:.2f}<br>Binary: {sample_r_bin:.2f}"],
        textposition="top center"
    )

    fig = go.Figure(data=[reference_trace, sample_trace])

    # Axes are padded slightly beyond [-1, 1] so points at the extremes stay visible.
    padded_unit_range = [-1.1, 1.1]
    fig.update_layout(
        title=f"<b>Constraint Analysis (Bell Preview)</b><br><span style='font-size:14px; color:{ACCENT_LIME}'>Binary Correlation Limit vs Continuous</span>",
        xaxis_title="Continuous Correlation (Input)",
        yaxis_title="Binary Correlation (Output)",
        height=450,
        yaxis=dict(range=list(padded_unit_range)),
        xaxis=dict(range=list(padded_unit_range)),
        showlegend=True
    )
    return fig

In [4]:

# Shared slider style: let each description label size itself to its text.
style = {'description_width': 'initial'}

# Number of observations drawn for both the correlated pair and the noise frame.
w_samples = widgets.IntSlider(
    description='Sample Size (N):',
    value=50,
    min=10,
    max=1000,
    step=10,
    style=style,
)

# Target correlation of the bivariate-normal sample.
w_corr = widgets.FloatSlider(
    description='True Correlation:',
    value=0.7,
    min=-1,
    max=1,
    step=0.05,
    style=style,
)

# Number of independent noise columns to correlate against each other.
w_features = widgets.IntSlider(
    description='Random Features:',
    value=20,
    min=5,
    max=100,
    step=5,
    style=style,
)

# Control strip: the three sliders side by side.
ui = widgets.HBox(
    children=[w_samples, w_corr, w_features],
    layout=widgets.Layout(
        justify_content='space-around',
        margin='20px 0px 20px 0px',
    ),
)

# Output area that `on_change` clears and redraws into.
out = widgets.Output()

def on_change(change):
    """Slider observer: regenerate the data and redraw all four figures in ``out``.

    ``change`` is whatever the caller passes (the observer payload, or ``None``
    for the initial manual call); it is ignored because the current values of
    all three sliders are re-read on every invocation.
    """
    def show_heading(text):
        # Bold caption rendered as a widget label above each pair of figures.
        display(widgets.HBox([widgets.Label(text, style={'font_weight':'bold', 'font_size':'16px'})]))

    with out:
        # wait=True postpones the clear until new output is ready to replace it.
        clear_output(wait=True)

        # Snapshot of the current control values.
        n_samples = w_samples.value
        target_corr = w_corr.value
        n_features = w_features.value

        # Fresh random draws on every change.
        signal_df = generate_classical_data(n_samples, target_corr)
        noise_df = generate_random_noise(n_samples, n_features)

        # Element-wise sign of the correlated sample (the "binary" version).
        sign_df = np.sign(signal_df)

        # Build every figure before displaying anything.
        fig_class = plot_classical(signal_df, target_corr, n_samples)
        fig_heat = plot_random_heatmap(noise_df)
        fig_dist = plot_distribution_comparison(noise_df)
        fig_constraint = plot_constrained_analysis(signal_df, sign_df, target_corr, n_samples)

        rows = (
            ("Row 1: Signal & Constraints", (fig_class, fig_constraint)),
            ("Row 2: Random Noise Search", (fig_heat, fig_dist)),
        )
        for heading, figures in rows:
            show_heading(heading)
            for figure in figures:
                figure.show()


# Redraw whenever the value of any slider changes.
for slider in (w_samples, w_corr, w_features):
    slider.observe(on_change, names='value')


# Show the controls above the output area, then render once with the default
# slider values so the dashboard is not empty before the first interaction.
display(ui, out)
on_change(None)

HBox(children=(IntSlider(value=50, description='Sample Size (N):', max=1000, min=10, step=10, style=SliderStyl…

Output()