In [None]:
import pandas as pd
import numpy as np

# generate "messy" quantum experiment data: a synthetic measurement table with
# injected missing values and outliers, written out to quantum_data.csv
np.random.seed(42)  # fixed seed so every run produces the same table
n = 1000  # number of rows to generate
N_EXPERIMENTS = 5  # experiment_id takes the values 1..N_EXPERIMENTS
MISSING_FRACTION = 0.1  # per-row probability that a cell is blanked to NaN
N_OUTLIERS = 20  # number of rows overwritten with outlier values


def assign_experiment_ids(n_rows, n_experiments=N_EXPERIMENTS):
    """Return ``n_rows`` experiment ids in ``1..n_experiments``.

    Ids are laid out as contiguous, non-decreasing runs of near-equal length.
    When ``n_rows`` is a multiple of ``n_experiments`` the result equals
    ``np.repeat(range(1, n_experiments + 1), n_rows // n_experiments)``.
    Unlike that expression, the result always has exactly ``n_rows`` elements,
    so the column cannot end up shorter than the other columns.

    Args:
        n_rows: Number of ids to produce (0 yields an empty array).
        n_experiments: Number of distinct experiments.

    Returns:
        A 1-D integer ``numpy.ndarray`` of length ``n_rows``.
    """
    # max(..., 1) only guards the n_rows == 0 case against division by zero.
    return np.arange(n_rows) * n_experiments // max(n_rows, 1) + 1


# create DataFrame with some "quantum" measurements
# NOTE: the order of the np.random calls below determines the generated
# values for the fixed seed; do not reorder them.
data = {
    'experiment_id': assign_experiment_ids(n),  # consumes no random numbers
    'qubit_count': np.random.choice([2, 3, 5, 7, 10], n),
    'fidelity': np.random.normal(0.85, 0.12, n),
    'coherence_time_us': np.random.exponential(50, n),
    'gate_error_rate': np.random.normal(0.03, 0.01, n),
    'temperature_k': np.random.normal(0.015, 0.005, n),
    'measurement_method': np.random.choice(['direct', 'tomography', 'randomized'], n)
}

df = pd.DataFrame(data)

# adding missing values randomly: each listed column gets its own
# independent mask, so different rows are blanked in different columns
for col in ['fidelity', 'coherence_time_us', 'gate_error_rate']:
    mask = np.random.random(n) < MISSING_FRACTION
    df.loc[mask, col] = np.nan

# adding outliers: the same rows receive an outlier in both columns.
# The count is capped at n because sampling without replacement cannot
# return more unique rows than exist.
outlier_idx = np.random.choice(range(n), min(N_OUTLIERS, n), replace=False)
df.loc[outlier_idx, 'fidelity'] = np.random.uniform(1.2, 1.5, len(outlier_idx))
df.loc[outlier_idx, 'gate_error_rate'] = np.random.uniform(
    0.2, 0.3, len(outlier_idx))

# saving to CSV (index=False keeps the row index out of the file)

df.to_csv('quantum_data.csv', index=False)