In [2]:
import numpy as np
import pandas as pd
import os

def generate_simple_regression_data(num_samples=5000, min_x=-2.0, max_x=2.0, seed=None):
    """
    Generates a simple dataset (X, Y) where Y = X^2.

    X is drawn uniformly from the half-open interval [min_x, max_x); both
    columns are stored as float32.

    Args:
        num_samples (int): Total number of independent (x, x^2) instances.
        min_x (float): Minimum value for the input X.
        max_x (float): Maximum value for the input X.
        seed (int | None): Optional seed for a reproducible dataset. When
            None (the default), values are drawn from NumPy's global random
            state, exactly as before this parameter existed.

    Returns:
        pd.DataFrame: A DataFrame with two columns: 'X_Input' and 'Y_Target',
        containing `num_samples` rows.
    """

    # 1. Generate `num_samples` random scalar inputs (X)
    # Shape: (num_samples,)
    if seed is None:
        # Keep using the global RNG so callers that rely on np.random.seed()
        # continue to get the same stream they always did.
        X_scalars = np.random.uniform(low=min_x, high=max_x, size=num_samples)
    else:
        # A private Generator gives reproducibility without touching the
        # global random state.
        rng = np.random.default_rng(seed)
        X_scalars = rng.uniform(low=min_x, high=max_x, size=num_samples)

    # 2. Calculate the scalar targets (Y = X^2)
    # Shape: (num_samples,)
    # Squared in the sampled (float64) precision, then cast below.
    Y_scalars = X_scalars ** 2

    # 3. Combine into a single DataFrame
    data = {
        'X_Input': X_scalars.astype(np.float32),
        'Y_Target': Y_scalars.astype(np.float32)
    }
    unified_df = pd.DataFrame(data)

    return unified_df

# --- Run settings ---
NUM_SAMPLES = 5000
INPUT_RANGE = (-2.0, 2.0)
OUTPUT_FILENAME = 'regression_data.csv'

# --- Step 1: build the dataset ---
# INPUT_RANGE is (min_x, max_x), so it unpacks straight into the two bounds.
unified_df = generate_simple_regression_data(NUM_SAMPLES, *INPUT_RANGE)

# --- Step 2: write the dataset to disk, omitting the index column ---
unified_df.to_csv(OUTPUT_FILENAME, index=False)

# --- Step 3: show the first few rows as a sanity check ---
print("Preview of the Dataset (first 5 samples):", unified_df.head(), sep="\n")

Preview of the Dataset (first 5 samples):
    X_Input  Y_Target
0  0.436213  0.190282
1  1.425251  2.031340
2  0.590926  0.349193
3 -0.970133  0.941159
4 -1.960613  3.844003
