# Circle Classification Model

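This notebook generates data points that fall roughly on the unit circle (points on the circle plus Gaussian noise) and assigns each point a binary label according to whether it lies above or below the line through the origin whose angle is given by the parameter `theta`.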

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # Needed for custom legend

In [None]:
# Angle (in radians) of the decision line through the origin.
# Read as a global by the data-generation and plotting cells.
theta = 0.25 * np.pi  # default: 45 degrees

In [None]:
def generate_circle_data(n_samples, sigma=0.1, angle=None, seed=None):
    """
    Generate noisy points around the unit circle, labelled by side of a line.

    Points are sampled uniformly by angle on the unit circle, perturbed with
    independent Gaussian noise in both coordinates, and labelled 1 when they
    lie above the line ``x2 = tan(angle) * x1`` (0 otherwise).

    Args:
        n_samples (int): Total number of data points to generate.
        sigma (float): Standard deviation of the normal noise added to the
            unit circle coordinates.
        angle (float, optional): Angle in radians of the decision line through
            the origin. When omitted, the module-level ``theta`` is used, which
            matches the original behaviour of this function.
        seed (int, optional): Seed for a dedicated NumPy random generator,
            giving reproducible output. When omitted, samples are drawn from
            NumPy's global random state (so ``np.random.seed`` still applies).

    Returns:
        pd.DataFrame: DataFrame containing features (x1, x2) and labels (y).

    Note:
        The line is represented by its slope ``tan(angle)``, so angles at or
        near +/- pi/2 produce extremely large slopes.
    """
    # Fall back to the notebook-wide setting only when no angle is supplied,
    # so the function can also be used without relying on global state.
    if angle is None:
        angle = theta

    # np.random (module) and Generator share the uniform/normal call shapes.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Random angles uniformly distributed from 0 to 2π
    theta_r = rng.uniform(0, 2 * np.pi, n_samples)

    # Noise-free points on the unit circle
    x1 = np.cos(theta_r)
    x2 = np.sin(theta_r)

    # Independent Gaussian noise for each coordinate of each point
    epsilon = rng.normal(0, sigma, size=(n_samples, 2))

    # Noisy data points, one row per sample
    X = np.column_stack([x1, x2]) + epsilon

    # Label 1 for points strictly above the line x2 = tan(angle) * x1
    slope = np.tan(angle)
    y = (X[:, 1] > slope * X[:, 0]).astype(int)

    return pd.DataFrame({
        'x1': X[:, 0],
        'x2': X[:, 1],
        'y': y,
    })

In [None]:
# --- Example run: build a dataset and summarise it ---
N_SAMPLES = 1000  # number of points to draw
SIGMA = 0.1       # noise standard deviation around the unit circle

# Build the labelled dataset
circle_data = generate_circle_data(n_samples=N_SAMPLES, sigma=SIGMA)

# --- Quick look at the result ---
# First rows of the generated frame
print("Generated Data Head:", circle_data.head(), sep="\n")
print(f"\nTotal samples generated: {len(circle_data)}")

# Fraction of samples carrying each label
label_fractions = circle_data['y'].value_counts(normalize=True)
print("\nValue Counts for Labels (y):")
print(label_fractions)

In [None]:
# --- Plot the labelled samples, the decision line and the unit circle ---
fig, ax = plt.subplots(figsize=(10, 8))

# One colour per class label; every point uses the same marker shape
color_map = {0: 'skyblue', 1: 'firebrick'}
marker = 'o'

# Draw each class as its own scatter layer so it gets its own colour
for label_val, group_df in circle_data.groupby('y'):
    color = color_map[label_val]
    ax.scatter(
        group_df['x1'],
        group_df['x2'],
        c=color,
        marker=marker,
        s=50,       # marker size
        alpha=0.7,  # slight transparency so overlapping points stay visible
    )

# Title and axis labels (the title reports theta in radians and degrees)
ax.set_title(f'Circle Classification Data (theta={theta:.2f} radians, {theta*180/np.pi:.1f} degrees)')
ax.set_xlabel('Feature x1')
ax.set_ylabel('Feature x2')

# Light dashed grid
ax.grid(True, linestyle='--', alpha=0.6)

# Decision boundary: the line through the origin with slope tan(theta),
# drawn between the left and right edges of the plotting window
x_vals = np.array([-1.5, 1.5])
y_vals = np.tan(theta) * x_vals
ax.plot(x_vals, y_vals, color='black', linestyle='--', label='Decision Boundary')

# Outline of the noise-free unit circle for reference
circle = plt.Circle((0, 0), 1, fill=False, color='gray', linestyle='-', linewidth=1)
ax.add_patch(circle)

# Equal scaling on both axes keeps the circle round
ax.set_aspect('equal')

# Fixed, symmetric viewing window
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1.5, 1.5)

# Hand-built legend: one marker entry per class, then the two reference lines
legend_elements = [
    Line2D([0], [0], marker=marker, color='w', label=text, markerfacecolor=face, markersize=8)
    for text, face in (('Label 0', color_map[0]), ('Label 1', color_map[1]))
]
legend_elements.append(
    Line2D([0], [0], color='black', lw=2, linestyle='--', label='Decision Boundary (theta)')
)
legend_elements.append(
    Line2D([0], [0], color='gray', lw=1, linestyle='-', label='Unit Circle')
)
ax.legend(handles=legend_elements, title='Legend', loc='best')

# Render the figure
plt.show()

## Experimenting with Different Theta Values

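You can change the global `theta` parameter, regenerate the data (the labels are computed from `theta` at generation time), and then re-run the visualization cell to see how the decision boundary and the labels change.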

In [None]:
# Switch the decision line to a new angle
theta = np.pi / 6  # 30 degrees

# Labels depend on theta at generation time, so draw a fresh dataset
circle_data = generate_circle_data(n_samples=N_SAMPLES, sigma=SIGMA)

# Now re-run the visualization cell above to see the updated plot