# In [28]:  (Jupyter notebook cell prompt; kept as a comment so the file is valid Python)
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Generate synthetic data
def generate_data(n_samples=50, seed=42, noise_std=0.2):
    """Generate a noisy sine-wave regression dataset.

    The inputs are ``n_samples`` evenly spaced points on [-1, 1]; the targets
    are ``sin(pi * x)`` plus zero-mean Gaussian noise.

    Args:
        n_samples: Number of points to generate.
        seed: Seed for the noise generator; the same seed always yields the
            same dataset.
        noise_std: Standard deviation of the Gaussian noise added to the sine.

    Returns:
        A tuple ``(X, y)`` of arrays, each of shape ``(n_samples, 1)``.
    """
    # A private generator keeps the data reproducible without reseeding
    # NumPy's global RNG, which would silently change the random stream of
    # any other code in the process. RandomState(42) reproduces the values
    # that np.random.seed(42) followed by np.random.normal used to give.
    rng = np.random.RandomState(seed)
    X = np.linspace(-1, 1, n_samples).reshape(-1, 1)
    y = np.sin(X * np.pi) + rng.normal(0, noise_std, size=X.shape)
    return X, y

# Train model and create visualizations
def train_and_visualize(degree):
    """Fit a polynomial regression of the given degree and plot the result.

    The synthetic dataset from ``generate_data`` is split 80/20 into train
    and test sets, a linear regression is fitted on polynomial features of
    the training inputs, and the fitted curve is drawn over both sets.

    Args:
        degree: Polynomial degree; converted with ``int()`` because the
            slider may deliver it as a float.

    Returns:
        A tuple ``(figure, train_rmse, test_rmse)`` where ``figure`` is the
        matplotlib figure and the two RMSE values are strings formatted to
        three decimal places.
    """
    degree = int(degree)

    # Generate and split data (fixed random_state keeps the split stable
    # across slider changes, so only the degree varies between renders).
    X, y = generate_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Expand inputs into polynomial features; fit only on training data.
    poly = PolynomialFeatures(degree=degree)
    X_train_poly = poly.fit_transform(X_train)
    X_test_poly = poly.transform(X_test)

    model = LinearRegression()
    model.fit(X_train_poly, y_train)

    # Dense grid over the input range for a smooth fitted curve.
    X_plot = np.linspace(-1, 1, 200).reshape(-1, 1)
    y_plot = model.predict(poly.transform(X_plot))

    train_rmse = np.sqrt(mean_squared_error(y_train, model.predict(X_train_poly)))
    test_rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test_poly)))

    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so the plot cannot land on a figure created elsewhere.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(X_train, y_train, color='blue', label='Training data')
    ax.scatter(X_test, y_test, color='red', label='Test data')
    ax.plot(X_plot, y_plot, color='green', label=f'Polynomial (degree={degree})')
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    ax.set_title(
        f'Polynomial (degree={degree})\nTrain RMSE: {train_rmse:.3f}\nTest RMSE: {test_rmse:.3f}'
    )
    ax.legend()
    ax.grid(True)

    # Unregister the figure from pyplot so repeated slider changes do not
    # accumulate open figures; the Figure object itself stays usable and can
    # still be rendered by the caller.
    plt.close(fig)

    return fig, f'{train_rmse:.3f}', f'{test_rmse:.3f}'

# Build the UI: one slider drives the plot and both RMSE read-outs.
with gr.Blocks() as iface:
    gr.Markdown("# Overfitting vs Underfitting Demonstration")
    gr.Markdown("Adjust the polynomial degree to see how model complexity affects performance on training and test data.")
    
    with gr.Column():
        degree_slider = gr.Slider(minimum=1, maximum=15, step=1, value=1, label="Polynomial Degree")
        plot_output = gr.Plot(label="Model Visualization")
        train_score = gr.Textbox(label="Training RMSE")
        test_score = gr.Textbox(label="Test RMSE")
        
        # Re-fit and redraw whenever the slider value changes.
        degree_slider.change(
            fn=train_and_visualize,
            inputs=[degree_slider],
            outputs=[plot_output, train_score, test_score],
        )

    # Render the fit for the slider's initial value when the page loads;
    # without this the plot and RMSE boxes stay empty until the slider moves.
    iface.load(
        fn=train_and_visualize,
        inputs=[degree_slider],
        outputs=[plot_output, train_score, test_score],
    )

# Launch the interface
iface.launch()

# Cell output:
# Running on local URL:  http://127.0.0.1:7885
#
# To create a public link, set `share=True` in `launch()`.


