# Simple Linear to Polynomial Regression (NumPy ndarray)
This notebook implements linear and polynomial regression using NumPy arrays.

## Data Prep

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model

In [None]:
# Download the fish measurements CSV and preview its first rows
FISH_CSV_URL = 'https://raw.githubusercontent.com/kjmobile/data/refs/heads/main/ml/fish_l.csv'
fish_df = pd.read_csv(FISH_CSV_URL)
fish_df.head()

In [None]:
# Keep only the Length and Weight columns, as a plain 2-D ndarray
fish = fish_df.loc[:, ["Length", "Weight"]].to_numpy()
print("Data shape:", fish.shape)
# Header and preview in one call; sep="\n" puts the rows on the line after the header
print("\nFirst 5 rows:", fish[:5], sep="\n")

### Why reshape(-1, 1)?

scikit-learn expects 2D arrays for features (X), even if there's only one feature.
- `fish[:, 0]` creates a 1D array: `[25.4, 26.3, 26.5, ...]`
- `fish[:, 0].reshape(-1, 1)` creates a 2D array: `[[25.4], [26.3], [26.5], ...]`

In [None]:
# Train-test split (NumPy version): hold out 25% of the rows, with a fixed seed
lengths_2d = fish[:, 0].reshape(-1, 1)  # Length as a 2D single-column feature array
weights = fish[:, 1]                    # Weight as a 1D target array
train_X, test_X, train_y, test_y = train_test_split(
    lengths_2d, weights, test_size=0.25, random_state=0
)

# Confirm that X stayed 2D and y stayed 1D after the split
print(f"Train X shape: {train_X.shape}")
print(f"Train y shape: {train_y.shape}")
print(f"Test X shape: {test_X.shape}")
print(f"Test y shape: {test_y.shape}")

## Simple Linear Regression

Model equation: $Weight = \beta_1 \times Length + \beta_0$

In [None]:
# Fit the simple linear model on the training split.
# fit() returns the estimator itself, so naming it on the last line displays it.
m1 = linear_model.LinearRegression().fit(train_X, train_y)
m1

In [None]:
# Report the fitted slope and intercept, then the full model equation
m1_slope = m1.coef_[0]        # single feature -> single coefficient
m1_intercept = m1.intercept_
print(f"Coefficient (β1): {m1_slope:.2f}")
print(f"Intercept (β0): {m1_intercept:.2f}")
print(f"\nModel Equation: Weight = {m1_slope:.2f} × Length + {m1_intercept:.2f}")

### Evaluate Model Performance: m1

In [None]:
# R² on the training split and on the held-out test split
train_r2, test_r2 = m1.score(train_X, train_y), m1.score(test_X, test_y)

print(f"Train R²: {train_r2:.4f}")
print(f"Test R²: {test_r2:.4f}")

In [None]:
# Predict the weight at Length = 60.
# predict() takes a 2D input (one row per sample); [0] extracts the single result.
query_60 = [[60]]
length_60_pred = m1.predict(query_60)[0]
print(f"Predicted weight for 60 inch fish: {length_60_pred:.2f} lbs")

In [None]:
# Visualization: training points, the fitted line, and the prediction at Length = 60
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(train_X[:, 0], train_y, edgecolor='w', alpha=0.7)

# A straight line only needs its two end points
line_x = [8.4, 60]
line_y = m1.predict([[x] for x in line_x])
ax.plot(line_x, line_y, ls='--', color='green', linewidth=2, label='Regression Line')

# Highlight the prediction; zorder keeps the marker on top of the line
ax.scatter(60, m1.predict([[60]]), color='r', marker="o", s=200, label='60 inch prediction', zorder=5)

ax.set_xlabel('Length (inch)')
ax.set_ylabel('Weight (lbs)')
ax.set_title('Simple Linear Regression')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Polynomial Regression

Model equation: $Weight = \beta_2 \times Length^2 + \beta_1 \times Length + \beta_0$

In [None]:
# Add polynomial features (NumPy version): column 0 = Length², column 1 = Length
train_sq, test_sq = train_X ** 2, test_X ** 2
train_poly = np.column_stack((train_sq, train_X))
test_poly = np.column_stack((test_sq, test_X))

print("Train poly shape:", train_poly.shape)
# Header and preview in one call; sep="\n" puts the rows on the line after the header
print("\nFirst 5 rows:", train_poly[:5], sep="\n")

### Understanding np.column_stack

`np.column_stack` combines arrays as columns:
- Input (in stacking order): `train_X**2 = [[645.16], [691.69]]` and `train_X = [[25.4], [26.3]]`
- Output: `[[645.16, 25.4], [691.69, 26.3]]`

In [None]:
# Fit the same linear estimator on the [Length², Length] feature matrix.
# fit() returns the estimator itself, so naming it on the last line displays it.
m2 = linear_model.LinearRegression().fit(train_poly, train_y)
m2

In [None]:
# Report the fitted coefficients; their order follows the feature columns [Length², Length]
beta2, beta1 = m2.coef_
beta0 = m2.intercept_
print(f"Coefficient for Length² (β2): {beta2:.2f}")
print(f"Coefficient for Length (β1): {beta1:.2f}")
print(f"Intercept (β0): {beta0:.2f}")
print(f"\nModel Equation: Weight = {beta2:.2f} × Length² + {beta1:.2f} × Length + {beta0:.2f}")

### Evaluate Model Performance: m2

In [None]:
# R² of the polynomial model on the training split and on the held-out test split
train_r2_poly, test_r2_poly = m2.score(train_poly, train_y), m2.score(test_poly, test_y)

print(f"Train R²: {train_r2_poly:.4f}")
print(f"Test R²: {test_r2_poly:.4f}")

# Compare against the linear model's test score computed in the earlier evaluation cell
r2_gain = test_r2_poly - test_r2
print("\n=== Model Comparison ===")
print(f"Linear Regression Test R²: {test_r2:.4f}")
print(f"Polynomial Regression Test R²: {test_r2_poly:.4f}")
print(f"Improvement: {r2_gain:.4f}")

In [None]:
# Predict the weight at Length = 60 with the polynomial model.
# The query row must use the training column order: [Length², Length].
query_60_poly = [[60 ** 2, 60]]
length_60_pred_poly = m2.predict(query_60_poly)[0]
pred_gap = abs(length_60_pred_poly - length_60_pred)
print(f"Predicted weight for 60 inch fish (polynomial): {length_60_pred_poly:.2f} lbs")
print(f"Predicted weight for 60 inch fish (linear): {length_60_pred:.2f} lbs")
print(f"Difference: {pred_gap:.2f} lbs")

In [None]:
# Visualization: training points, the fitted quadratic curve, and the prediction at Length = 60
plt.figure(figsize=(10, 6))
plt.scatter(train_X[:, 0], train_y, alpha=0.7)

# Draw polynomial regression line.
# np.linspace includes both end points, so the curve reaches the marker at 60.
# np.arange with a fractional step excludes the stop value and can return an
# inconsistent number of points because of floating-point rounding.
point = np.linspace(8.4, 60, 517)  # 517 points -> spacing of 0.1
predictions = m2.coef_[0] * point**2 + m2.coef_[1] * point + m2.intercept_
plt.plot(point, predictions, color='blue', linewidth=2, label='Polynomial Regression')

# Highlight the prediction; the query row uses the training column order [Length², Length]
plt.scatter(60, m2.predict([[60**2, 60]]), color='r', marker="s", s=200, label='60 inch prediction', zorder=5)
plt.xlabel('Length (inch)')
plt.ylabel('Weight (lbs)')
plt.title('Polynomial Regression')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## Compare Both Models

In [None]:
# Side-by-side comparison: linear fit (left) and polynomial fit (right).
# Both panels show the training points; the titles report each model's test R².
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Linear Regression
axes[0].scatter(train_X[:, 0], train_y, edgecolor='w', alpha=0.7)
axes[0].plot([8.4, 60], m1.predict([[8.4], [60]]), ls='--', color='green', linewidth=2)
axes[0].scatter(60, m1.predict([[60]]), color='r', marker="o", s=200, zorder=5)
axes[0].set_xlabel('Length (inch)')
axes[0].set_ylabel('Weight (lbs)')
axes[0].set_title(f'Linear Regression (R² = {test_r2:.4f})')
axes[0].grid(True, alpha=0.3)

# Polynomial Regression
axes[1].scatter(train_X[:, 0], train_y, alpha=0.7)
# np.linspace includes both end points, so the curve reaches the marker at 60.
# np.arange with a fractional step excludes the stop value and can return an
# inconsistent number of points because of floating-point rounding.
point = np.linspace(8.4, 60, 517)  # 517 points -> spacing of 0.1
predictions = m2.coef_[0] * point**2 + m2.coef_[1] * point + m2.intercept_
axes[1].plot(point, predictions, color='blue', linewidth=2)
axes[1].scatter(60, m2.predict([[60**2, 60]]), color='r', marker="s", s=200, zorder=5)
axes[1].set_xlabel('Length (inch)')
axes[1].set_ylabel('Weight (lbs)')
axes[1].set_title(f'Polynomial Regression (R² = {test_r2_poly:.4f})')
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.show()