In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score

# 1. Download the Advertising dataset (a CSV hosted on GitHub) into a DataFrame.
url = "https://raw.githubusercontent.com/marcopeix/ISL-linear-regression/master/data/Advertising.csv"
data = pd.read_csv(url)

# Sanity check: show which columns were actually parsed from the file.
column_names = data.columns.tolist()
print("Columns in the file:", column_names)
# Output recorded for this cell:
#   ['Unnamed: 0', 'TV', 'radio', 'newspaper', 'sales']
# NOTE: 'Unnamed: 0' is an extra leading column (presumably the row index that
# was saved with the CSV -- verify against the file).

# 2. Separate the predictors (X) from the value to predict (y).
feature_columns = ['TV', 'radio', 'newspaper']
target_column = 'sales'
X = data[feature_columns]
y = data[target_column]

# 3. Hold out 25% of the rows for testing; the fixed random_state makes the
#    shuffle (and therefore the split) repeatable between runs.
split_parts = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# 4. Standardize the features (very important for SVR!).
#    The scaler learns its statistics from the training rows only and is then
#    applied unchanged to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 5. Build the SVR (RBF kernel) and fit it on the scaled training data.
svr_params = {'kernel': 'rbf', 'C': 10, 'epsilon': 0.5}
model = SVR(**svr_params).fit(X_train, y_train)

# 6. Score the fitted model on the held-out test rows.
predictions = model.predict(X_test)

# Both metrics compare the true test targets against the model's predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
r2 = r2_score(y_true=y_test, y_pred=predictions)

# Collect the report first, then emit it one line at a time.
report_lines = (
    f"Average error (MAE)  : {mae:.2f}",
    f"R² score             : {r2:.3f}",
    f"Number of test points: {len(y_test)}",
)
for report_line in report_lines:
    print(report_line)

Columns in the file: ['Unnamed: 0', 'TV', 'radio', 'newspaper', 'sales']
Average error (MAE)  : 0.60
R² score             : 0.977
Number of test points: 50
