In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# End-to-end SVR experiment: load the energy data, draw a reproducible
# subsample, split it, standardize the single input feature, fit an RBF
# support-vector regressor and predict on both partitions.

# Upper bound on the number of rows used for training/debugging.
SAMPLE_SIZE = 5000
# Single seed shared by sampling and splitting so a run is reproducible.
RANDOM_STATE = 42

# Load and Sample Data
print("Loading data...")
df = pd.read_csv("energy.csv")

print("Original shape:", df.shape)

# Use a smaller sample for debugging/training speed.
# The sample size is capped at the number of available rows:
# DataFrame.sample draws without replacement by default and raises
# ValueError when n is larger than the frame, so a short CSV would
# otherwise abort the script here.
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=RANDOM_STATE)
print("Sampled shape:", df.shape)

# Select Features and Target
print("Selecting features and target...")
feature_cols = ['temp']
target_cols = ['load']

# Both selections use a list of column names, so x and y are 2-D arrays
# of shape (n_rows, 1).
x = df[feature_cols].values
y = df[target_cols].values

# Train/Test Split
print("Splitting data...")
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Feature Scaling
print("Scaling features...")
scaler_x = StandardScaler()
# Fit the scaler on the training partition only and reuse its statistics
# for the test partition, so no test-set information leaks into training.
x_train = scaler_x.fit_transform(x_train)
x_test = scaler_x.transform(x_test)

# Model Training
print("Training SVR model...")
# NOTE(review): the target is left unscaled while epsilon and C are
# expressed in target units -- confirm 'load' is on a scale where
# epsilon=0.05 is meaningful, or consider scaling y as well.
model = SVR(kernel='rbf', gamma=0.5, C=10, epsilon=0.05)
# SVR.fit expects a 1-D target, hence the ravel() of the (n, 1) column.
model.fit(x_train, y_train.ravel())

# Prediction
print("Predicting on train and test sets...")
# predict() returns 1-D arrays; reshape to column vectors so the
# predictions have the same (n, 1) layout as y_train / y_test.
y_train_pred = model.predict(x_train).reshape(-1, 1)
y_test_pred = model.predict(x_test).reshape(-1, 1)

# Output Shapes
print("Train prediction shape:", y_train_pred.shape)
print("Test prediction shape:", y_test_pred.shape)

Loading data...
Original shape: (26304, 3)
Sampled shape: (5000, 3)
Selecting features and target...
Splitting data...
Scaling features...
Training SVR model...
Predicting on train and test sets...
Train prediction shape: (4000, 1)
Test prediction shape: (1000, 1)
