## Support Vector Machine

In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

# Step 1: Load + Data Preprocessing
# Read the CSV (path is relative to the notebook's working directory) and drop
# rows whose target 'VALUE' is missing. dropna returns a new frame instead of
# mutating in place, so re-running this cell always starts from the raw file.
df = pd.read_csv('PEA01.20231009T211012.csv').dropna(subset=['VALUE'])

# Remove the 'UNIT' column
df_encoded = df.drop('UNIT', axis=1)

# One-hot encode the categorical columns; drop_first=True omits one level per
# column so the dummy columns are not perfectly collinear.
df_encoded = pd.get_dummies(
    df_encoded,
    columns=['STATISTIC Label', 'Age Group', 'Sex'],
    drop_first=True,
)

# Step 2: Data Splitting (80% train, 20% test)
# Every column except 'VALUE' is used as a feature.
# NOTE(review): assumes all remaining columns are numeric — TODO confirm.
X = df_encoded.drop('VALUE', axis=1)  # Features
y = df_encoded['VALUE']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42  # fixed seed keeps the split reproducible
)

# Step 3: Model Selection (Support Vector Regressor)
# NOTE(review): SVR is sensitive to feature scale and the features are not
# standardized here; consider evaluating a StandardScaler + SVR pipeline.
svr_model = SVR()

# Step 4: Hyperparameter Tuning with GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'epsilon': [0.1, 0.2, 0.3],  # Epsilon parameter
    'kernel': ['linear', 'rbf', 'poly']  # Kernel type
}

# 5-fold CV over the grid, scored by negative MAE (higher is better).
# refit=True (the default, spelled out here) retrains the best configuration
# on the full training set once the search finishes.
grid_search = GridSearchCV(
    estimator=svr_model,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_absolute_error',
    refit=True,
)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_

# Step 5: Take the Model Trained with the Best Hyperparameters
# The search already refit the winning SVR on all of X_train/y_train, so reuse
# it rather than constructing and fitting an identical model a second time.
best_svr_model = grid_search.best_estimator_

# Step 6: Model Evaluation on the held-out test split
predictions = best_svr_model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error (MAE): {mae}')

# Further regression metrics (e.g. R2, MSE) can be computed from
# y_test and predictions if needed.


Mean Absolute Error (MAE): 99.16514977441257


In [46]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


In [47]:
# Score the held-out predictions with three regression metrics:
# R2, Mean Absolute Error and Mean Squared Error.
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)

# Report the metrics in the order R2, MAE, MSE.
print(f'R-squared (R2) Score: {r2}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')

R-squared (R2) Score: 0.7345147664243825
Mean Absolute Error (MAE): 99.16514977441257
Mean Squared Error (MSE): 83485.20506014343
