In [3]:
import pandas as pd
merged_data = pd.read_csv("../dataset/merged_data.csv",index_col=0)

### SVM 

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

# 1. Cross-Validation

X = merged_data.drop(columns=['date', 'VKOSPI_Label'])
y = merged_data['VKOSPI_Label']


# 5-fold SVM
cv_scores = cross_val_score(SVC(kernel='linear'), X, y, cv=5, scoring='accuracy')
cv_mean = cv_scores.mean()

cv_mean

0.6345454545454545

In [11]:
# 2. 하이퍼파라미터 튜닝

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto']
}

grid_search_cv = GridSearchCV(SVC(), param_grid, cv=5, n_jobs=-1)
grid_search_cv.fit(X, y)

best_params_cv = grid_search_cv.best_params_
best_score_cv = grid_search_cv.best_score_

best_params_cv, best_score_cv

({'C': 1, 'gamma': 'scale', 'kernel': 'sigmoid'}, 0.7254545454545454)

In [12]:
# 3. Feature Importance
best_svm_model = SVC(kernel='linear', C=best_params_cv['C'], gamma=best_params_cv['gamma'])
best_svm_model.fit(X, y)

feature_importances = best_svm_model.coef_

# 데이터프레임으로 정리
feature_importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': feature_importances[0]
}).sort_values(by='Importance', ascending=False)

feature_importance_df


Unnamed: 0,Feature,Importance
10,kospi_p_range,1.088974
15,NAS_day_fluc_range,0.738331
6,cny_night_change,0.445919
21,sp_day_change,0.402114
16,niv_night_change,0.276642
27,vix_day_fluc_range,0.195732
5,cny_yesterday_change,0.09288
25,vix_close_change,0.045611
3,cd_p_change(%),-0.043847
14,NAS_day_change,-0.06568
