# Support Vector Machine (SVM)

## Features

In [1]:
import finlab.ml as ml

# Load the fundamental-feature table from finlab and preview its last rows.
# NOTE(review): the recorded run of this cell ended in an AttributeError about
# pandas' `new_block` — presumably the cached data was pickled with a different
# pandas version than the one installed; confirm before relying on later cells.
dataset = ml.fundamental_features()
dataset.tail()



AttributeError: Can't get attribute 'new_block' on <module 'pandas.core.internals.blocks' from 'C:\\Users\\User\\.conda\\envs\\finlab2\\lib\\site-packages\\pandas\\core\\internals\\blocks.py'>

In [None]:
# Show the column labels of the loaded table to pick features from.
dataset.columns

In [None]:
# This bare expression is not the last statement of the cell, so the notebook
# does not display it.
dataset.columns
# The two columns used as model inputs: free cash flow and gross-profit growth
# rate. The trailing comment keeps an alternative pair (net-worth growth rate,
# after-tax ROE).
features = ['R69B_自由現金流量', 'R402_營業毛利成長率'] # alternative: ['R409_淨值成長率', 'R103_ROE稅後']
# Keep only the selected columns and drop every row with a missing value in
# either of them.
dataset = dataset[features].dropna(how='any')
dataset.tail(10)

## Add prediction

In [None]:
# NOTE(review): presumably adds a 'return' column to `dataset` in place (later
# cells read a 'return' column from frames derived from it) — confirm against
# finlab.ml.
ml.add_profit_prediction(dataset)
dataset.head()

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Scatter plot with the first selected feature on x and the second on y.
dataset.plot.scatter(features[0], features[1])

## Remove outliers

In [None]:
def is_valid(feature, nstd):
    """Flag values lying strictly within ``nstd`` standard deviations of the mean.

    Args:
        feature: Object exposing ``mean()`` and ``std()`` and supporting
            element-wise comparison (the notebook passes DataFrame columns).
        nstd: Half-width of the accepted band, in units of ``feature.std()``.

    Returns:
        Element-wise boolean mask, True where
        ``mean - nstd * std < value < mean + nstd * std``. Both bounds are
        exclusive, so values exactly on a bound are rejected.
    """
    center = feature.mean()
    half_width = nstd * feature.std()
    upper = center + half_width
    lower = center - half_width

    above_lower = feature > lower
    below_upper = feature < upper
    return above_lower & below_upper

# Keep only the rows where both features sit inside a narrow band around their
# own mean: 0.2 standard deviations for free cash flow and 0.05 for
# gross-profit growth rate.
cashflow_ok = is_valid(dataset['R69B_自由現金流量'], 0.2)
growth_ok = is_valid(dataset['R402_營業毛利成長率'], 0.05)
valid = cashflow_ok & growth_ok

# Rows that still contain a missing value in any column are discarded too.
dataset_rmoutliers = dataset.loc[valid].dropna()

# Distribution of free cash flow after filtering. Swap in
# 'R402_營業毛利成長率' to inspect the other feature, or use
# dataset_rmoutliers.plot.scatter(features[0], features[1]) for the joint view.
dataset_rmoutliers['R69B_自由現金流量'].hist(bins=100)

## Scale features

In [None]:
import pandas as pd
import sklearn.preprocessing as preprocessing

# Standardise the whole filtered table column by column, then wrap the raw
# array back into a DataFrame carrying the original row labels and column names.
scaled_values = preprocessing.scale(dataset_rmoutliers)
dataset_scaled = pd.DataFrame(
    scaled_values,
    columns=dataset_rmoutliers.columns,
    index=dataset_rmoutliers.index,
)

# Overlay the two standardised feature distributions on the same axes; the
# second histogram is semi-transparent so the first stays visible.
dataset_scaled['R69B_自由現金流量'].hist(bins=100)
dataset_scaled['R402_營業毛利成長率'].hist(bins=100, alpha=0.5)

# 'return' was standardised along with the features; overwrite it with the
# original values, which later cells threshold and compound.
dataset_scaled['return'] = dataset_rmoutliers['return']


## Training

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Hold out 10% of the rows for evaluation; the fixed seed makes the split
# reproducible.
dataset_train, dataset_test = train_test_split(
    dataset_scaled,
    test_size=0.1,
    random_state=0,
)

# Support-vector classifier with a sigmoid kernel.
# `kernel` accepts one of {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}.
cf = SVC(
    C=1.0,
    cache_size=200,
    class_weight=None,
    coef0=0.0,
    decision_function_shape='ovr',
    degree=3,
    gamma='auto',
    kernel='sigmoid',
    max_iter=-1,
    probability=False,
    random_state=None,
    shrinking=True,
    tol=0.001,
    verbose=False,
)

# Binary target: True when a row's return is above the training-set median.
train_median = dataset_train['return'].quantile(0.5)
cf.fit(dataset_train[features], dataset_train['return'] > train_median)

In [None]:
# Scoring does not train the model; it only reports how well the fitted
# classifier does on the training rows, using the same above-median target
# as in fitting.
cf.score(dataset_train[features], dataset_train['return'] > dataset_train['return'].quantile(0.5))

In [None]:
from mlxtend.plotting import plot_decision_regions

# Integer labels for the held-out rows: 1 when the return is above the
# test-set median, 0 otherwise.
test_returns = dataset_test['return']
labels_plot = (test_returns > test_returns.quantile(0.5)).astype(int).values

# The two model inputs as a plain array, in the same column order used for fitting.
features_plot = dataset_test[features].values

# Plot the fitted classifier's decision regions together with the test points.
plot_decision_regions(features_plot, labels_plot, cf)

In [None]:
# Echo the feature names (the x and y axes of the plots above).
features

## Backtest (回測)

In [None]:
import matplotlib.pyplot as plt

# Backtest on the held-out rows: on every date, split the stocks by the
# classifier's prediction and compare the average return of each group.
history = dataset_test.copy()
history['svm prediction'] = cf.predict(dataset_test[features])
# Move the index levels into ordinary columns so 'date' can be grouped on.
history = history.reset_index()

dates = sorted(set(history['date']))

# The classifier was fitted on a boolean target, so its predictions are
# truthy for "buy"; normalise to a boolean mask once instead of comparing
# against True/False on every date.
is_buy = history['svm prediction'].astype(bool)

# Mean return per date for each group. Reindexing on `dates` keeps one entry
# per date (NaN when a group has no stocks on that date), matching a
# date-by-date loop over the frame without rescanning it for every date.
buy_returns = history[is_buy].groupby('date')['return'].mean().reindex(dates)
sell_returns = history[~is_buy].groupby('date')['return'].mean().reindex(dates)

# Per-date average return of the stocks we want to buy / do not want to buy,
# kept as plain lists under their original names for any later cell.
seasonal_returns1 = buy_returns.tolist()
seasonal_returns2 = sell_returns.tolist()

plt.style.use("ggplot")

# Compound the per-date returns into an equity curve for each group.
# NOTE(review): cumprod assumes 'return' is a gross growth factor (e.g. 1.05
# for +5%) rather than a percentage change — confirm against finlab.ml.
buy_returns.cumprod().plot(color='red', label='predicted buy')
sell_returns.cumprod().plot(color='blue', label='predicted sell')
plt.legend()