In [4]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
import numpy as np
import os
import json
import pickle
from tqdm import tqdm
from sklearn.metrics import f1_score

In [5]:
config = json.load(open("../../configure.json"))
amzn_2grams = []
amzn_3grams = []
amzn_ngrams = []
aapl_2grams = []
aapl_3grams = []
aapl_ngrams = []

In [6]:
path = config["project_dir"] + config["feature_pkls"]
pkls = os.listdir(path)
results = []
for pkl in tqdm(pkls):
    data = pd.read_pickle(path + pkl, compression='infer')
    train_x = np.stack(data['features'])
    train_y = np.stack(data['trend'].astype('int'))

    pca = PCA(n_components=200)
    pComponents = pca.fit_transform(train_x)
    x_train, x_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.2)
    svm_model = svm.SVC().fit(x_train, y_train)
    y_pred = svm_model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    names = pkl.split("_", )
    pkl_save_path = config['project_dir'] + "models/svm/pkls/ngram/"
    model_name = "svm_" + pkl
    with open(pkl_save_path + model_name, 'wb') as f:
        pickle.dump(svm_model, f)
    results = [names[1], round(np.mean(acc) * 100, 2), round(np.mean(f1) * 100, 2)]
    if "amzn" == names[0]:
        if "2gram" in names[2]:
            amzn_2grams.append(results)
        if "3" in names[2]:
            amzn_3grams.append(results)
        if "n" in names[2]:
            amzn_ngrams.append(results)
    if "aapl" == names[0]:
        if "2gram" in names[2]:
            aapl_2grams.append(results)
        if "3" in names[2]:
            aapl_3grams.append(results)
        if "n" in names[2]:
            aapl_ngrams.append(results)

100%|██████████| 36/36 [1:39:17<00:00, 165.48s/it]  


In [7]:
print("amazon 2grams results\n", amzn_2grams)
print("amazon 3grams results\n", amzn_3grams)
print("amazon ngrams results\n", amzn_ngrams)
print("apple 2grams results\n", aapl_2grams)
print("apple 3grams results\n", aapl_3grams)
print("apple ngrams results\n", aapl_ngrams)

amazon 2grams results
 [['30', 59.86, 23.23], ['60', 64.61, 22.69], ['240', 58.89, 68.59], ['5', 65.93, 21.82], ['1440', 58.19, 31.65], ['15', 56.95, 16.12]]
amazon 3grams results
 [['60', 59.77, 9.15], ['30', 58.27, 11.24], ['240', 54.75, 69.07], ['15', 56.34, 10.14], ['1440', 56.6, 22.36], ['5', 61.36, 7.58]]
amazon ngrams results
 [['15', 58.63, 31.09], ['5', 64.52, 19.24], ['1440', 61.8, 45.75], ['240', 62.06, 68.29], ['60', 60.56, 22.49], ['30', 61.36, 33.79]]
apple 2grams results
 [['15', 60.75, 41.94], ['30', 62.46, 43.29], ['60', 64.16, 35.83], ['240', 63.32, 65.4], ['1440', 64.27, 56.51], ['5', 64.68, 26.58]]
apple 3grams results
 [['15', 58.19, 29.82], ['1440', 59.34, 42.52], ['240', 57.85, 64.61], ['60', 61.12, 26.9], ['30', 60.52, 35.18], ['5', 63.64, 22.37]]
apple ngrams results
 [['5', 65.52, 26.48], ['240', 61.91, 65.76], ['60', 63.3, 36.75], ['1440', 63.64, 55.25], ['30', 61.38, 41.97], ['15', 62.25, 43.38]]
