In [1]:
import os
import pandas
import numpy
import cv2


def load_csv(path="data/csv/0.csv"):
    """Read the tabular data set into a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to the file this notebook has
        always used, so existing ``load_csv()`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents, unmodified.
    """
    return pandas.read_csv(path)

def load_img(img_dir="data/img"):
    """Load every readable image in a directory, keyed by file-name stem.

    Parameters
    ----------
    img_dir : str, optional
        Directory to scan. Defaults to the directory this notebook has
        always used, so existing ``load_img()`` calls behave as before.

    Returns
    -------
    dict
        Maps the part of each file name before its first ``.`` to the array
        returned by ``cv2.imread``. Entries are inserted in sorted file-name
        order, so iteration order is deterministic across runs.
    """
    img_data = {}
    # os.listdir returns entries in arbitrary order; sort so downstream code
    # that iterates over the dict sees a stable, reproducible order.
    for filename in sorted(os.listdir(img_dir)):
        img = cv2.imread(os.path.join(img_dir, filename))
        if img is None:
            # cv2.imread returns None instead of raising when the entry is not
            # a decodable image (hidden files, sub-directories, ...). Storing
            # None would break the later stacking into a single image array.
            continue
        ymd = filename.split(".")[0]
        img_data[ymd] = img
    return img_data

def load_txt_csv(news_path="data/text/news.csv", vec_path="data/text/news_vec.csv"):
    """Load the news table and attach its numeric feature vectors column-wise.

    Parameters
    ----------
    news_path : str, optional
        CSV with one row per news item; must contain a ``Date`` column.
    vec_path : str, optional
        Comma-separated numeric matrix with one row per news item.

    Both defaults are the paths this notebook has always used, so existing
    ``load_txt_csv()`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The news table, plus a lower-case ``date`` copy of ``Date``, plus one
        ``news_feature_<i>`` column per column of the vector file.
    """
    news_df = pandas.read_csv(news_path)
    # Lower-case alias so this frame can be joined on "date" elsewhere.
    news_df["date"] = news_df["Date"]
    # ndmin=2 keeps a single-row file as shape (1, n_features). Without it
    # loadtxt returns a 1-D array and DataFrame(arr) would become
    # n_features rows of a single column.
    arr = numpy.loadtxt(vec_path, delimiter=",", ndmin=2)
    news_feature = pandas.DataFrame(arr)
    news_feature.columns = ["news_feature_{}".format(col) for col in news_feature.columns]
    # Column-wise concat aligns on the default integer index, i.e. row i of
    # the vector file is attached to row i of the news table.
    return pandas.concat([news_df, news_feature], axis=1)

In [2]:
# Load the three data sources in one step: tabular features, images keyed by
# date string, and the news table with its feature vectors.
csv, img, news_df = load_csv(), load_img(), load_txt_csv()

In [3]:
def _is_non_negative(value):
    """Return 1 when value >= 0, otherwise 0 (NaN compares False, so it yields 0)."""
    if value >= 0:
        return 1
    return 0


def _at_most_half(value):
    """Return 1 when value <= 0.5, otherwise 0."""
    if value <= 0.5:
        return 1
    return 0


def _above_half(value):
    """Return 1 when value > 0.5, otherwise 0."""
    if value > 0.5:
        return 1
    return 0


# Flag whether the *following* row's profit is non-negative. shift(-1) leaves
# NaN in the last row, which the helper maps to 0.
next_row_profit = csv["profit"].shift(-1)
csv["nextday_profit"] = next_row_profit.apply(_is_non_negative)
# Disabled alternative: one-hot labels derived from nextday_profit.
#csv["label_0"] = csv["nextday_profit"].apply(lambda x: 1 if x == 0 else 0)
#csv["label_1"] = csv["nextday_profit"].apply(lambda x: 1 if x == 1 else 0)

# Active labels: one-hot encoding of f1 thresholded at 0.5.
# NOTE(review): f1 is also one of the model's input features (see the
# train_csv selection further down), so these labels are derivable from the
# inputs - confirm this is intentional.
csv["label_0"] = csv["f1"].apply(_at_most_half)
csv["label_1"] = csv["f1"].apply(_above_half)

In [4]:
# Train/test cut-off. Image keys (file-name stems) that compare <= this string
# go to the training set and keys that compare > it go to the test set; the
# comparison is a plain string comparison.
split_ymd = "2016-10-30"

In [5]:
# Partition the image keys around the cut-off in a single pass:
# keys <= split_ymd are training dates, everything else is a test date.
train_ymd, test_ymd = set(), set()
for ymd in img:
    if ymd <= split_ymd:
        train_ymd.add(ymd)
    else:
        test_ymd.add(ymd)

In [7]:
def _aligned_split(ymd_keys):
    """Assemble images, tabular features, news features and labels for a set
    of dates so that row i of every returned object refers to the same date.

    Previously the images followed the iteration order of ``img``, the csv
    features/labels followed the row order of ``csv`` and the news features
    followed the row order of ``news_df``; nothing tied those three orders
    together, so a sample could combine inputs and a label from different
    dates.

    Parameters
    ----------
    ymd_keys : iterable of str
        Date keys (as used in ``img``) belonging to this split.

    Returns
    -------
    tuple
        ``(dates_df, images, merged_df, csv_features, news_features, labels)``
        all ordered by ascending date string.

    Raises
    ------
    ValueError
        If the three sources do not end up with the same number of rows
        (for example because a date occurs more than once in ``csv`` or
        ``news_df``).
    """
    csv_dates = set(csv["date"])
    news_dates = set(news_df["date"])
    # Keep only dates available in all three sources, in one canonical order.
    dates = sorted(ymd for ymd in ymd_keys if ymd in csv_dates and ymd in news_dates)
    dates_df = pandas.DataFrame({"date": dates})

    images = numpy.array([img[ymd] for ymd in dates])
    # An inner merge preserves the order of the left keys, so putting the
    # sorted dates on the left imposes that order on the result. Re-select the
    # columns afterwards to keep the original column order of `csv`.
    merged_df = dates_df.merge(csv, on="date", how="inner")[list(csv.columns)]
    # Columns from position 3 onward of news_df are used as the news features,
    # as before.
    news_features = dates_df.merge(news_df, on="date", how="inner")[news_df.columns[3:]]

    if not (len(images) == len(merged_df) == len(news_features)):
        raise ValueError(
            "misaligned split: {} images, {} csv rows, {} news rows".format(
                len(images), len(merged_df), len(news_features)
            )
        )

    csv_features = merged_df[["f1", "f2", "f3"]]
    labels = merged_df[["label_0", "label_1"]]
    return dates_df, images, merged_df, csv_features, news_features, labels


# `tmp` is kept as the per-split date frame for compatibility with the
# previous version of this cell.
tmp, train_img, train_df, train_csv, train_news_csv, train_label = _aligned_split(train_ymd)

tmp, test_img, test_df, test_csv, test_news_csv, test_label = _aligned_split(test_ymd)

In [8]:
import keras
from keras.layers.core import Dense, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Concatenate
from keras.layers.normalization import BatchNormalization
from keras import Input, Sequential
from keras.optimizers import SGD
from keras.losses import categorical_crossentropy

Using TensorFlow backend.


In [9]:
# The stacked training images are unpacked as four axes; the last three
# define the shape of a single image fed to the network.
length, height, width, channel = train_img.shape

input_img = Input(name="input_img", shape=(height, width, channel))

# Both convolutions use the same hyper-parameters.
conv_options = dict(filters=32, kernel_size=(3, 3), padding="same", activation="relu")

# Image branch, listed in the order the layers are applied.
img_branch = [
    Conv2D(name="img_conv2d_1", **conv_options),
    MaxPooling2D(pool_size=(2, 2), name="img_maxpool_1"),
    Conv2D(name="img_conv2d_2", **conv_options),
    BatchNormalization(name="img_bn_1"),
    MaxPooling2D(pool_size=(2, 2), name="img_maxpool_2"),
    Flatten(name="img_flatten_1"),
    Dense(8, activation="relu", name="img_dence_1"),
]

# Thread the input tensor through the branch; n_img ends up as the 8-unit
# dense output of the image branch.
n_img = input_img
for img_layer in img_branch:
    n_img = img_layer(n_img)

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# Tabular branch. The input width is taken from the training data (as the
# image branch already does with train_img.shape) instead of being
# hard-coded, so the model keeps matching the data if the selected feature
# columns change. With the current ["f1", "f2", "f3"] selection this is 3.
input_csv = Input(name="input_csv", shape=(train_csv.shape[1], ))
n_csv = Dense(8, activation="relu", name="csv_dence_1")(input_csv)

# News branch. Width follows the number of news feature columns in the
# training data rather than a literal 768.
input_news = Input(name="input_news", shape=(train_news_csv.shape[1], ))
n_news = Dense(8, activation="relu", name="news_dence_1")(input_news)

In [11]:
# Join the three 8-unit branch outputs and classify into two classes.
branch_outputs = [n_img, n_csv, n_news]
merged = Concatenate()(branch_outputs)
hidden = Dense(8, activation="relu", name="merge_dense_1")(merged)
normalized = BatchNormalization(name="merge_bn_1")(hidden)
# Softmax over 2 units, matching the two label columns used as targets.
network = Dense(2, activation="softmax", name="output")(normalized)

In [12]:
# Multi-input model: the order of `inputs` here (image, csv, news) is the
# order in which input arrays must be supplied when fitting or predicting.
model = keras.Model(inputs=[input_img, input_csv, input_news], outputs=network)

In [13]:
# Plain SGD with default settings, categorical cross-entropy on the
# two-column one-hot labels, and accuracy reported during training.
model.compile(
    optimizer=SGD(),
    loss=categorical_crossentropy,
    metrics=["accuracy"],
)

In [14]:
# Inputs are listed in the same order as the model's inputs: image, csv, news.
train_inputs = [train_img, numpy.array(train_csv), train_news_csv]
validation_inputs = [test_img, numpy.array(test_csv), test_news_csv]

# The test split doubles as the validation set reported after each epoch.
history = model.fit(
    x=train_inputs,
    y=numpy.array(train_label),
    batch_size=16,
    epochs=10,
    validation_data=(validation_inputs, numpy.array(test_label)),
)

Instructions for updating:
Use tf.cast instead.
Train on 202 samples, validate on 42 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
