In [1]:
import numpy as np
from keras.models import Sequential  
from keras.layers import Dense, Dropout, Activation, Flatten  
from keras.layers import Convolution2D, MaxPooling2D  
from keras.optimizers import SGD
from keras.utils import np_utils
from scipy import misc
import glob
import matplotlib.pyplot as plt
from PIL import Image
import math
import pandas as pd


Using TensorFlow backend.


# 모델과 r2

In [2]:
seed = 7
np.random.seed(seed)
width = 1
height = 1

# R^2
def r_squared(y_true, y_hat):
    ssr = 0
    sst = 0
    e = np.subtract(y_true, y_hat)
    y_mean = np.mean(y_true)
    for item in e:
        ssr += item**2
    for item in y_true:
        sst += (item - y_mean)**2
    r2 = 1 - ssr / sst
    return r2


def compile_model(model):
    lrate = 0.01
    sgd = SGD(lr=lrate, momentum=0.9, decay=1e-6, nesterov=True)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=sgd)
    return model


def create_model():
    model = Sequential()

    model.add(Convolution2D(32, 3, 3,
                            border_mode='valid', 
                            input_shape=(100, 100, 3)))  
    model.add(Activation('relu'))  
    model.add(Convolution2D(32, 3, 3))  
    model.add(Activation('relu'))  
    model.add(MaxPooling2D(pool_size=(2, 2)))  
    model.add(Dropout(0.25))  
      
    model.add(Convolution2D(64, 3, 3, 
                            border_mode='valid'))  
    model.add(Activation('relu'))  
    model.add(Convolution2D(64, 3, 3))  
    model.add(Activation('relu'))  
    model.add(MaxPooling2D(pool_size=(2, 2)))  
    model.add(Dropout(0.25))  
      
    model.add(Flatten())  
    model.add(Dense(256))  
    model.add(Activation('relu'))  
    model.add(Dropout(0.5))

    model.add(Dense(2))  
    model.add(Activation('softmax'))  

    return model



# 이미지학습

In [3]:
def get_pixel_values():
    file_name = './figures_v2'
    pixels = []
    for filename in glob.glob(file_name + '\*.png'):
        im = misc.imread(filename)
        pixels.append(im)
    return pixels
    
#이미지    
def convert_image():
    file_name = './figures_v2'
    for filename in glob.glob(file_name + '\*.png'):
        img = Image.open(filename)
        img = img.convert('RGB')
        img.save(filename)
    
#이미지    
def plot_data(data):
    #t = np.arange(0, 29, 1) # 29 range맞게 수정 필요 
    t = np.arange(0, 33, 1)
    file_name_number = 0
    fig = plt.figure(frameon=False, figsize=(width, height))
    for group in data:
        print(len(group))
        #break
        #count = 30 # 34개로 수정 시 수정 필요 
        count = 34
        #while count <= (len(group)-10)
        while count <= (len(group)-5):
            high = []
            low = []
            for item in group[count-34:count]:
                high.append(item[0])
                low.append(item[1])
            file_name = r'\fig_' + str(file_name_number)
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            ax.plot(t, high[0:-1], 'b', t, low[0:-1], 'g')
            fig.savefig('./figures_v2' + file_name, dpi=100)
            fig.clf()
            file_name_number += 1 # 이름이니까 해도되고 안해도 됨 
            count += 10 #10개씩 만들고 싶다면 1개에서 10개로 수정 필요 
            #count += 10
    print('Created %d files!' % file_name_number)


# 수익률계산

In [19]:
#데이터추출 ---- 여기가 문제 같음 


def extract_data():
    file_name = 'data_3only1.csv' #해당파일 open
    infile = open(file_name, 'r')
    temp_buffer = []
    for line in infile:
        temp_buffer.append(line.strip('\n')) ## 여기가 의심스러움 
        #temp_buffer.append(line.strip(',')) ## 5개씩 엔터로 구분해야 한다. 근데 내 데이터는 그렇게 구성이 안되어있음 
    #temp_buffer = temp_buffer[8:]
    print('how many split?')
    print(len(temp_buffer)) ##
    temp_buffer = temp_buffer[8:] ### 수정
    i = 0
    groups = []
    temp = []
    for item in temp_buffer:
        if i != 1223: #갯수 확인하기 1223개씩 나누기
            temp.append(item)
            i += 1 # n갯수 추가하기 2번 10개로 바꿨을 때 돌아가는지 확인
        else:
            groups.append(temp)
            temp = []
            i = 0
    groups.append(temp)
    infile.close() #해당파일 close 
    return groups
#여기까지 csv파일 import시 영향 미치는 곳 
#________________________________________________________________________________________________    
#여기부터 가져온 파일 구분할 때 문제생기는 곳 


#데이터분할 - 의심 2 ( 6개씩 나눠야 하는데, 시작이 하나씩 밀리는 이유가 여기? )
def split_data(data):
    groups = []
    for item in data:
        temp_buffer = []
        for string in item:
            number = string.split(',')
            temp_buffer.append(number)
        groups.append(temp_buffer)
    #print(groups)# group 3개 0,1,2 
    return groups


#분할된 데이터 모으기
def load_sample_data():
    original_data = extract_data()
    splitted_data = split_data(original_data)
    useful_data = extract_useful_data(splitted_data)
    #return useful_data, splitted_data
    return useful_data #, original_data


#필요정보취합
def extract_useful_data(data):
    groups = []
    for group in data:
        temp_buffer = []
        for item in group:
            temp = [item[2], item[3]]  # item - extract에서 등장
            temp = [float(i) for i in temp]
            temp_buffer.append(temp)
            
    #print(temp_buffer)
    #print('aaaaa')## 확인
        groups.append(temp_buffer)
    return groups


#수익률계산  
def find_returns(data): 
    returns = []
    price1 = []
    price2 = []
    for group in data:
        # count = 34으로 고쳐야?
        count = 34
        while count <= (len(group)-5):
            current_data = group[count-1]
            future_data = group[count+4]
            p1 = np.mean(current_data)
            p2 = np.mean(future_data)
            price1.append(p1) #
            price2.append(p2) #
            
            
            #수정함
            #math.log(p2/p1)에서 
            if p1 <= 0 or p2 <= 0: #S1. 아예 ()안이 값이 말이 안되는 경우 
                returns.append(0)
                count += 1
            elif math.log(p2/p1)>= 2:
                #S2. log(p2/p1)이 2보다 커서 값이 의미가 없는 경우                    
                print(p1)
                print(p2)
                print('aaaa')
                returns.append(0)
                count += 1
            else:    
                returns.append(math.log(p2/p1)) #S3. 그외 정상적인경우
                count += 1
                
            
            
            
#             if p1 <= 0 or p2 <= 0:
#                 returns.append(0)
#                 count += 1
#             else:
#                 if math.log(p2/p1)>=2:                    
#                     print(p1)
#                     print(p2)
#                     print('aaaa')
#                 #else:    
#                 returns.append(math.log(p2/p1))
#                 count += 1
                
                
                
                
#     print(returns)
#     print(price1)
#     print(price2)
    return returns
    

	DATE	CLOSE	HIGH	LOW	OPEN	VOLUME
0	EXCHANGE%3DNYSEARCA	NaN	NaN	NaN	NaN	NaN
1	MARKET_OPEN_MINUTE=570	NaN	NaN	NaN	NaN	NaN
2	MARKET_CLOSE_MINUTE=960	NaN	NaN	NaN	NaN	NaN
3	INTERVAL=60	NaN	NaN	NaN	NaN	NaN
4	COLUMNS=DATE	CLOSE	HIGH	LOW	OPEN	VOLUME
5	DATA=	NaN	NaN	NaN	NaN	NaN
6	TIMEZONE_OFFSET=-240	NaN	NaN	NaN	NaN	NaN
7	a1476451800	214.19	214.2	214.14	214.14	185740 --- [7:]시 시작되는 기점 
8	1	214.15	214.22	214.14	214.19	178844 -- [8:]시 시작되는 기점 


* range[0,391,1] - 0 , 1~390까지 390개 6 리스트형으로 반복 나머지 0   
  range[0, 1224] - 0, 1~1223까지 1223개 6리스트형으로 반복 나머지 5 
   
   ## 여기서 차이 나오는건가?


In [21]:
def main():
    print('load_sample_data')
    data= load_sample_data()
    #data,x = load_sample_data() # x값확인위한 변수지정
    #return 0
    plot_data(data)
    print('convert_image')
    convert_image()
    x = np.asarray(get_pixel_values())
    y = np.asarray(find_returns(data))
    x_train = x[0:len(x)] #1번 len(x),len(y)로 고쳤을 때 돌아가는지 확인 - checkO
    y_train = y[0:len(y)]
    x_test = x[0:len(x)]
    y_test = y[0:len(y)]
    print(len(x)) #
    print(len(y)) #
    
#     print(len(x_train))
#     print((y_train))
#     print(len(x_test))
#     print(len(y_test))
    
    #return 0 #2
    #return
#     x_train = x[0:4340] #1번 len(x)로 고쳤을 때 돌아가는지 확인
#     y_train = y[0:4340]
#     x_test = x[0:4340]
#     y_test = y[0:4340]
#    y_true = y_test
#    y_train = np_utils.to_categorical(y_train, 2)
#    y_test = np_utils.to_categorical(y_test, 2)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0
    
    #return 0 #1
    model = create_model()
    model = compile_model(model)

    print('cnn')
    # Fit the model
    epochs = 1
    model.fit(x_train, y_train, validation_data=(x_test, y_test), 
              nb_epoch=epochs,
              shuffle=True, batch_size=100, verbose=1)
#    scores = model.evaluate(x_test, y_test, verbose=0)
#    print('Accuracy: %.2f%%' % (scores[1] * 100))
    classes = model.predict_classes(x_test, verbose=0)
    classes = list(classes)
    y_test = list(y_test)
    r2 = r_squared(y_test, classes)
    print(r2)


if __name__ == '__main__':
    main()


load_sample_data
how many split?
3679
1223
1223
1223
Created 357 files!
convert_image


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  """


1456
3555
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




cnn




ValueError: Input arrays should have the same number of samples as target arrays. Found 1456 input samples and 3555 target samples.

<Figure size 72x72 with 0 Axes>

Input arrays should have the same number of samples as target arrays. Found 357 input samples and 3556 target samples.

len(x)357 , len(y)3556


    print(len(x_test)) 357

    print(len(y_test)) 3556
    
    
    
    
    

In [None]:
data,x = load_sample_data()

In [None]:
x

In [None]:
import pandas as pd

In [None]:
data = {'x': x[0]},
       
        
        
#         'y':x[1][],
#        'z':x[2][]}

In [None]:
df = pd.DataFrame(data)

In [None]:
np.arange(0, 33, 1)

In [None]:
df.tail()

In [None]:
print(x[0][1223]) 
print(x[1][1223])
print(x[2][1223])

#한칸이 밀려서 들어가서 1~ 1223,0 , 2~1, 3~2 이런식으로 들어가는것 