# 재무

In [None]:

# coding: utf-8

# In[1]:


import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pandas_datareader.data as web
import time
import tensorflow
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
import logging
import math
import os
from sklearn.preprocessing import MinMaxScaler, scale
from keras.utils import np_utils
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation
from keras import optimizers
from sklearn.metrics import mean_squared_error
from keras.models import load_model
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split


# In[2]:


# Raise pandas' display limits so wide / long frames are shown without truncation.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 500)


# # Load data / tidy data

# In[3]:


# Financial-statement workbooks ('재무사항'): the main set and the validation set.
IT_finance = pd.read_excel('./data/dongboo/IT_TOTAL_재무사항.xlsx')
IT_validation_finance = pd.read_excel('./data/dongboo/IT_validation_재무사항.xlsx')


# In[4]:


# Stock-price ('주가') data taken from KRX
IT_stock = pd.read_excel('./data/dongboo/IT_주가_total_krx.xlsx')
IT_validation_stock = pd.read_excel('./data/dongboo/IT_validation_주가.xls')

# IT_stock = pd.read_excel('./data/dongboo/IT_TOTAL_주가.xlsx')
# Medicine_stock = pd.read_excel('./data/dongboo/제약_TOTAL_주가.xlsx')


# In[5]:


# Left-join the price columns onto the financial rows. The join key is
# ('회사명', '거래소코드', '회계년도') = (company name, exchange code, fiscal period);
# financial rows without a matching price row are kept with NaN prices.
IT = pd.merge(IT_finance,IT_stock,how = 'left', on = ['회사명','거래소코드','회계년도'])
IT_validation = pd.merge(IT_validation_finance,IT_validation_stock,how = 'left', on = ['회사명','거래소코드','회계년도'])


# # 회사별로 분리

# In[6]:


# Training companies: one frame per company, selected by exact match on
# '회사명' (company name) in the merged table.
lg = IT[IT['회사명'] == 'LG이노텍(주)']
sdi = IT[IT['회사명'] == '삼성SDI(주)']
ssem = IT[IT['회사명'] == '삼성전기(주)']
hynix = IT[IT['회사명'] == '에스케이하이닉스(주)']

# -----validation ---------
# Validation companies, taken from the separately merged validation table.

samsungelectric = IT_validation[IT_validation['회사명'] == '삼성전자(주)']
lgelectric = IT_validation[IT_validation['회사명'] == '엘지전자(주)']
sds = IT_validation[IT_validation['회사명'] == '삼성에스디에스(주)']
lgdisplay = IT_validation[IT_validation['회사명'] == '엘지디스플레이(주)']


# In[7]:


def classify(stock):
    """Add next-row close, period return and an up/down label to ``stock``.

    The frame is modified **in place** (the call sites in this file discard
    the return value and rely on that) and is also returned.

    Parameters
    ----------
    stock : pandas.DataFrame
        Rows of a single company. Must contain the columns '회계년도'
        (used as the sort key) and '종가' (closing price).

    Returns
    -------
    pandas.DataFrame
        The same object, sorted ascending by '회계년도', re-indexed 0..n-1,
        with three added columns:

        * '3개월후종가'   - '종가' of the following row (NaN on the last row).
          The name means "close 3 months later"; that holds only if rows
          are quarterly - TODO confirm against the data.
        * '분기수익률'     - (next close - close) / close.
        * '수익률상승하락' - 0.0 when the return is below 0.005, else 1.0.
          The last row (NaN return) is labelled 1.0; callers drop that row.
    """
    # Sort by fiscal period and renumber the rows so that "next row" below
    # means "next period".
    stock.sort_values('회계년도', ascending=True, inplace=True)
    stock.reset_index(drop=True, inplace=True)

    # New columns.
    stock['3개월후종가'] = stock['종가'].shift(-1)
    stock['분기수익률'] = (stock['3개월후종가'] - stock['종가']) / stock['종가']

    # Assign the whole column at once. Writing row by row through
    # ``stock[col][i] = ...`` is chained assignment, which pandas does not
    # guarantee to write back (and never does under copy-on-write).
    # ``~(r < 0.005)`` rather than ``r >= 0.005`` keeps NaN returns mapped to 1.0.
    stock['수익률상승하락'] = (~(stock['분기수익률'] < 0.005)).astype(float)

    return stock


# In[8]:


# classify() sorts each frame and adds the target columns in place, so the
# return values are intentionally ignored here.
classify(lg)
classify(sdi)
classify(ssem)
classify(hynix)

# -----validation ---------

classify(samsungelectric)
classify(lgelectric)
classify(sds)
classify(lgdisplay)


# In[9]:


# The model predicts whether the price is higher three months later,
# so for the last row there is no later price to check against.
# The last row of every company is therefore dropped.

lg = lg.iloc[:-1,:]
sdi = sdi.iloc[:-1,:]
ssem = ssem.iloc[:-1,:]
hynix = hynix.iloc[:-1,:]

# -----validation ---------

samsungelectric = samsungelectric.iloc[:-1,:]
lgelectric = lgelectric.iloc[:-1,:]
sds = sds.iloc[:-1,:]
lgdisplay = lgdisplay.iloc[:-1,:]


# # 산업별 변수 <3개월 후 종가>와의 상관관계 분석


# In[11]:


# Per company: Pearson correlation of every numeric column with '3개월후종가',
# sorted from most positive to most negative. After reset_index() the row
# label of each df_* frame is the column's position in the correlation
# matrix; later cells use that label as a variable id.
#
# Numeric columns are selected explicitly because DataFrame.corr no longer
# skips non-numeric columns (e.g. '회사명') by default on pandas >= 2.0 and
# raises instead.
lg_corr = lg.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_lg = pd.DataFrame(lg_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_lg.columns = ['지표','3개월후종가']

sdi_corr = sdi.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_sdi = pd.DataFrame(sdi_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_sdi.columns = ['지표','3개월후종가']

ssem_corr = ssem.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_ssem = pd.DataFrame(ssem_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_ssem.columns = ['지표','3개월후종가']

hynix_corr = hynix.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_hynix = pd.DataFrame(hynix_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_hynix.columns = ['지표','3개월후종가']


# # 산업별 상관관계 데이터프레임

# In[12]:


# Side-by-side view of the four per-company rankings (notebook cell output only).
pd.concat([df_lg.reset_index(drop = True), df_sdi.reset_index(drop = True), df_ssem.reset_index(drop = True), df_hynix.reset_index(drop = True)],axis = 1)


# In[13]:


list_corr = []
order = 30 # take the top 30 correlated variables of each company

# Collect the row labels (variable ids) of the first `order` rows of every
# ranking. The rankings are sorted by signed correlation, so "top" means
# most positive, and '3개월후종가' itself is among them (filtered out below).
for i in range(len(df_lg.index[:order])):
    list_corr.append(df_lg.index[:order][i])
    list_corr.append(df_sdi.index[:order][i])
    list_corr.append(df_ssem.index[:order][i])
    list_corr.append(df_hynix.index[:order][i])

# Count how many of the four top lists each variable id appears in.
variable = pd.Series(list_corr).value_counts()
variable = pd.DataFrame(variable).reset_index()
variable.columns = ['var_index', 'number']


# In[14]:


# Translate each variable id back to its column name via df_lg.
# NOTE(review): assumes all four frames share the same id -> name mapping,
# i.e. identical numeric columns in identical order - confirm.
var = []
for i in range(len(variable)):
    var.append(df_lg[df_lg.index == variable['var_index'][i]]['지표'].values[0])
var = pd.DataFrame(var)
var.columns = ['var']


# In[15]:


corr = pd.concat([variable,var], axis = 1)
corr = corr[['var_index','var','number']]
# Remove the columns derived from the future price so they cannot be picked as features.
corr = corr[corr['var'] != '3개월후종가'].reset_index(drop = True)
corr = corr[corr['var'] != '수익률상승하락'].reset_index(drop = True)
corr = corr[corr['var'] != '분기수익률'].reset_index(drop = True)
corr


# # 위 자료를 보고 변수 선정

# In[16]:


# Feature frames: the first 20 variable names listed in corr['var'] ...
deep_lg = lg[corr['var'][0:20]]
deep_sdi = sdi[corr['var'][0:20]]
deep_ssem = ssem[corr['var'][0:20]]
deep_hynix = hynix[corr['var'][0:20]]

# ... plus the up/down label appended as the LAST column (later cells rely on
# that position: iloc[:, :-1] = features, iloc[:, [-1]] = label).
deep_lg['수익률상승하락'] = lg['수익률상승하락']
deep_sdi['수익률상승하락'] = sdi['수익률상승하락']
deep_ssem['수익률상승하락'] = ssem['수익률상승하락']
deep_hynix['수익률상승하락'] = hynix['수익률상승하락']

# ---------------- validation ----------------------
# Same columns for the validation companies (the variable list comes from the
# training companies only).

deep_samsungelectric = samsungelectric[corr['var'][0:20]]
deep_lgelectric = lgelectric[corr['var'][0:20]]
deep_sds = sds[corr['var'][0:20]]
deep_lgdisplay = lgdisplay[corr['var'][0:20]]

deep_samsungelectric['수익률상승하락'] = samsungelectric['수익률상승하락']
deep_lgelectric['수익률상승하락'] = lgelectric['수익률상승하락']
deep_sds['수익률상승하락'] = sds['수익률상승하락']
deep_lgdisplay['수익률상승하락'] = lgdisplay['수익률상승하락']


# In[17]:


# Standardization
# Every company gets its own StandardScaler fitted on that company's rows
# only. iloc[:, :-1] leaves out the last column, which is the label.
# The scaled arrays are wrapped in DataFrames with default integer column names.

from sklearn.preprocessing import StandardScaler

scaler_lg = StandardScaler()
scale_lg = pd.DataFrame(scaler_lg.fit_transform(deep_lg.iloc[:,:-1]))

scaler_sdi = StandardScaler()
scale_sdi = pd.DataFrame(scaler_sdi.fit_transform(deep_sdi.iloc[:,:-1]))

scaler_ssem = StandardScaler()
scale_ssem = pd.DataFrame(scaler_ssem.fit_transform(deep_ssem.iloc[:,:-1]))

scaler_hynix = StandardScaler()
scale_hynix = pd.DataFrame(scaler_hynix.fit_transform(deep_hynix.iloc[:,:-1]))

# --------------- validation ---------------------
# Validation companies are likewise scaled with their own statistics,
# not with the statistics of the training companies.

scaler_samsungelectric = StandardScaler()
scale_samsungelectric = pd.DataFrame(scaler_samsungelectric.fit_transform(deep_samsungelectric.iloc[:,:-1]))

scaler_lgelectric = StandardScaler()
scale_lgelectric = pd.DataFrame(scaler_lgelectric.fit_transform(deep_lgelectric.iloc[:,:-1]))

scaler_sds = StandardScaler()
scale_sds = pd.DataFrame(scaler_sds.fit_transform(deep_sds.iloc[:,:-1]))

scaler_lgdisplay = StandardScaler()
scale_lgdisplay = pd.DataFrame(scaler_lgdisplay.fit_transform(deep_lgdisplay.iloc[:,:-1]))

# Convert back to the original values
# scaler_lg.inverse_transform(scale_lg)


# In[18]:


# Stack the unscaled per-company frames; only the last column (the label) is used from this.
deep_IT = pd.concat([deep_lg,deep_sdi,deep_ssem,deep_hynix], ignore_index=True)

# Stack the standardized features in the same company order, so rows line up with deep_IT.
scale_IT = pd.concat([scale_lg,scale_sdi,scale_ssem,scale_hynix], ignore_index=True)

# --------------- validation ---------------------

# Stack the unscaled validation frames; only the last column (the label) is used from this.
deep_IT_validation = pd.concat([deep_samsungelectric,deep_lgelectric,deep_sds,deep_lgdisplay], ignore_index=True)

# Stack the standardized validation features in the same company order.
scale_IT_validation = pd.concat([scale_samsungelectric,scale_lgelectric,scale_sds,scale_lgdisplay], ignore_index=True)



# # 상관관계 표 보기

# In[20]:


# import matplotlib.pyplot as plt
# from matplotlib import font_manager, rc
# import seaborn as sns
# import matplotlib
# font_location = "c:/Windows/fonts/malgun.ttf"
# font_name = font_manager.FontProperties(fname=font_location).get_name()
# matplotlib.rc('font', family=font_name)


# In[21]:


# plt.figure(figsize=(20,50))

# sns.heatmap(data = deep_IT.corr(), annot = True)
# # plt.savefig('tmp.png')


# In[22]:


def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style TensorFlow initializer scaled by fan-in + fan-out.

    Args:
        n_inputs: fan-in of the layer.
        n_outputs: fan-out of the layer.
        uniform: when true return a uniform initializer over
            [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))];
            otherwise return a truncated-normal initializer with
            stddev = sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        The initializer object created by ``tf.random_uniform_initializer`` or
        ``tf.truncated_normal_initializer``.
    """
    if not uniform:
        # The constant 3 gives approximately the same limits as the uniform
        # case, since the truncated normal re-draws values lying more than
        # 2 standard deviations from the mean.
        spread = tf.sqrt(3.0 / (n_inputs + n_outputs))
        return tf.truncated_normal_initializer(stddev=spread)

    # The constant 6 is the one used in the paper.
    bound = tf.sqrt(6.0 / (n_inputs + n_outputs))
    return tf.random_uniform_initializer(-bound, bound)


# In[23]:


# Single-layer model: logistic regression, sigmoid(X . W1 + b1), no hidden layer.

# Review : Learning rate and Evaluation
import tensorflow as tf
import random
import matplotlib.pyplot as plt

# Reset first, then seed. tf.set_random_seed() sets the seed on the current
# default graph, so seeding before reset_default_graph() would throw the seed
# away together with the old graph and make W1 / b1 differ between runs.
tf.reset_default_graph()
tf.set_random_seed(777)  # reproducibility


# Features: standardized variables of the four training companies.
# Labels: last column of deep_IT ('수익률상승하락'), kept 2-D via [-1].
x_data = scale_IT
y_data = deep_IT.iloc[:,[-1]]


# parameters
placeholder_num = len(x_data.columns)  # number of input features
learning_rate = 0.001


X = tf.placeholder(tf.float32, [None, placeholder_num])
Y = tf.placeholder(tf.float32, [None, 1])
# Fed by the training cells, but not connected to any op in this graph
# (there is no dropout layer).
keep_prob = tf.placeholder(tf.float32)


W1 = tf.get_variable("W1", shape=[placeholder_num, 1], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([1]))

hypothesis = tf.sigmoid(tf.matmul(X, W1) + b1)


# cost/loss function: mean binary cross-entropy.
# NOTE(review): tf.log(hypothesis) yields -inf/NaN if the sigmoid saturates to
# exactly 0 or 1; a logits-based loss would be numerically safer.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# train = tf.train.AdamOptimizer(1e-4).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))



# In[25]:


# Launch graph
# Train on 80 % of the training-company rows and track accuracy on the other 20 %.

validation_number = 20  # only referenced by commented-out code in the visible cells

graph_cost = []  # training cost per step
graph_acc = []   # hold-out accuracy per step

# Random 80/20 row split with a fixed random_state.
x_data_train, x_data_test, y_data_train, y_data_test = train_test_split(x_data, y_data, test_size=0.2, random_state=0)


with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        # One full-batch gradient-descent step on the training split.
        # NOTE(review): keep_prob is fed, but no op in the graph defined above appears to use it.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 0.8})       
        
        # Evaluate on the hold-out split after every step; h, c, a keep the
        # values of the final step once the loop ends.
        h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: x_data_test, Y: y_data_test, keep_prob: 1.0})
        
        graph_cost.append(cost_val)
        graph_acc.append(a)
        # Log the first 10 steps and then every 200th step.
        if step % 200 == 0 or step < 10 :
            print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))    


# In[26]:


# Highest hold-out accuracy reached at any step ('모델 최고 정확도' = "best model accuracy").
# NOTE(review): this is the maximum over all steps on the same split it is
# reported for, so it is an optimistic figure, not the final model's accuracy.
print('모델 최고 정확도: ', max(graph_acc) * 100 , '%')


# In[27]:


# Global matplotlib font sizes for the figures that follow.
plt.rc('font', size=15)          # controls default text sizes
plt.rc('axes', titlesize=15)     # fontsize of the axes title
plt.rc('axes', labelsize=15)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=6)    # fontsize of the tick labels
plt.rc('ytick', labelsize=9)    # fontsize of the tick labels
plt.rc('legend', fontsize=15)    # legend fontsize
plt.rc('figure', titlesize=15)  # fontsize of the figure title


# In[28]:


# Training cost and hold-out accuracy per step on one axis; y is limited to [0, 1.5].
plt.figure(figsize=(14, 8))
plt.ylim([0,1.5])
plt.plot(graph_cost, color = 'blue', label = 'model cost')
plt.plot(graph_acc, color = 'red', label = 'Validation Acc')
plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# In[29]:


# Per-step history as a table (shown as the notebook cell output).
pd.DataFrame({'graph_cost':graph_cost,'graph_acc': graph_acc})


# In[30]:


# pd.DataFrame(y_data.iloc[-validation_number:,:].values, c.ravel()).reset_index()
# Actual vs. predicted labels on the hold-out split; `c` holds the predictions of the last training step.
pd.DataFrame({'real':list(y_data_test.values.ravel()),'predict': list(c.ravel())})


# # <span style="color:red"> 비 가치주로 validation </span>

# In[31]:


# Launch graph
# Re-train from freshly initialized variables on ALL training-company rows and
# track accuracy on the separate validation companies.

validation_number = 20  # only referenced by commented-out code in the visible cells


# samsungelectric / lgelectric / sds / lgdisplay

# x_val_data = scale_lgdisplay
# y_val_data = deep_lgdisplay.iloc[:,[-1]]


graph_cost = []  # training cost per step
graph_acc = []   # validation-company accuracy per step

# All four validation companies stacked together.
x_val_data = scale_IT_validation
y_val_data = deep_IT_validation.iloc[:,[-1]]

with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        # One full-batch gradient-descent step on the complete training set.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data, keep_prob: 0.8})       
        # Evaluate on the validation companies after every step.
        h, c, a = sess.run([hypothesis, predicted, accuracy],
                               feed_dict={X: x_val_data, Y: y_val_data, keep_prob: 1.0})
        
        graph_cost.append(cost_val)
        graph_acc.append(a)
        # Log the first 10 steps and then every 200th step.
        if step % 200 == 0 or step < 10 :
            print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))


# In[32]:


# Highest validation-company accuracy reached at any step ('모델 최고 정확도' = "best model accuracy").
# NOTE(review): maximum over all steps, not the accuracy of the final model.
print('모델 최고 정확도: ', max(graph_acc) * 100 , '%')


# In[33]:


# Per-step history as a table (shown as the notebook cell output).
pd.DataFrame({'graph_cost':graph_cost,'graph_acc': graph_acc})


# In[34]:


# Training cost and validation accuracy per step on one axis; y is limited to [0, 1.5].
plt.figure(figsize=(14, 8))
plt.ylim([0,1.5])
plt.plot(graph_cost, color = 'blue', label = 'model cost')
plt.plot(graph_acc, color = 'red', label = 'Validation Acc')
plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# In[35]:


# print('모델 정확도: ', a * 100 , '%')


# In[36]:


# pd.DataFrame(y_data.iloc[-validation_number:,:].values, c.ravel()).reset_index()
# Actual vs. predicted labels for the validation companies; `c` holds the predictions of the last training step.
pd.DataFrame({'real':list(y_val_data.values.ravel()),'predict': list(c.ravel())})



# 비재무

In [None]:

# coding: utf-8

# In[1]:


# import warnings
# warnings.filterwarnings("ignore")
# import pandas as pd
# import pandas_datareader.data as web
# import time
# import tensorflow
# import matplotlib.pyplot as plt
# import datetime as dt
# import numpy as np
# import logging
# import math
# import os
# from sklearn.preprocessing import MinMaxScaler, scale
# from keras.utils import np_utils
# import keras
# from keras.models import Sequential
# from keras.layers import Dense, LSTM, Activation
# from keras import optimizers
# from sklearn.metrics import mean_squared_error
# from keras.models import load_model
# from tqdm import tqdm_notebook
# from sklearn.model_selection import train_test_split


# In[2]:


# pd.set_option('display.max_columns', 500)
# pd.set_option('display.width', 1000)
# pd.set_option('display.max_rows', 500)


# # 데이터 불러오기 / 데이터 정리

# In[3]:


# Non-financial ('비재무') workbooks: the main set and the validation set.
IT_nonfinance = pd.read_excel('./data/dongboo/비재무/IT_비재무_TOTAL_최종.xlsx')
IT_validation_nonfinance = pd.read_excel('./data/dongboo/비재무validation/validation_비재무_it_total_최종.xlsx')


# In[4]:


# Stock-price ('주가') workbooks, same files as in the financial section.
IT_stock = pd.read_excel('./data/dongboo/IT_주가_total_krx.xlsx')

IT_validation_stock = pd.read_excel('./data/dongboo/IT_validation_주가.xls')


# In[5]:


# Left-join the price columns onto the non-financial rows; the key is
# ('회사명', '거래소코드', '회계년도') = (company name, exchange code, fiscal period).
# This rebinds `IT`, replacing the table built in the financial section.
IT = pd.merge(IT_nonfinance,IT_stock,how = 'left', on = ['회사명','거래소코드','회계년도'])

IT_val = pd.merge(IT_validation_nonfinance,IT_validation_stock,how = 'left', on = ['회사명','거래소코드','회계년도'])


# # 회사별로 분리

# In[7]:


# Training companies: one frame per company, selected by exact match on
# '회사명' (company name) in the merged table.
lg = IT[IT['회사명'] == 'LG이노텍(주)']
sdi = IT[IT['회사명'] == '삼성SDI(주)']
ssem = IT[IT['회사명'] == '삼성전기(주)']
hynix = IT[IT['회사명'] == '에스케이하이닉스(주)']

# ------------- validation ------------------
# Validation companies, taken from the separately merged validation table.

samsungelectric = IT_val[IT_val['회사명'] == '삼성전자(주)']
lgelectric = IT_val[IT_val['회사명'] == '엘지전자(주)']
sds = IT_val[IT_val['회사명'] == '삼성에스디에스(주)']
lgdisplay = IT_val[IT_val['회사명'] == '엘지디스플레이(주)']


# In[8]:


def classify(stock):
    """Add next-row close, period return and an up/down label to ``stock``.

    The frame is modified **in place** (the call sites in this file discard
    the return value and rely on that) and is also returned.

    Parameters
    ----------
    stock : pandas.DataFrame
        Rows of a single company. Must contain the columns '회계년도'
        (used as the sort key) and '종가' (closing price).

    Returns
    -------
    pandas.DataFrame
        The same object, sorted ascending by '회계년도', re-indexed 0..n-1,
        with three added columns:

        * '3개월후종가'   - '종가' of the following row (NaN on the last row).
          The name means "close 3 months later"; that holds only if rows
          are quarterly - TODO confirm against the data.
        * '분기수익률'     - (next close - close) / close.
        * '수익률상승하락' - 0.0 when the return is below 0.005, else 1.0.
          The last row (NaN return) is labelled 1.0; callers drop that row.
    """
    # Sort by fiscal period and renumber the rows so that "next row" below
    # means "next period".
    stock.sort_values('회계년도', ascending=True, inplace=True)
    stock.reset_index(drop=True, inplace=True)

    # New columns.
    stock['3개월후종가'] = stock['종가'].shift(-1)
    stock['분기수익률'] = (stock['3개월후종가'] - stock['종가']) / stock['종가']

    # Assign the whole column at once. Writing row by row through
    # ``stock[col][i] = ...`` is chained assignment, which pandas does not
    # guarantee to write back (and never does under copy-on-write).
    # ``~(r < 0.005)`` rather than ``r >= 0.005`` keeps NaN returns mapped to 1.0.
    stock['수익률상승하락'] = (~(stock['분기수익률'] < 0.005)).astype(float)

    return stock


# In[9]:


# classify() sorts each frame and adds the target columns in place, so the
# return values are intentionally ignored here.
classify(lg)
classify(sdi)
classify(ssem)
classify(hynix)

# ------------- validation ------------------

classify(samsungelectric)
classify(lgelectric)
classify(sds)
classify(lgdisplay)


# In[10]:


# Drop the last row of every company: it has no following row, so its
# '3개월후종가' is NaN and its label cannot be verified.
lg = lg.iloc[:-1,:]
sdi = sdi.iloc[:-1,:]
ssem = ssem.iloc[:-1,:]
hynix = hynix.iloc[:-1,:]

# ------------- validation ------------------

samsungelectric = samsungelectric.iloc[:-1,:]
lgelectric = lgelectric.iloc[:-1,:]
sds = sds.iloc[:-1,:]
lgdisplay = lgdisplay.iloc[:-1,:]


# # 산업별 변수 <3개월 후 종가>와의 상관관계 분석

# In[11]:


# 밑에 함수로 해보기!!!!


# In[12]:


# Per company: Pearson correlation of every numeric column with '3개월후종가',
# sorted from most positive to most negative. After reset_index() the row
# label of each df_* frame is the column's position in the correlation
# matrix; later cells use that label as a variable id.
#
# Numeric columns are selected explicitly because DataFrame.corr no longer
# skips non-numeric columns (e.g. '회사명') by default on pandas >= 2.0 and
# raises instead.
lg_corr = lg.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_lg = pd.DataFrame(lg_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_lg.columns = ['지표','3개월후종가']

sdi_corr = sdi.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_sdi = pd.DataFrame(sdi_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_sdi.columns = ['지표','3개월후종가']

ssem_corr = ssem.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_ssem = pd.DataFrame(ssem_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_ssem.columns = ['지표','3개월후종가']

hynix_corr = hynix.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_hynix = pd.DataFrame(hynix_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_hynix.columns = ['지표','3개월후종가']


# # 산업별 상관관계 데이터프레임

# In[13]:


# Side-by-side view of the four per-company rankings (notebook cell output only).
pd.concat([df_lg.reset_index(drop = True), df_sdi.reset_index(drop = True), df_ssem.reset_index(drop = True), df_hynix.reset_index(drop = True)],axis = 1)


# In[14]:


list_corr = []
order = 20 # take the top `order` (here 20) correlated variables of each company

# Collect the row labels (variable ids) of the first `order` rows of every
# ranking. The rankings are sorted by signed correlation, so "top" means
# most positive, and '3개월후종가' itself is among them (filtered out below).
for i in range(len(df_lg.index[:order])):
    list_corr.append(df_lg.index[:order][i])
    list_corr.append(df_sdi.index[:order][i])
    list_corr.append(df_ssem.index[:order][i])
    list_corr.append(df_hynix.index[:order][i])

# Count how many of the four top lists each variable id appears in.
variable = pd.Series(list_corr).value_counts()
variable = pd.DataFrame(variable).reset_index()
variable.columns = ['var_index', 'number']


# In[15]:


# Translate each variable id back to its column name via df_lg.
# NOTE(review): assumes all four frames share the same id -> name mapping,
# i.e. identical numeric columns in identical order - confirm.
var = []
for i in range(len(variable)):
    var.append(df_lg[df_lg.index == variable['var_index'][i]]['지표'].values[0])
var = pd.DataFrame(var)
var.columns = ['var']


# In[16]:


corr = pd.concat([variable,var], axis = 1)
corr = corr[['var_index','var','number']]


# In[17]:


# Remove the columns derived from the future price so they cannot be picked as features.
corr = corr[corr['var'] != '3개월후종가'].reset_index(drop = True)
corr = corr[corr['var'] != '수익률상승하락'].reset_index(drop = True)
corr = corr[corr['var'] != '분기수익률'].reset_index(drop = True)
corr


# # 위 자료를 보고 변수 선정

# In[18]:


# Feature frames: the first 18 variable names listed in corr['var'] ...
deep_lg = lg[corr['var'][0:18]]
deep_sdi = sdi[corr['var'][0:18]]
deep_ssem = ssem[corr['var'][0:18]]
deep_hynix = hynix[corr['var'][0:18]]

# ... plus the up/down label appended as the LAST column (later cells rely on
# that position: iloc[:, :-1] = features, iloc[:, [-1]] = label).
deep_lg['수익률상승하락'] = lg['수익률상승하락']
deep_sdi['수익률상승하락'] = sdi['수익률상승하락']
deep_ssem['수익률상승하락'] = ssem['수익률상승하락']
deep_hynix['수익률상승하락'] = hynix['수익률상승하락']

# ------------ validation --------------
# Same columns for the validation companies (the variable list comes from the
# training companies only).

deep_samsungelectric = samsungelectric[corr['var'][0:18]]
deep_lgelectric = lgelectric[corr['var'][0:18]]
deep_sds = sds[corr['var'][0:18]]
deep_lgdisplay = lgdisplay[corr['var'][0:18]]

deep_samsungelectric['수익률상승하락'] = samsungelectric['수익률상승하락']
deep_lgelectric['수익률상승하락'] = lgelectric['수익률상승하락']
deep_sds['수익률상승하락'] = sds['수익률상승하락']
deep_lgdisplay['수익률상승하락'] = lgdisplay['수익률상승하락']


# In[19]:


# Standardization
# Every company gets its own StandardScaler fitted on that company's rows
# only. iloc[:, :-1] leaves out the last column, which is the label.
# The scaled arrays are wrapped in DataFrames with default integer column names.

from sklearn.preprocessing import StandardScaler

scaler_lg = StandardScaler()
scale_lg = pd.DataFrame(scaler_lg.fit_transform(deep_lg.iloc[:,:-1]))

scaler_sdi = StandardScaler()
scale_sdi = pd.DataFrame(scaler_sdi.fit_transform(deep_sdi.iloc[:,:-1]))

scaler_ssem = StandardScaler()
scale_ssem = pd.DataFrame(scaler_ssem.fit_transform(deep_ssem.iloc[:,:-1]))

scaler_hynix = StandardScaler()
scale_hynix = pd.DataFrame(scaler_hynix.fit_transform(deep_hynix.iloc[:,:-1]))

# ------------ validation --------------
# Fit each validation company with its OWN scaler object. Calling
# fit_transform on scaler_lg / scaler_sdi / scaler_ssem / scaler_hynix here
# would overwrite the statistics learned from the training companies (breaking
# any later inverse_transform on them) and leave the validation scalers unfitted.

scaler_samsungelectric = StandardScaler()
scale_samsungelectric = pd.DataFrame(scaler_samsungelectric.fit_transform(deep_samsungelectric.iloc[:,:-1]))

scaler_lgelectric = StandardScaler()
scale_lgelectric = pd.DataFrame(scaler_lgelectric.fit_transform(deep_lgelectric.iloc[:,:-1]))

scaler_sds = StandardScaler()
scale_sds = pd.DataFrame(scaler_sds.fit_transform(deep_sds.iloc[:,:-1]))

scaler_lgdisplay = StandardScaler()
scale_lgdisplay = pd.DataFrame(scaler_lgdisplay.fit_transform(deep_lgdisplay.iloc[:,:-1]))


# Convert back to the original values
# scaler_lg.inverse_transform(scale_lg)


# In[20]:


# Stack the unscaled per-company frames; only the last column (the label) is used from this.
deep_IT = pd.concat([deep_lg,deep_sdi,deep_ssem,deep_hynix], ignore_index=True)

# Stack the standardized features in the same company order, so rows line up with deep_IT.
scale_IT = pd.concat([scale_lg,scale_sdi,scale_ssem,scale_hynix], ignore_index=True)

# ------------ validation --------------

# Stack the unscaled validation frames; only the last column (the label) is used from this.
deep_IT_validation = pd.concat([deep_samsungelectric,deep_lgelectric,deep_sds,deep_lgdisplay], ignore_index=True)

# Stack the standardized validation features in the same company order.
scale_IT_validation = pd.concat([scale_samsungelectric,scale_lgelectric,scale_sds,scale_lgdisplay], ignore_index=True)


# # Xavier initializer and a single-layer model (no hidden layer)

# # 및 Validation

# In[21]:


def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style TensorFlow initializer scaled by fan-in + fan-out.

    Args:
        n_inputs: fan-in of the layer.
        n_outputs: fan-out of the layer.
        uniform: when true return a uniform initializer over
            [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))];
            otherwise return a truncated-normal initializer with
            stddev = sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        The initializer object created by ``tf.random_uniform_initializer`` or
        ``tf.truncated_normal_initializer``.
    """
    if not uniform:
        # The constant 3 gives approximately the same limits as the uniform
        # case, since the truncated normal re-draws values lying more than
        # 2 standard deviations from the mean.
        spread = tf.sqrt(3.0 / (n_inputs + n_outputs))
        return tf.truncated_normal_initializer(stddev=spread)

    # The constant 6 is the one used in the paper.
    bound = tf.sqrt(6.0 / (n_inputs + n_outputs))
    return tf.random_uniform_initializer(-bound, bound)


# In[22]:


# Single-layer model: logistic regression, sigmoid(X . W1 + b1), no hidden layer.

# Review : Learning rate and Evaluation
import tensorflow as tf
import random
import matplotlib.pyplot as plt

# Reset first, then seed. tf.set_random_seed() sets the seed on the current
# default graph, so seeding before reset_default_graph() would throw the seed
# away together with the old graph and make W1 / b1 differ between runs.
tf.reset_default_graph()
tf.set_random_seed(777)  # reproducibility


# Features: standardized variables of the four training companies.
# Labels: last column of deep_IT ('수익률상승하락'), kept 2-D via [-1].
x_data = scale_IT
y_data = deep_IT.iloc[:,[-1]]


# parameters
placeholder_num = len(x_data.columns)  # number of input features
learning_rate = 0.001


X = tf.placeholder(tf.float32, [None, placeholder_num])
Y = tf.placeholder(tf.float32, [None, 1])
# Fed by the training cells, but not connected to any op in this graph
# (there is no dropout layer).
keep_prob = tf.placeholder(tf.float32)


W1 = tf.get_variable("W1", shape=[placeholder_num, 1], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([1]))

hypothesis = tf.sigmoid(tf.matmul(X, W1) + b1)


# cost/loss function: mean binary cross-entropy.
# NOTE(review): tf.log(hypothesis) yields -inf/NaN if the sigmoid saturates to
# exactly 0 or 1; a logits-based loss would be numerically safer.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                       tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# train = tf.train.AdamOptimizer(1e-4).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))




# # <span style="color:red"> 가치주로 validation </span>

# In[24]:


# Launch graph
# Train on 80 % of the training-company rows and track accuracy on the other 20 %.

validation_number = 20  # only referenced by commented-out code in the visible cells

graph_cost = []  # training cost per step
graph_acc = []   # hold-out accuracy per step

# Random 80/20 row split with a fixed random_state.
x_data_train, x_data_test, y_data_train, y_data_test = train_test_split(x_data, y_data, test_size=0.2, random_state=0)


with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        # One full-batch gradient-descent step on the training split.
        # NOTE(review): keep_prob is fed, but no op in the graph defined above appears to use it.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data_train, Y: y_data_train, keep_prob: 0.8})       
        # Evaluate on the hold-out split after every step; h, c, a keep the
        # values of the final step once the loop ends.
        h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: x_data_test, Y: y_data_test, keep_prob: 1.0})
        graph_cost.append(cost_val)
        graph_acc.append(a)
        # Log the first 10 steps and then every 200th step.
        if step % 200 == 0 or step < 10 :
            print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))    


# In[25]:


# Highest hold-out accuracy reached at any step ('모델 최고 정확도' = "best model accuracy").
# NOTE(review): this is the maximum over all steps on the same split it is
# reported for, so it is an optimistic figure, not the final model's accuracy.
print('모델 최고 정확도: ', max(graph_acc) * 100 , '%')


# In[26]:


# Global matplotlib font sizes for the figures that follow.
plt.rc('font', size=15)          # controls default text sizes
plt.rc('axes', titlesize=15)     # fontsize of the axes title
plt.rc('axes', labelsize=15)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=6)    # fontsize of the tick labels
plt.rc('ytick', labelsize=9)    # fontsize of the tick labels
plt.rc('legend', fontsize=15)    # legend fontsize
plt.rc('figure', titlesize=15)  # fontsize of the figure title


# In[27]:


# Training cost and hold-out accuracy per step on one axis; y is limited to [0, 1.5].
plt.figure(figsize=(14, 8))
plt.ylim([0,1.5])
plt.plot(graph_cost, color = 'blue', label = 'model cost')
plt.plot(graph_acc, color = 'red', label = 'Validation Acc')
plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# In[28]:


# Per-step history as a table (shown as the notebook cell output).
pd.DataFrame({'graph_cost':graph_cost,'graph_acc': graph_acc})


# In[29]:


# pd.DataFrame(y_data.iloc[-validation_number:,:].values, c.ravel()).reset_index()
# Actual vs. predicted labels on the hold-out split; `c` holds the predictions of the last training step.
pd.DataFrame({'real':list(y_data_test.values.ravel()),'predict': list(c.ravel())})


# # <span style="color:red"> 비 가치주로 validation </span>

# In[30]:


# Launch graph
# Re-train from freshly initialized variables on ALL training-company rows and
# track accuracy on the separate validation companies.


graph_cost = []  # training cost per step
graph_acc = []   # validation-company accuracy per step

# samsungelectric / lgelectric / sds / lgdisplay

# x_val_data = scale_lgdisplay
# y_val_data = deep_lgdisplay.iloc[:,[-1]]

# All four validation companies stacked together.
x_val_data = scale_IT_validation
y_val_data = deep_IT_validation.iloc[:,[-1]]

with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        # One full-batch gradient-descent step on the complete training set.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data, keep_prob: 0.8})
        # Evaluate on the validation companies after every step.
        h, c, a = sess.run([hypothesis, predicted, accuracy],
                               feed_dict={X: x_val_data, Y: y_val_data, keep_prob: 1.0})
        graph_cost.append(cost_val)
        graph_acc.append(a)
        # Log the first 10 steps and then every 200th step.
        if step % 200 == 0 or step < 10 :
            
            print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))    

    # Accuracy report
   


# In[31]:


# Highest validation-company accuracy reached at any step ('모델 최고 정확도' = "best model accuracy").
# NOTE(review): maximum over all steps, not the accuracy of the final model.
print('모델 최고 정확도: ', max(graph_acc) * 100 , '%')


# In[32]:


# Training cost and validation accuracy per step on one axis; y is limited to [0, 1.5].
plt.figure(figsize=(14, 8))
plt.ylim([0,1.5])
plt.plot(graph_cost, color = 'blue', label = 'model cost')
plt.plot(graph_acc, color = 'red', label = 'Validation Acc')
plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# In[33]:


# pd.DataFrame(y_data.iloc[-validation_number:,:].values, c.ravel()).reset_index()
# Actual vs. predicted labels for the validation companies; `c` holds the predictions of the last training step.
pd.DataFrame({'real':list(y_val_data.iloc[:,:].values.ravel()),'predict': list(c.ravel())})



# 재무+비재무

In [None]:

# coding: utf-8

# In[1]:


# import warnings
# warnings.filterwarnings("ignore")
# import pandas as pd
# import pandas_datareader.data as web
# import time
# import tensorflow
# import matplotlib.pyplot as plt
# import datetime as dt
# import numpy as np
# import logging
# import math
# import os
# from sklearn.preprocessing import MinMaxScaler, scale
# from keras.utils import np_utils
# import keras
# from keras.models import Sequential
# from keras.layers import Dense, LSTM, Activation
# from keras import optimizers
# from sklearn.metrics import mean_squared_error
# from keras.models import load_model
# from tqdm import tqdm_notebook
# from sklearn.model_selection import train_test_split


# In[2]:


# pd.set_option('display.max_columns', 500)
# pd.set_option('display.width', 1000)
# pd.set_option('display.max_rows', 500)


# # 데이터 불러오기 / 데이터 정리

# In[3]:


# Financial ('재무사항') and non-financial ('비재무') workbooks for the training companies ...
IT_finance = pd.read_excel('./data/dongboo/IT_TOTAL_재무사항.xlsx')
IT_nonfinance = pd.read_excel('./data/dongboo/비재무/IT_비재무_TOTAL_최종.xlsx')

# ... and for the validation companies.
IT_validation_finance = pd.read_excel('./data/dongboo/IT_validation_재무사항.xlsx')
IT_validation_nonfinance = pd.read_excel('./data/dongboo/비재무validation/validation_비재무_it_total_최종.xlsx')


# In[4]:


# Stock-price ('주가') data taken from KRX
IT_stock = pd.read_excel('./data/dongboo/IT_주가_total_krx.xlsx')

IT_validation_stock = pd.read_excel('./data/dongboo/IT_validation_주가.xls')

# IT_stock = pd.read_excel('./data/dongboo/IT_TOTAL_주가.xlsx')
# Medicine_stock = pd.read_excel('./data/dongboo/제약_TOTAL_주가.xlsx')


# In[5]:


# Financial + non-financial variables in one table, left-joined on
# ('회사명', '거래소코드', '회계년도') = (company name, exchange code, fiscal period).
IT_info = pd.merge(IT_finance,IT_nonfinance, how = 'left', on = ['회사명','거래소코드','회계년도'])
IT_val_info = pd.merge(IT_validation_finance,IT_validation_nonfinance, how = 'left', on = ['회사명','거래소코드','회계년도'])


# In[6]:


# Combined variables + prices: the tables the model features are taken from.
IT = pd.merge(IT_info, IT_stock, how = 'left', on = ['회사명','거래소코드','회계년도'])
IT_val = pd.merge(IT_val_info,IT_validation_stock,how = 'left', on = ['회사명','거래소코드','회계년도'])


# In[7]:


# Financial-only and non-financial-only tables (each with prices), used below
# to rank the two variable groups separately.
IT_fi = pd.merge(IT_finance, IT_stock, how = 'left', on = ['회사명','거래소코드','회계년도'])
IT_no_fi = pd.merge(IT_nonfinance, IT_stock, how = 'left', on = ['회사명','거래소코드','회계년도'])


# In[8]:


# Training companies from the combined (financial + non-financial) table,
# selected by exact match on '회사명' (company name).
lg = IT[IT['회사명'] == 'LG이노텍(주)']
sdi = IT[IT['회사명'] == '삼성SDI(주)']
ssem = IT[IT['회사명'] == '삼성전기(주)']
hynix = IT[IT['회사명'] == '에스케이하이닉스(주)']

# ------------- validation ------------------
# Validation companies from the combined validation table.

samsungelectric = IT_val[IT_val['회사명'] == '삼성전자(주)']
lgelectric = IT_val[IT_val['회사명'] == '엘지전자(주)']
sds = IT_val[IT_val['회사명'] == '삼성에스디에스(주)']
lgdisplay = IT_val[IT_val['회사명'] == '엘지디스플레이(주)']


# In[9]:


# Same four training companies, financial variables only (*_f).
lg_f = IT_fi[IT_fi['회사명'] == 'LG이노텍(주)']
sdi_f = IT_fi[IT_fi['회사명'] == '삼성SDI(주)']
ssem_f = IT_fi[IT_fi['회사명'] == '삼성전기(주)']
hynix_f = IT_fi[IT_fi['회사명'] == '에스케이하이닉스(주)']


# In[10]:


# Same four training companies, non-financial variables only (*_nf).
lg_nf = IT_no_fi[IT_no_fi['회사명'] == 'LG이노텍(주)']
sdi_nf = IT_no_fi[IT_no_fi['회사명'] == '삼성SDI(주)']
ssem_nf = IT_no_fi[IT_no_fi['회사명'] == '삼성전기(주)']
hynix_nf = IT_no_fi[IT_no_fi['회사명'] == '에스케이하이닉스(주)']


# In[11]:


def classify(stock):
    """Add next-row close, period return and an up/down label to ``stock``.

    The frame is modified **in place** (the call sites in this file discard
    the return value and rely on that) and is also returned.

    Parameters
    ----------
    stock : pandas.DataFrame
        Rows of a single company. Must contain the columns '회계년도'
        (used as the sort key) and '종가' (closing price).

    Returns
    -------
    pandas.DataFrame
        The same object, sorted ascending by '회계년도', re-indexed 0..n-1,
        with three added columns:

        * '3개월후종가'   - '종가' of the following row (NaN on the last row).
          The name means "close 3 months later"; that holds only if rows
          are quarterly - TODO confirm against the data.
        * '분기수익률'     - (next close - close) / close.
        * '수익률상승하락' - 0.0 when the return is below 0.005, else 1.0.
          The last row (NaN return) is labelled 1.0; callers drop that row.
    """
    # Sort by fiscal period and renumber the rows so that "next row" below
    # means "next period".
    stock.sort_values('회계년도', ascending=True, inplace=True)
    stock.reset_index(drop=True, inplace=True)

    # New columns.
    stock['3개월후종가'] = stock['종가'].shift(-1)
    stock['분기수익률'] = (stock['3개월후종가'] - stock['종가']) / stock['종가']

    # Assign the whole column at once. Writing row by row through
    # ``stock[col][i] = ...`` is chained assignment, which pandas does not
    # guarantee to write back (and never does under copy-on-write).
    # ``~(r < 0.005)`` rather than ``r >= 0.005`` keeps NaN returns mapped to 1.0.
    stock['수익률상승하락'] = (~(stock['분기수익률'] < 0.005)).astype(float)

    return stock


# In[12]:


# classify() sorts each frame and adds the target columns in place, so the
# return values are intentionally ignored here.
classify(lg)
classify(sdi)
classify(ssem)
classify(hynix)

classify(lg_f)
classify(sdi_f)
classify(ssem_f)
classify(hynix_f)

classify(lg_nf)
classify(sdi_nf)
classify(ssem_nf)
classify(hynix_nf)

# ------------- validation ------------------

classify(samsungelectric)
classify(lgelectric)
classify(sds)
classify(lgdisplay)


# In[13]:


# Drop the last row of every frame: it has no following row, so its
# '3개월후종가' is NaN and its label cannot be verified.
lg = lg.iloc[:-1,:]
sdi = sdi.iloc[:-1,:]
ssem = ssem.iloc[:-1,:]
hynix = hynix.iloc[:-1,:]

lg_f = lg_f.iloc[:-1,:]
sdi_f = sdi_f.iloc[:-1,:]
ssem_f = ssem_f.iloc[:-1,:]
hynix_f = hynix_f.iloc[:-1,:]

lg_nf = lg_nf.iloc[:-1,:]
sdi_nf = sdi_nf.iloc[:-1,:]
ssem_nf = ssem_nf.iloc[:-1,:]
hynix_nf = hynix_nf.iloc[:-1,:]

# ------------- validation ------------------

samsungelectric = samsungelectric.iloc[:-1,:]
lgelectric = lgelectric.iloc[:-1,:]
sds = sds.iloc[:-1,:]
lgdisplay = lgdisplay.iloc[:-1,:]


# In[14]:


# Financial variables, per company: Pearson correlation of every numeric
# column with '3개월후종가', sorted from most positive to most negative. After
# reset_index() the row label of each df_*_f frame is the column's position
# in the correlation matrix; later cells use that label as a variable id.
#
# Numeric columns are selected explicitly because DataFrame.corr no longer
# skips non-numeric columns (e.g. '회사명') by default on pandas >= 2.0 and
# raises instead.
lg_f_corr = lg_f.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_lg_f = pd.DataFrame(lg_f_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_lg_f.columns = ['지표','3개월후종가']

sdi_f_corr = sdi_f.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_sdi_f = pd.DataFrame(sdi_f_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_sdi_f.columns = ['지표','3개월후종가']

ssem_f_corr = ssem_f.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_ssem_f = pd.DataFrame(ssem_f_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_ssem_f.columns = ['지표','3개월후종가']

hynix_f_corr = hynix_f.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_hynix_f = pd.DataFrame(hynix_f_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_hynix_f.columns = ['지표','3개월후종가']


# In[15]:


# Non-financial variables, per company: Pearson correlation of every numeric
# column with '3개월후종가', sorted from most positive to most negative. After
# reset_index() the row label of each df_*_nf frame is the column's position
# in the correlation matrix; later cells use that label as a variable id.
#
# Numeric columns are selected explicitly because DataFrame.corr no longer
# skips non-numeric columns (e.g. '회사명') by default on pandas >= 2.0 and
# raises instead.
lg_nf_corr = lg_nf.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_lg_nf = pd.DataFrame(lg_nf_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_lg_nf.columns = ['지표','3개월후종가']

sdi_nf_corr = sdi_nf.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_sdi_nf = pd.DataFrame(sdi_nf_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_sdi_nf.columns = ['지표','3개월후종가']

ssem_nf_corr = ssem_nf.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_ssem_nf = pd.DataFrame(ssem_nf_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_ssem_nf.columns = ['지표','3개월후종가']

hynix_nf_corr = hynix_nf.select_dtypes(include = ['number', 'bool']).corr(method = 'pearson')
df_hynix_nf = pd.DataFrame(hynix_nf_corr['3개월후종가']).reset_index().sort_values('3개월후종가', ascending = False)
df_hynix_nf.columns = ['지표','3개월후종가']


# In[16]:


list_corr = []
order = 30 # take the top 30 correlated financial variables of each company

# Collect the row labels (variable ids) of the first `order` rows of every
# financial ranking. The rankings are sorted by signed correlation, so "top"
# means most positive.
for i in range(len(df_lg_f.index[:order])):
    list_corr.append(df_lg_f.index[:order][i])
    list_corr.append(df_sdi_f.index[:order][i])
    list_corr.append(df_ssem_f.index[:order][i])
    list_corr.append(df_hynix_f.index[:order][i])

# Count how many of the four top lists each variable id appears in.
variable = pd.Series(list_corr).value_counts()
variable = pd.DataFrame(variable).reset_index()
variable.columns = ['var_index', 'number']

var = []

# Translate each variable id back to its column name via df_lg_f.
# NOTE(review): assumes all four frames share the same id -> name mapping - confirm.
for i in range(len(variable)):
    var.append(df_lg_f[df_lg_f.index == variable['var_index'][i]]['지표'].values[0])
var = pd.DataFrame(var)
var.columns = ['var']

corr = pd.concat([variable,var], axis = 1)
corr = corr[['var_index','var','number']]

# Remove the columns derived from the future price so they cannot be picked as features.
corr = corr[corr['var'] != '3개월후종가'].reset_index(drop = True)
corr = corr[corr['var'] != '수익률상승하락'].reset_index(drop = True)
corr = corr[corr['var'] != '분기수익률'].reset_index(drop = True)
corr


# In[17]:


# Pool the `order` best-correlated non-financial indicators of every company
# and count how many companies share each one.
order = 15  # rows taken from the top of each company's ranking

# Row labels are the indicators' positions in the correlation matrix; they are
# appended rank by rank (LG, SDI, SEM, Hynix) so the pooled order is unchanged.
list_corr_nf = []
for i in range(len(df_lg_nf.index[:order])):
    list_corr_nf.extend(
        ranked.index[:order][i]
        for ranked in (df_lg_nf, df_sdi_nf, df_ssem_nf, df_hynix_nf)
    )

# One row per distinct indicator position with its number of occurrences.
variable_nf = pd.Series(list_corr_nf).value_counts().to_frame().reset_index()
variable_nf.columns = ['var_index', 'number']

# Translate positions back to indicator names through the LG ranking
# (assumes all four rankings share the same position -> name mapping).
var_nf = pd.DataFrame(
    [df_lg_nf.loc[position, '지표'] for position in variable_nf['var_index']],
    columns=['var'],
)

corr_nf = pd.concat([variable_nf, var_nf], axis=1)[['var_index', 'var', 'number']]

# Dropped from the candidates: the target and return-based columns, followed
# by the trading value / close / low / high / volume columns.
excluded_vars_nf = [
    '3개월후종가', '수익률상승하락', '분기수익률',
    '거래대금', '종가', '최저가', '최고가', '거래량',
]
corr_nf = corr_nf[~corr_nf['var'].isin(excluded_vars_nf)].reset_index(drop=True)
corr_nf


# In[18]:


# Final feature table: the first 10 financial rows followed by the first 10
# non-financial rows, renumbered from 0.  Note that `corr` is rebound here, so
# running this statement twice changes the result.
corr = pd.concat([corr.iloc[:10], corr_nf.iloc[:10]], axis=0, ignore_index=True)


# In[19]:


def _build_model_frame(company, feature_cols, label_col='수익률상승하락'):
    """Return ``company`` restricted to the features plus the label column.

    The label is appended as the last column and every row that contains a
    missing value is removed.
    """
    with_label = company[feature_cols].assign(**{label_col: company[label_col]})
    return with_label.dropna(axis=0)


# At most the first 20 indicator names of the merged feature table.
selected_features = corr['var'][:20]

deep_lg = _build_model_frame(lg, selected_features)
deep_sdi = _build_model_frame(sdi, selected_features)
deep_ssem = _build_model_frame(ssem, selected_features)
deep_hynix = _build_model_frame(hynix, selected_features)

# ------------ validation --------------

deep_samsungelectric = _build_model_frame(samsungelectric, selected_features)
deep_lgelectric = _build_model_frame(lgelectric, selected_features)
deep_sds = _build_model_frame(sds, selected_features)
deep_lgdisplay = _build_model_frame(lgdisplay, selected_features)


# In[20]:


# Standardization: every company's feature columns (all but the last column,
# which holds the label) are z-scored with a scaler fitted on that company.
# The results are plain DataFrames with positional column labels.

from sklearn.preprocessing import StandardScaler

scaler_lg = StandardScaler()
scale_lg = pd.DataFrame(scaler_lg.fit_transform(deep_lg.iloc[:,:-1]))

scaler_sdi = StandardScaler()
scale_sdi = pd.DataFrame(scaler_sdi.fit_transform(deep_sdi.iloc[:,:-1]))

scaler_ssem = StandardScaler()
scale_ssem = pd.DataFrame(scaler_ssem.fit_transform(deep_ssem.iloc[:,:-1]))

scaler_hynix = StandardScaler()
scale_hynix = pd.DataFrame(scaler_hynix.fit_transform(deep_hynix.iloc[:,:-1]))

# ------------ validation --------------
# Each validation company is fitted with its OWN scaler.  Calling
# fit_transform on the training scalers here would re-fit them, overwriting
# the training statistics and leaving the validation scalers unfitted.

scaler_samsungelectric = StandardScaler()
scale_samsungelectric = pd.DataFrame(scaler_samsungelectric.fit_transform(deep_samsungelectric.iloc[:,:-1]))

scaler_lgelectric = StandardScaler()
scale_lgelectric = pd.DataFrame(scaler_lgelectric.fit_transform(deep_lgelectric.iloc[:,:-1]))

scaler_sds = StandardScaler()
scale_sds = pd.DataFrame(scaler_sds.fit_transform(deep_sds.iloc[:,:-1]))

scaler_lgdisplay = StandardScaler()
scale_lgdisplay = pd.DataFrame(scaler_lgdisplay.fit_transform(deep_lgdisplay.iloc[:,:-1]))

# To map scaled values back to the original units:
# scaler_lg.inverse_transform(scale_lg)


# In[21]:


# Training companies stacked row-wise with a fresh 0..n-1 index.  The raw and
# the scaled lists use the same company order so rows stay aligned by position.
training_raw_frames = [deep_lg, deep_sdi, deep_ssem, deep_hynix]
training_scaled_frames = [scale_lg, scale_sdi, scale_ssem, scale_hynix]

# Raw frames: kept so the label can be read from the last column.
deep_IT = pd.concat(training_raw_frames, ignore_index=True)

# Standardized feature columns.
scale_IT = pd.concat(training_scaled_frames, ignore_index=True)

# ------------ validation --------------

validation_raw_frames = [deep_samsungelectric, deep_lgelectric, deep_sds, deep_lgdisplay]
validation_scaled_frames = [scale_samsungelectric, scale_lgelectric, scale_sds, scale_lgdisplay]

# Raw frames: kept so the label can be read from the last column.
deep_IT_validation = pd.concat(validation_raw_frames, ignore_index=True)

# Standardized feature columns.
scale_IT_validation = pd.concat(validation_scaled_frames, ignore_index=True)


# In[22]:


# import matplotlib.pyplot as plt
# from matplotlib import font_manager, rc
# import seaborn as sns
# import matplotlib
# font_location = "c:/Windows/fonts/malgun.ttf"
# font_name = font_manager.FontProperties(fname=font_location).get_name()
# matplotlib.rc('font', family=font_name)


# In[23]:


# plt.figure(figsize=(20,50))

# sns.heatmap(data = scale_IT.corr(), annot = True)
# # plt.savefig('tmp.png')


# # 샤비어함수 및 히든레이어 5개

# # 및 Validation

# In[24]:


def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style TensorFlow initializer for a weight matrix.

    Args:
        n_inputs: Number of input units of the layer.
        n_outputs: Number of output units of the layer.
        uniform: When true, return a uniform initializer over
            [-sqrt(6 / (n_inputs + n_outputs)), +sqrt(6 / (n_inputs + n_outputs))];
            otherwise return a truncated-normal initializer with standard
            deviation sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A ``tf.random_uniform_initializer`` or ``tf.truncated_normal_initializer``.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        # 3 gives approximately the same limits as the uniform case, because
        # the truncated normal re-draws values more than 2 standard
        # deviations away from the mean.
        spread = tf.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=spread)

    # 6 was used in the paper.
    bound = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-bound, bound)


# In[25]:


# Single-layer model: one sigmoid unit fed directly by the input features
# (logistic regression); this graph defines no hidden layer.

# Review : Learning rate and Evaluation
import tensorflow as tf
import random
import matplotlib.pyplot as plt

# Reset first, then seed.  The graph-level seed is stored on the default
# graph, so seeding before the reset is thrown away with the old graph and
# the variable initializers below would not be reproducible.
tf.reset_default_graph()
tf.set_random_seed(777)  # reproducibility

# Training rows of the four companies.  The raw and the scaled frames are
# stacked in the same company order, so features and labels match by position.
deep_IT = pd.concat([deep_hynix,deep_ssem,deep_sdi,deep_lg], ignore_index=True)
scale_IT = pd.concat([scale_hynix,scale_ssem,scale_sdi,scale_lg], ignore_index=True)

x_data = scale_IT
y_data = deep_IT.iloc[:,[-1]]  # last column, kept two-dimensional to match Y


# Per-evaluation history filled by the training cells.
graph_cost = []
graph_acc = []

# parameters
placeholder_num = len(x_data.columns)
learning_rate = 0.001


X = tf.placeholder(tf.float32, [None, placeholder_num])
Y = tf.placeholder(tf.float32, [None, 1])
# Not consumed by any op in this graph; the training cells still feed it.
keep_prob = tf.placeholder(tf.float32)


W1 = tf.get_variable("W1", shape=[placeholder_num, 1], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([1]))

logits = tf.matmul(X, W1) + b1
hypothesis = tf.sigmoid(logits)


# cost/loss function
# Binary cross-entropy computed from the logits.  This is the same quantity as
# -mean(Y*log(h) + (1-Y)*log(1-h)) but does not produce NaN/inf when the
# sigmoid saturates to exactly 0 or 1.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

train = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# train = tf.train.AdamOptimizer(1e-4).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))




# # <span style="color:red">Validation on value stocks</span>

# In[27]:


# Launch graph

# validation_number = 3




# Hold out 20% of the training companies' rows as a test split.
x_data_train, x_data_test, y_data_train, y_data_test = train_test_split(
    x_data, y_data, test_size=0.2, random_state=0)

# The feeds do not change between steps, so they are built once.
train_feed = {X: x_data_train, Y: y_data_train, keep_prob: 1.0}
test_feed = {X: x_data_test, Y: y_data_test, keep_prob: 1.0}

with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        cost_val, _ = sess.run([cost, train], feed_dict=train_feed)

        # Evaluate on the first 10 steps and then on every 200th step.
        if step >= 10 and step % 200 != 0:
            continue

        # h: probabilities, c: 0/1 predictions, a: accuracy on the test split.
        h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=test_feed)
        graph_cost.append(cost_val)
        graph_acc.append(a)
        print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))

#     for step in tqdm_notebook(range(9001)):
#         cost_val, _ = sess.run([cost, train], feed_dict={X: x_data.iloc[:-validation_number,:], Y: y_data.iloc[:-validation_number,:], keep_prob: 1.0})       
#         if step % 200 == 0 or step < 10 :
#             h, c, a = sess.run([hypothesis, predicted, accuracy],
#                        feed_dict={X: x_data.iloc[-validation_number:,:], Y: y_data.iloc[-validation_number:,:], keep_prob: 1.0})
#             graph_cost.append(cost_val)
#             graph_acc.append(a)
#             print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))    

    # Accuracy report
#     h, c, a = sess.run([hypothesis, predicted, accuracy],
#                        feed_dict={X: x_data.iloc[-validation_number:,:], Y: y_data.iloc[-validation_number:,:], keep_prob: 1.0})


# In[28]:


# Highest accuracy among all recorded evaluations, shown as a percentage.
best_accuracy_pct = max(graph_acc) * 100
print('모델 최고 정확도: ', best_accuracy_pct, '%')


# In[31]:


# Side-by-side table of the test labels and the predictions from the last
# evaluation step (notebook display expression).
real_labels = list(y_data_test.values.ravel())
predicted_labels = list(c.ravel())
pd.DataFrame({'real': real_labels, 'predict': predicted_labels})



# In[32]:


# Default matplotlib font sizes, applied in this order.
for rc_group, rc_options in (
    ('font', {'size': 15}),         # default text size
    ('axes', {'titlesize': 15}),    # axes title
    ('axes', {'labelsize': 15}),    # x and y labels
    ('xtick', {'labelsize': 6}),    # x tick labels
    ('ytick', {'labelsize': 9}),    # y tick labels
    ('legend', {'fontsize': 15}),   # legend entries
    ('figure', {'titlesize': 15}),  # figure title
):
    plt.rc(rc_group, **rc_options)


# In[33]:


# Recorded training cost and evaluation accuracy on one shared axis.
plt.figure(figsize=(14, 8))
plt.ylim([0, 1.5])

for history, line_color, line_label in (
    (graph_cost, 'blue', 'model cost'),
    (graph_acc, 'red', 'Validation Acc'),
):
    plt.plot(history, color=line_color, label=line_label)

plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# # <span style="color:red">Validation on non-value stocks</span>

# In[34]:


# x_val_data = scale_sds
# y_val_data = deep_sds.iloc[:,[-1]]
# Validation set: the stacked validation companies
# (samsungelectric, lgelectric, sds, lgdisplay).
x_val_data = scale_IT_validation
y_val_data = deep_IT_validation.iloc[:,[-1]]

# Start a fresh history for this run.
graph_cost = []
graph_acc = []

# The feeds do not change between steps, so they are built once.
full_train_feed = {X: x_data, Y: y_data, keep_prob: 1.0}
validation_feed = {X: x_val_data.iloc[:,:], Y: y_val_data.iloc[:,:], keep_prob: 1.0}

with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in tqdm_notebook(range(50001)):
        # Train on every row of the training companies.
        cost_val, _ = sess.run([cost, train], feed_dict=full_train_feed)

        # Evaluate on the first 10 steps and then on every 200th step.
        if step >= 10 and step % 200 != 0:
            continue

        # h: probabilities, c: 0/1 predictions, a: accuracy on the validation set.
        h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=validation_feed)
        graph_cost.append(cost_val)
        graph_acc.append(a)
        print("Step : {} \t Cost : {} \t Acc : {}".format(step, cost_val, a*100))
  


# In[35]:


# Highest accuracy among all recorded evaluations, shown as a percentage.
best_accuracy_pct = max(graph_acc) * 100
print('모델 최고 정확도: ', best_accuracy_pct, '%')


# In[36]:


# Recorded training cost and evaluation accuracy on one shared axis.
plt.figure(figsize=(14, 8))
plt.ylim([0, 1.5])

for history, line_color, line_label in (
    (graph_cost, 'blue', 'model cost'),
    (graph_acc, 'red', 'Validation Acc'),
):
    plt.plot(history, color=line_color, label=line_label)

plt.title('model graph')
plt.xlabel('Step')
plt.ylabel('Cost & Acc')
plt.legend()
plt.show()


# In[37]:


# Side-by-side table of the validation labels and the predictions from the
# last evaluation step (notebook display expression).
real_labels = list(y_val_data.values.ravel())
predicted_labels = list(c.ravel())
pd.DataFrame({'real': real_labels, 'predict': predicted_labels})

