In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os.path
import jsm

from pandas.core import common as com

def set_span(start=None, end=None, periods=None, freq='D'):
    """ 引数のstart, end, periodsに対して
    startとendの時間を返す。

    * start, end, periods合わせて2つの引数が指定されていなければエラー
    * start, endが指定されていたらそのまま返す
    * start, periodsが指定されていたら、endを計算する
    * end, periodsが指定されていたら、startを計算する
    """
    if com._count_not_none(start, end, periods) != 2:  # Like a pd.date_range Error
        raise ValueError('Must specify two of start, end, or periods')
    start = start if start else (pd.Period(end, freq) - periods).start_time
    end = end if end else (pd.Period(start, freq) + periods).start_time
    return start, end


def get_jstock(code, freq='D', start=None, end=None, periods=None):
    """get Japanese stock data using jsm
    Usage:
        `get_jstock(6502)`
        To get TOSHIBA daily from today back to 30days except holiday.

        `get_jstock(6502, 'W', start=pd.Timestamp('2016'), end=pd.Timestamp('2017'))`
        To get TOSHIBA weekly from 2016-01-01 to 2017-01-01.

        `get_jstock(6502, end=pd.Timestamp('20170201'), periods=50)`
        To get TOSHIBA daily from 2017-02-01 back to 50days except holiday.

        `get_jstock(6502, 'M', start='first', end='last')`
        To get TOSHIBA monthly from 2000-01-01 (the date of start recording) to today.
    """
    # Default args
    if com._count_not_none(start, end, periods) == 0:  # All of args is None
        end, periods = 'last', 30

    # Switch frequency Dayly, Weekly or Monthly
    freq_dict = {'D': jsm.DAILY, 'W': jsm.WEEKLY, 'M': jsm.MONTHLY}

    # 'first' means the start of recording date
    if start == 'first':
        data = jsm.Quotes().get_historical_prices(
            code, range_type=freq_dict[freq], all=True)
        start = [i.date for i in data][-1]
    else:
        data = None  # Temporaly defined

    # 'last' means last weekday (or today)
    if end == 'last':
        end = pd.datetime.today()

    # Return "start" and "end"
    start, end = (x.date() if hasattr(x, 'date')
                  else x for x in set_span(start, end, periods, freq))
    print('Get data from {} to {}'.format(start, end))

    data = jsm.Quotes().get_historical_prices(
        code, range_type=freq_dict[freq], start_date=start, end_date=end) if not data else data
    df = _convert_dataframe(data)
    return df[start:end]


def _convert_dataframe(target):
    """Convert <jsm.pricebase.PriceData> to <pandas.DataFrame>"""
    date = [_.date for _ in target]
    open = [_.open for _ in target]
    high = [_.high for _ in target]
    low = [_.low for _ in target]
    close = [_.close for _ in target]
    adj_close = [_._adj_close for _ in target]
    volume = [_.volume for _ in target]
    data = {'Open': open,
            'High': high,
            'Low': low,
            'Close': close,
            'Adj Close': adj_close,
            'Volume': volume}
    columns = *data.keys(),
    df = pd.DataFrame(data, index=date, columns=columns).sort_index()
    df.index.name = 'Date'
    return df

In [5]:
#学習用
df = pd.read_csv('importETF.csv',encoding='utf8')
errorStock_array = []

#for code in df['code']:
for code in tempStockArray:
    print("start:" + str(code))
    
    #csvファイルの存在チェック
    #path = 'StockData/' + str(code) + '過去データ.csv'
    
    try:
        df_temp = get_jstock(code,start=pd.Timestamp('20000101'),end=pd.Timestamp('20191231'))
        df_temp.to_csv('importETF/' + str(code) + '.csv')
        print("end:" + str(code))
    except:
        errorStock_array.append(code)
        print("error")
    

start:6976
Get data from 2000-01-01 to 2019-12-31




 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


end:6976
start:8028
Get data from 2000-01-01 to 2019-12-31
end:8028
start:8331
Get data from 2000-01-01 to 2019-12-31
end:8331
start:8354
Get data from 2000-01-01 to 2019-12-31
end:8354
start:8355
Get data from 2000-01-01 to 2019-12-31
end:8355
start:8411
Get data from 2000-01-01 to 2019-12-31
end:8411
start:8601
Get data from 2000-01-01 to 2019-12-31
end:8601
start:8604
Get data from 2000-01-01 to 2019-12-31
end:8604
start:8628
Get data from 2000-01-01 to 2019-12-31
end:8628
start:8630
Get data from 2000-01-01 to 2019-12-31
end:8630
start:8725
Get data from 2000-01-01 to 2019-12-31
end:8725
start:8729
Get data from 2000-01-01 to 2019-12-31
end:8729
start:8750
Get data from 2000-01-01 to 2019-12-31
end:8750
start:8766
Get data from 2000-01-01 to 2019-12-31
end:8766
start:8795
Get data from 2000-01-01 to 2019-12-31
end:8795
start:8801
Get data from 2000-01-01 to 2019-12-31
end:8801
start:8802
Get data from 2000-01-01 to 2019-12-31
end:8802
start:8804
Get data from 2000-01-01 to 2019-12-

In [81]:
#予想用

from datetime import datetime, date, timedelta
import numpy as np
import pandas as pd
import pickle
import os.path
from sklearn import preprocessing

Xcolumns = pickle.load(open("max_Xcolumns.sav","rb"))
columns_array = ['1321']

for index in range(len(Xcolumns)):
    columns_array.append(Xcolumns[index][:4])
columns_unique_array = list(set(columns_array))

errorStock_array = []
today = datetime.today()
one = today - timedelta(days=1)
one_str = datetime.strftime(one,'%Y-%m-%d')
two = today - timedelta(days=2)
two_str = datetime.strftime(two,'%Y-%m-%d')

for code in columns_unique_array :
    print("start:" + str(code))
    
    try:
        df_temp = get_jstock(code,start=pd.Timestamp(two_str),end=pd.Timestamp(one_str))
        df_temp.to_csv('PredictImportETF/' + str(code) + '.csv')
        print("end:" + str(code))
    except:
        df_temp = pd.DataFrame(np.zeros([2,6]), columns=['Open','High','Low','Close','Adj Close','Volume'],index=[two_str,one_str])
        df_temp.index.name = 'Date'

        df_temp.to_csv('PredictImportETF/' + str(code) + '.csv')
        errorStock_array.append(code)
        print("error")


start:1623
Get data from 2019-04-09 to 2019-04-10
end:1623
start:1321
Get data from 2019-04-09 to 2019-04-10
end:1321
start:1613
Get data from 2019-04-09 to 2019-04-10
end:1613
start:1320
Get data from 2019-04-09 to 2019-04-10
end:1320
start:1672
Get data from 2019-04-09 to 2019-04-10
end:1672
start:1679
Get data from 2019-04-09 to 2019-04-10
end:1679
start:1546
Get data from 2019-04-09 to 2019-04-10
end:1546
start:1682
Get data from 2019-04-09 to 2019-04-10
end:1682
start:1540
Get data from 2019-04-09 to 2019-04-10
end:1540
start:1551
Get data from 2019-04-09 to 2019-04-10
end:1551
start:1619
Get data from 2019-04-09 to 2019-04-10
end:1619
start:1309
Get data from 2019-04-09 to 2019-04-10
end:1309
start:1343
Get data from 2019-04-09 to 2019-04-10
end:1343
start:1305
Get data from 2019-04-09 to 2019-04-10
end:1305
start:1348
Get data from 2019-04-09 to 2019-04-10
end:1348
start:1622
Get data from 2019-04-09 to 2019-04-10
end:1622
start:1542
Get data from 2019-04-09 to 2019-04-10
end:15

In [82]:
for code in columns_unique_array:
    print(code)
    
    path = "PredictImportETF/" + str(code) + ".csv"
    
    if os.path.exists(path):
        temp_df = pd.read_csv(path,engine = "python" ,encoding="utf8")
    else:
        errorStock_array.append(code)
        continue
        
    new_df = pd.DataFrame()
    new_df["Date"] = temp_df["Date"]
    
    #始値
    new_df["Open"] = 0
    
    for dateIndex in temp_df.index:
        
        #当日の始値
        openValue = temp_df.at[dateIndex,"Open"]
        
        new_df.at[dateIndex,"High"] = temp_df.at[dateIndex,"High"] - openValue
        new_df.at[dateIndex,"Low"] = temp_df.at[dateIndex,"Low"] - openValue
        new_df.at[dateIndex,"Close"] = temp_df.at[dateIndex,"Close"] - openValue
        
        if dateIndex != 0:
            new_df.at[dateIndex,"Volume"] = temp_df.at[dateIndex,"Volume"] - temp_df.at[dateIndex-1,"Volume"]
            new_df.at[dateIndex,"Open"] = openValue - temp_df.at[dateIndex-1,"Close"]
            
        else:
            new_df.at[0,"Volume"] = 0
    
    #csvファイル書き出し
    new_df.to_csv("PredictStockDataDif/" + str(code) + "_dif.csv")

1623
1321
1613
1320
1672
1679
1546
1682
1540
1551
1619
1309
1343
1305
1348
1622
1542
1681
1631
1678
1311
1329
1552
1324
1629
1690
1349
1323
1697
1698
1545
1695
1306
1326
1612
1624
1683
1310
1677
1680
1345
1550
1627
1670
1633
1671
1617
1547
1632
1626
1325
1620
1327
1543
1696
1313
1344
1615
1319
1625
1541
1699
1621
1346
1628
1630
1308
1689
1322
1618
1330


In [87]:
ETF_df = pd.DataFrame()

for code in columns_unique_array:
    
    code = str(code)
    
    temp = pd.DataFrame()
    temp = pd.read_csv("PredictStockDataDif/" + code + "_dif.csv",encoding="utf8")
    
    if code == columns_unique_array[0]:
        #初回のみETF_dfにindexを設定
        ETF_df["Date"] = temp["Date"]
        ETF_df = ETF_df.set_index("Date")
        
    if code =="1321":
        for dateIndex in range(0,len(temp.index)-1):
            tempDate = temp.at[dateIndex,"Date"]
                
            tempClose = temp.at[dateIndex+1,"Close"]
            if tempClose >= 0:
                ETF_df.at[tempDate,"nextDay_HighLow"] = 1
            else:
                ETF_df.at[tempDate,"nextDay_HighLow"] = -1
                
    for dateIndex in temp.index:
        tempDate = temp.at[dateIndex,"Date"]
            
        ETF_df.at[tempDate,code + "Open"] = temp.at[dateIndex,"Open"]
        ETF_df.at[tempDate,code + "High"] = temp.at[dateIndex,"High"]
        ETF_df.at[tempDate,code + "Low"] = temp.at[dateIndex,"Low"]
        ETF_df.at[tempDate,code + "Close"] = temp.at[dateIndex,"Close"]
        ETF_df.at[tempDate,code + "Volume"] = temp.at[dateIndex,"Volume"]/10000
        

In [88]:
errorStock_array

['1697', '1683']

In [89]:
ETF_df

Unnamed: 0_level_0,1623Open,1623High,1623Low,1623Close,1623Volume,nextDay_HighLow,1321Open,1321High,1321Low,1321Close,...,1618Open,1618High,1618Low,1618Close,1618Volume,1330Open,1330High,1330Low,1330Close,1330Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-04-09,0.0,0.0,-170.0,-170.0,0.0,1.0,0.0,50.0,-70.0,10.0,...,0.0,50.0,-60.0,50.0,0.0,0.0,60.0,-60.0,30.0,0.0
2019-04-10,70.0,0.0,-30.0,-30.0,-0.0007,,-200.0,110.0,-30.0,110.0,...,-250.0,0.0,-90.0,-20.0,-0.0092,-220.0,130.0,-20.0,130.0,2.253


In [90]:
ETF_df = ETF_df[Xcolumns]

In [92]:
ETF_df = ETF_df.fillna(0)

In [93]:
clf = pickle.load(open("max_clf.sav","rb"))
pred = clf.predict(ETF_df)

In [94]:
pred

array([-1., -1.])