In [4]:
import yfinance as yf
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import seaborn as sns
import pandas as pd
import sys
import time
import stockpy as st
from datetime import datetime
from tqdm import tqdm

In [5]:
def ticker(ticker):
    """Create a yfinance ``Ticker`` handle for a symbol.

    Args:
        ticker: Symbol passed straight to ``yf.Ticker``.

    Returns:
        Whatever ``yf.Ticker(ticker)`` returns.

    Raises:
        Exception: If ``yf.Ticker`` raises. The message names the symbol and
            the original error is chained as ``__cause__``.
    """
    try:
        return yf.Ticker(ticker)
    except Exception as e:
        print("Ticker not found")
        print(e)
        # A bare `raise Exception` has an empty message, so a caller that
        # prints the error would see nothing; carry the symbol and cause along.
        raise Exception(f"Ticker not found: {ticker!r} ({e})") from e

def get_data_period(stock, period):
    """Fetch the price history of ``stock`` for the given ``period``.

    Args:
        stock: Object exposing ``history(period=...)`` (e.g. the value
            returned by ``ticker``).
        period: Period argument forwarded to ``stock.history``.

    Returns:
        Whatever ``stock.history(period=period)`` returns.

    Raises:
        Exception: If ``stock.history`` raises. The message includes the
            period and the original error, which is chained as ``__cause__``.
    """
    try:
        return stock.history(period=period)
    except Exception as e:
        print("Error getting data")
        print(e)
        # Keep the cause attached instead of raising an empty Exception.
        raise Exception(f"Error getting data for period {period!r}: {e}") from e
def process_data(data, kind, name):
    """Prefix a row of values with its name and wrap it as a one-row array.

    Args:
        data: Iterable of values for one ticker. It is left unmodified.
        kind: Unused; kept so existing callers keep working.
        name: Label placed in the first position of the row.

    Returns:
        A NumPy array of shape ``(1, len(data) + 1)`` holding ``name``
        followed by the values. NumPy picks one common dtype, so a string
        name combined with numeric values yields an array of strings.

    Raises:
        Exception: If the row cannot be built; the original error is chained
            as ``__cause__``.
    """
    try:
        # Build a new list instead of data.insert(0, name), which silently
        # modified the caller's list.
        row = [name, *data]
        # The outer list makes the result a single-row 2-D array.
        return np.array([row])
    except Exception as e:
        print("Error processing data")
        print(e)
        raise Exception(f"Error processing data for {name!r}: {e}") from e

def process_data2(data, kind):
    """Scale one column so that its largest value becomes 1.0.

    Args:
        data: Table-like object where ``data[kind].values`` is the numeric
            column (for example a pandas DataFrame).
        kind: Label of the column to scale.

    Returns:
        A list of floats: each value divided by the column maximum. NaN
        entries stay NaN but are ignored when the maximum is determined.

    Raises:
        Exception: If the column is missing, empty, contains only NaN, or its
            maximum is zero. The original error is chained as ``__cause__``.
    """
    try:
        values = np.asarray(data[kind].values, dtype=float)
        if values.size == 0 or np.isnan(values).all():
            raise ValueError(f"column {kind!r} has no usable values")
        # Built-in max() is order-dependent when NaN is present (a leading NaN
        # wins and turns every ratio into NaN), so use the NaN-aware maximum.
        max_value = np.nanmax(values)
        if max_value == 0:
            raise ZeroDivisionError(f"maximum of column {kind!r} is zero")
        result = (values / max_value).tolist()
    except Exception as e:
        print("Error processing data2")
        print(e)
        raise Exception(f"Error processing data2: {e}") from e
    return result

def get_header(data):
    """Build a header row: ``'name'`` followed by the index dates.

    Args:
        data: Object whose ``index`` yields items with a ``strftime`` method.

    Returns:
        A list of strings starting with ``'name'``, then every index entry
        formatted as ``YYYY-MM-DD`` in index order.
    """
    header = ['name']
    for stamp in data.index:
        header.append(stamp.strftime('%Y-%m-%d'))
    return header

def validate_data(data, kind, num):
    """Check that column ``kind`` of ``data`` has exactly ``num`` entries.

    Args:
        data: Table-like object where ``data[kind].values`` is the column.
        kind: Label of the column to measure.
        num: Required number of entries.

    Returns:
        None when the length matches.

    Raises:
        Exception: If the length differs from ``num`` (shorter or longer);
            the message states the expected and actual counts.
    """
    actual = len(data[kind].values)
    if actual != num:
        print(f"Not enough data {actual}")
        # Give the exception a message so callers that print it see the reason.
        raise Exception(f"Expected {num} rows in column {kind!r}, got {actual}")

In [6]:
# Load the ticker-symbol sheet: a CSV read without a header row, with its
# column labelled 'name'.
name_sheet_df = pd.read_csv('../database/america_stock_name_sheet_v2.csv', header=None, names=['name'])

In [10]:
# Build one max-normalised "Close" row per ticker and checkpoint the growing
# table to ./dataset_<YYYYMMDD>.csv after every successful ticker.
# Requires `name_sheet_df` from the cell that reads the name sheet; on a fresh
# kernel without it this cell fails with NameError.
df = pd.DataFrame()
now = datetime.now()
date_time = now.strftime("%Y%m%d")
for name in name_sheet_df['name']:
    print(name)
    try:
        ticker_info = ticker(name)
        hist = get_data_period(ticker_info, "6mo")
        header_list = get_header(hist)
        # NOTE(review): a "6mo" window of daily rows is presumably far shorter
        # than 503 rows, in which case every ticker is rejected here. Confirm
        # the intended period / row count.
        validate_data(hist, "Close", num=503)
        data = process_data2(hist, "Close")
        data = process_data(data, "Close", name)
        df_2 = pd.DataFrame(data, columns=header_list)
        df = pd.concat([df, df_2], axis=0)
        df.to_csv(f'./dataset_{date_time}.csv', index=False)
        # Pause after each successful ticker before requesting the next one.
        time.sleep(3)
    except Exception as e:
        # A bare `Exception` has an empty str(), so report the ticker and the
        # underlying error rather than a possibly blank line.
        reason = e.__cause__ or e.__context__ or e
        print(f"Skipping {name}: {reason!r}")

NameError: name 'name_sheet_df' is not defined

In [19]:
# Spot-check the helpers on a single symbol: one year of history for XERS.
ticker_info = ticker("XERS")
hist = get_data_period(ticker_info, "1y")

In [20]:
# Display the most recently fetched history frame.
hist

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-24 00:00:00-04:00,1.41,1.480,1.3200,1.47,981700,0.0,0.0
2022-10-25 00:00:00-04:00,1.47,1.570,1.4600,1.56,637500,0.0,0.0
2022-10-26 00:00:00-04:00,1.60,1.640,1.5700,1.63,712200,0.0,0.0
2022-10-27 00:00:00-04:00,1.65,1.650,1.5700,1.59,478700,0.0,0.0
2022-10-28 00:00:00-04:00,1.58,1.700,1.5800,1.65,662000,0.0,0.0
...,...,...,...,...,...,...,...
2023-10-17 00:00:00-04:00,1.89,1.950,1.8400,1.86,827400,0.0,0.0
2023-10-18 00:00:00-04:00,1.86,1.870,1.7700,1.77,887100,0.0,0.0
2023-10-19 00:00:00-04:00,1.78,1.790,1.6800,1.73,798800,0.0,0.0
2023-10-20 00:00:00-04:00,1.74,1.800,1.7000,1.74,843100,0.0,0.0


In [103]:

# Batch-download 3 days of 'Adj Close' prices for the first two tickers in the
# name sheet, then display the result.
data1 = yf.download(list(name_sheet_df['name'][:2]), period = "3d")['Adj Close']
data1

[*********************100%%**********************]  2 of 2 completed


Unnamed: 0_level_0,AACI,AADI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-10-20,10.69,4.09
2023-10-23,10.69,3.94
2023-10-24,,4.29


In [89]:
# Reset `df` to an empty DataFrame.
df = pd.DataFrame()

In [126]:
# Inspect the AACI column of `df1`.
# NOTE(review): `df1` is only assigned in a cell further down this notebook,
# so this cell depends on out-of-order execution.
df1['AACI']

Date
2023-10-18    10.69
2023-10-19    10.69
2023-10-20    10.69
2023-10-23    10.69
2023-10-24      NaN
Freq: B, Name: AACI, dtype: float64

In [19]:
# Download 3 months of 'Adj Close' prices for every ticker in the name sheet,
# chunk_size tickers per request, and save one row per ticker to
# ./dataset_<YYYYMMDD>.csv.
date_time = datetime.now().strftime("%Y%m%d")
name_sheet_list = list(name_sheet_df['name'])

chunk_size = 100  # number of tickers sent to each yf.download call
chunk_frames = []
for i in tqdm(range(0, len(name_sheet_list), chunk_size)):
    chunk = name_sheet_list[i:i+chunk_size]  # tickers for this request
    df2 = yf.download(chunk, period="3mo")['Adj Close']
    chunk_frames.append(df2)
    # Pause between requests.
    time.sleep(3)
# Join the per-chunk frames side by side (axis=1 adds columns) in a single
# call instead of re-concatenating the growing frame on every iteration.
df = pd.concat(chunk_frames, axis=1) if chunk_frames else pd.DataFrame()
df_transposed = df.transpose()
df_transposed.columns = pd.to_datetime(df_transposed.columns, format='%Y-%m-%d')
df_transposed = df_transposed.rename_axis("name")
# dropna() returns a new frame; the result used to be discarded, so rows with
# missing prices were never actually removed before saving.
df_transposed = df_transposed.dropna()
df_transposed.to_csv(f'./dataset_{date_time}.csv')

  0%|                                                                                           | 0/30 [00:00<?, ?it/s]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['ACGN']: Exception('%ticker%: No data found, symbol may be delisted')





  3%|██▊                                                                                | 1/30 [00:05<02:45,  5.72s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['AMRS']: Exception('%ticker%: No data found, symbol may be delisted')





  7%|█████▌                                                                             | 2/30 [00:11<02:39,  5.70s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['ARCK', 'ARBG']: Exception('%ticker%: No data found, symbol may be delisted')





 10%|████████▎                                                                          | 3/30 [00:17<02:34,  5.74s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['BMTC']: Exception('%ticker%: No data found, symbol may be delisted')





 13%|███████████                                                                        | 4/30 [00:22<02:27,  5.68s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['BRPM', 'BWC']: Exception('%ticker%: No data found, symbol may be delisted')





 17%|█████████████▊                                                                     | 5/30 [00:28<02:20,  5.62s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['CEGVV', 'CLAQ']: Exception('%ticker%: No data found, symbol may be delisted')





 20%|████████████████▌                                                                  | 6/30 [00:34<02:15,  5.66s/it]

[*********************100%%**********************]  100 of 100 completed


 23%|███████████████████▎                                                               | 7/30 [00:39<02:08,  5.59s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['DNAB', 'DCRD']: Exception('%ticker%: No data found, symbol may be delisted')





 27%|██████████████████████▏                                                            | 8/30 [00:45<02:02,  5.58s/it]

[*********************100%%**********************]  100 of 100 completed


 30%|████████████████████████▉                                                          | 9/30 [00:50<01:57,  5.60s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['FCCY']: Exception('%ticker%: No data found, symbol may be delisted')





 33%|███████████████████████████▎                                                      | 10/30 [00:56<01:52,  5.62s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['FLDM']: Exception('%ticker%: No data found, symbol may be delisted')





 37%|██████████████████████████████                                                    | 11/30 [01:01<01:46,  5.60s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['GGPI', 'HAPP']: Exception('%ticker%: No data found, symbol may be delisted')





 40%|████████████████████████████████▊                                                 | 12/30 [01:07<01:41,  5.63s/it]

[*********************100%%**********************]  100 of 100 completed


 43%|███████████████████████████████████▌                                              | 13/30 [01:13<01:35,  5.62s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['IMRA']: Exception('%ticker%: No data found, symbol may be delisted')





 47%|██████████████████████████████████████▎                                           | 14/30 [01:18<01:29,  5.59s/it]

[*********************100%%**********************]  100 of 100 completed


2 Failed downloads:
['KAII', 'JYAC']: Exception('%ticker%: No data found, symbol may be delisted')





 50%|█████████████████████████████████████████                                         | 15/30 [01:24<01:24,  5.61s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['LJAQ']: Exception('%ticker%: No data found, symbol may be delisted')





 53%|███████████████████████████████████████████▋                                      | 16/30 [01:30<01:19,  5.66s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['MCFE']: Exception('%ticker%: No data found, symbol may be delisted')





 57%|██████████████████████████████████████████████▍                                   | 17/30 [01:35<01:13,  5.64s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['MSPR']: Exception('%ticker%: No data found, symbol may be delisted')





 60%|█████████████████████████████████████████████████▏                                | 18/30 [01:41<01:08,  5.74s/it]

[*********************100%%**********************]  100 of 100 completed


 63%|███████████████████████████████████████████████████▉                              | 19/30 [01:47<01:02,  5.73s/it]

[*********************100%%**********************]  100 of 100 completed


 67%|██████████████████████████████████████████████████████▋                           | 20/30 [01:52<00:56,  5.65s/it]

[*********************100%%**********************]  100 of 100 completed


3 Failed downloads:
['PDCE', 'PHAS', 'PME']: Exception('%ticker%: No data found, symbol may be delisted')





 70%|█████████████████████████████████████████████████████████▍                        | 21/30 [01:58<00:50,  5.66s/it]

[*********************100%%**********************]  100 of 100 completed


 73%|████████████████████████████████████████████████████████████▏                     | 22/30 [02:04<00:45,  5.70s/it]

[*********************100%%**********************]  100 of 100 completed


3 Failed downloads:
['RBNC', 'REUN', 'RCHG']: Exception('%ticker%: No data found, symbol may be delisted')





 77%|██████████████████████████████████████████████████████████████▊                   | 23/30 [02:10<00:39,  5.69s/it]

[*********************100%%**********************]  100 of 100 completed


 80%|█████████████████████████████████████████████████████████████████▌                | 24/30 [02:15<00:33,  5.62s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['SLCT']: Exception('%ticker%: No data found, symbol may be delisted')





 83%|████████████████████████████████████████████████████████████████████▎             | 25/30 [02:21<00:28,  5.65s/it]

[*********************100%%**********************]  100 of 100 completed


 87%|███████████████████████████████████████████████████████████████████████           | 26/30 [02:26<00:22,  5.63s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['TIOA']: Exception('%ticker%: No data found, symbol may be delisted')





 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [02:32<00:16,  5.67s/it]

[*********************100%%**********************]  100 of 100 completed


 93%|████████████████████████████████████████████████████████████████████████████▌     | 28/30 [02:38<00:11,  5.65s/it]

[*********************100%%**********************]  100 of 100 completed


1 Failed download:
['WTRH']: Exception('%ticker%: No data found, symbol may be delisted')





 97%|███████████████████████████████████████████████████████████████████████████████▎  | 29/30 [02:43<00:05,  5.56s/it]

[*********************100%%**********************]  52 of 52 completed


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:47<00:00,  5.59s/it]


In [12]:
# Download one year of data for the first two tickers in the name sheet and
# show the 'Adj Close' part of the result.
df3 = yf.download(list(name_sheet_df['name'][:2]), period="1y")
df3['Adj Close']

[*********************100%%**********************]  2 of 2 completed


Unnamed: 0_level_0,AACI,AADI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-10-27,9.980,12.700
2022-10-28,9.980,12.950
2022-10-31,9.960,12.915
2022-11-01,9.965,12.880
2022-11-02,9.965,12.440
...,...,...
2023-10-20,10.690,4.090
2023-10-23,10.690,3.940
2023-10-24,10.690,4.290
2023-10-25,10.690,4.220


In [7]:
# Fetch one year of 'Adj Close' prices for name-sheet entries 0-1 and 2-3 in
# two separate requests.
df1 = yf.download(list(name_sheet_df['name'][:2]), period="1y")['Adj Close']
df2 = yf.download(list(name_sheet_df['name'][2:4]), period="1y")['Adj Close']

# Join the two DataFrames side by side (axis=1 concatenates along columns).
df_concat = pd.concat([df1, df2], axis=1)

[*********************100%%**********************]  2 of 2 completed
[*********************100%%**********************]  2 of 2 completed


In [13]:
# Write the combined frame to lstm_temp.csv in the working directory.
df_concat.to_csv('lstm_temp.csv')

In [74]:
# Swap rows and columns of `data`.
# NOTE(review): no cell visible here assigns a DataFrame to `data`; it looks
# like leftover kernel state. Confirm where it comes from.
df_transposed = data.transpose()

In [75]:
# Parse the column labels as datetimes using the YYYY-MM-DD format.
df_transposed.columns = pd.to_datetime(df_transposed.columns, format='%Y-%m-%d')

In [76]:
# Display the transposed frame.
df_transposed

Date,2023-07-25,2023-07-26,2023-07-27,2023-07-28,2023-07-31,2023-08-01,2023-08-02,2023-08-03,2023-08-04,2023-08-07,...,2023-10-11,2023-10-12,2023-10-13,2023-10-16,2023-10-17,2023-10-18,2023-10-19,2023-10-20,2023-10-23,2023-10-24
AACI,10.560,10.570,10.5700,10.590000,10.570,10.500,10.5600,10.570,10.500,10.5000,...,10.670,10.630,10.69,10.720,10.690,10.690,10.690,10.690,10.690,
AADI,5.710,5.760,5.3800,5.360000,5.330,5.250,5.2600,5.660,5.730,5.9900,...,4.380,4.200,4.23,4.220,4.180,3.990,3.960,4.090,3.940,4.2900
AAL,16.650,16.760,16.5700,16.790001,16.750,16.230,15.9500,15.980,15.840,15.9900,...,12.480,12.060,11.72,11.940,11.940,11.360,11.450,11.080,11.290,11.2100
AAME,1.870,1.940,1.8700,1.830000,1.830,1.770,1.7600,1.810,1.920,1.8600,...,1.820,1.860,1.85,1.820,1.730,1.800,1.930,1.810,1.940,
AAOI,8.430,7.950,6.1900,6.710000,6.750,7.030,7.3900,6.590,11.010,12.3800,...,7.790,7.950,7.55,7.650,7.590,7.550,7.450,6.910,7.170,7.4400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZVRA,4.770,4.710,4.6400,4.830000,4.880,4.900,4.8100,4.840,4.820,4.8600,...,4.770,4.560,4.47,4.460,4.530,4.370,4.370,4.210,4.290,4.3700
ZVSA,0.168,0.166,0.1620,0.169000,0.173,0.170,0.1700,0.160,0.170,0.1640,...,0.138,0.131,0.13,0.126,0.122,0.128,0.128,0.125,0.118,0.1156
ZYME,7.450,7.510,7.2800,7.270000,7.460,7.450,7.3200,7.320,7.390,7.2600,...,6.495,6.420,6.50,7.050,7.250,7.160,7.210,7.190,7.000,7.0100
ZYNE,0.356,0.364,0.3551,0.359800,0.365,0.355,0.3688,0.352,0.353,0.3421,...,1.300,1.300,1.30,,,,,,,


In [77]:
# Label the row index 'name'.
df_transposed = df_transposed.rename_axis("name")

In [78]:
# Save the frame to ./dataset_<YYYYMMDD>.csv, stamped with the current date.
now = datetime.now()
date_time = now.strftime("%Y%m%d")
df_transposed.to_csv(f'./dataset_{date_time}.csv')

In [16]:
# Show every entry of the name sheet's 'name' column as a plain Python list.
list(name_sheet_df['name'])

['AACI',
 'AADI',
 'AAL',
 'AAME',
 'AAOI',
 'AAON',
 'AAPL',
 'ABCB',
 'ABCL',
 'ABEO',
 'ABIO',
 'ABL',
 'ABNB',
 'ABOS',
 'ABSI',
 'ABUS',
 'ABVC',
 'ACAB',
 'ACAD',
 'ACAH',
 'ACAX',
 'ACB',
 'ACBA',
 'ACCD',
 'ACDC',
 'ACER',
 'ACET',
 'ACGL',
 'ACGN',
 'ACHC',
 'ACHV',
 'ACIC',
 'ACIU',
 'ACIW',
 'ACLS',
 'ACLX',
 'ACMR',
 'ACNB',
 'ACNT',
 'ACOR',
 'ACRS',
 'ACRX',
 'ACST',
 'ACTG',
 'ACVA',
 'ACXP',
 'ADBE',
 'ADD',
 'ADES',
 'ADI',
 'ADIL',
 'ADMA',
 'ADN',
 'ADOC',
 'ADP',
 'ADPT',
 'ADSE',
 'ADSK',
 'ADTH',
 'ADTN',
 'ADTX',
 'ADUS',
 'ADV',
 'ADVM',
 'AEHL',
 'AEHR',
 'AEI',
 'AEIS',
 'AEMD',
 'AENT',
 'AEP',
 'AEY',
 'AEYE',
 'AEZS',
 'AFBI',
 'AFIB',
 'AFMD',
 'AFRI',
 'AFRM',
 'AFYA',
 'AGAE',
 'AGBA',
 'AGEN',
 'AGFY',
 'AGIL',
 'AGIO',
 'AGLE',
 'AGMH',
 'AGRI',
 'AGRX',
 'AGYS',
 'AHCO',
 'AIB',
 'AIP',
 'AIRG',
 'AIRS',
 'AIRT',
 'AKAM',
 'AKAN',
 'AKBA',
 'AKLI',
 'AKRO',
 'AKTS',
 'AKU',
 'AKYA',
 'ALBT',
 'ALCO',
 'ALDX',
 'ALEC',
 'ALGM',
 'ALGN',
 'ALGS',
 'ALGT

In [12]:
# Display the combined frame.
df_concat

Unnamed: 0_level_0,XELA,XELB,XENE,XERS,XFOR,XGN,XLO,XMTR,XNCR,XOMA,...,ZNTL,ZS,ZTEK,ZUMZ,ZURA,ZVRA,ZVSA,ZYME,ZYNE,ZYXI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-10-19 00:00:00,3.01,1.02,34.950001,1.73,0.83,1.71,1.95,16.34,18.33,17.41,...,18.0,169.070007,1.19,17.219999,5.16,4.37,0.128,7.21,,7.66
2023-10-20 00:00:00,3.04,1.035,33.240002,1.74,0.822,1.62,1.94,15.87,18.190001,17.049999,...,17.950001,162.190002,1.24,17.01,4.62,4.21,0.125,7.19,,7.34
2023-10-23 00:00:00,3.1,1.04,32.700001,1.69,0.822,1.614,1.883,15.94,17.889999,17.33,...,16.559999,162.479996,1.22,16.74,4.93,4.29,0.118,7.0,,7.29
2023-10-24 00:00:00,3.16,1.04,33.150002,1.84,0.82,1.64,1.98,16.08,18.15,16.639999,...,16.67,164.789993,1.26,16.74,5.12,4.37,0.116,7.01,,7.3
2023-10-25 00:00:00,3.04,1.03,32.950001,1.73,0.7551,1.59,1.9799,14.88,17.74,17.58,...,16.0,157.610001,1.25,16.77,5.26,4.39,0.1145,6.97,,7.1
