In [18]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse, mean_absolute_error as mae, r2_score as r2

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from torchinfo import summary

from torch.utils.tensorboard import SummaryWriter

from tqdm.notebook import tqdm

import requests
import json
from datetime import datetime

import os
from key import BASE_URL

In [19]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Smoke-test the data server before doing any real work. The timeout makes an
# unreachable server fail quickly instead of hanging the kernel indefinitely
# (requests applies no timeout unless one is passed explicitly).
print(requests.get(f'{BASE_URL}/', timeout=10).text)

cuda:0
Hello Server



In [20]:
# Minute-resolution timestamp that names this run's output directory.
current_datetime = f'{datetime.now():%Y-%m-%d-%H-%M}'
path = './models_lstm/' + current_datetime

# Create the run directory; an already existing directory is not an error.
# A failure is only reported, so the notebook keeps running afterwards.
try:
    os.makedirs(path, exist_ok=True)
    print(f'フォルダが作成されました: {path}')
except OSError as exc:
    print(f'フォルダの作成に失敗しました: {exc}')

# Load the stock list from the JSON file next to the notebook.
with open('./stock_name.json', mode='r', encoding='utf-8') as stock_file:
    data = json.loads(stock_file.read())

フォルダが作成されました: ./models_lstm/2024-12-08-22-05


In [21]:
class MyLSTM(nn.Module):
    """LSTM regressor that maps a sequence to a single scalar per sample.

    Args:
        feature_size: Number of input features per time step.
        hidden_dim: Size of the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, feature_size, hidden_dim, n_layers):
        super().__init__()
        self.lstm = nn.LSTM(feature_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step to one value.

        Args:
            x: Batched input laid out as (batch, sequence, feature), matching
                ``batch_first=True``.

        Returns:
            Tensor of shape (batch, 1).
        """
        # nn.LSTM initialises h_0 and c_0 to zeros when no state is passed, and
        # creates them on the input's device with a matching dtype. That avoids
        # a per-call CPU allocation plus device copy, and keeps the model usable
        # after .double() / .half().
        out, _ = self.lstm(x)
        # Only the output of the final time step feeds the regression head.
        return self.fc(out[:, -1, :])

In [22]:
def norm(column):
    """Scale ``column`` to unit Euclidean (L2) length.

    Args:
        column: Array-like of numbers (for example a pandas Series).

    Returns:
        A ``(scaled, norm_value)`` tuple. ``scaled`` is ``column / norm_value``;
        when the norm is 0 the column is returned unchanged to avoid dividing
        by zero. The tuple shape is the same in both cases, so callers can
        always index ``[0]`` for the values and ``[1]`` for the norm.
    """
    norm_value = np.linalg.norm(column)
    if norm_value == 0:
        # An all-zero column cannot be scaled; hand it back as-is, but still
        # as a tuple so the return shape does not depend on the data.
        return column, norm_value
    return column / norm_value, norm_value

In [24]:
# Columns that are normalised and kept, in the column order of the result frame.
feature_columns = [
    'NY_Dow',
    'SP_500',
    'content_concern',
    'content_despair',
    'content_excitement',
    'content_optimism',
    'content_stability',
    'headline_concern',
    'headline_despair',
    'headline_excitement',
    'headline_optimism',
    'headline_stability',
    'value',
    'vix',
]

# Upper bound, in seconds, for a single data request; tune for slow servers.
request_timeout = 60

for category in data:
    for company in data[category]:
        stock_code = company['symbol']
        print(stock_code)

        # Fetch the per-symbol JSON payload. The timeout keeps a stalled server
        # from blocking the cell forever, and raise_for_status() surfaces HTTP
        # errors here instead of as a confusing parsing failure further down.
        response = requests.get(f'{BASE_URL}/ml_data/{stock_code}', timeout=request_timeout)
        response.raise_for_status()
        df = pd.DataFrame(response.json())

        # Index by date, sum everything per calendar day, then restore the
        # 'date' column so it can be re-attached to the normalised frame.
        df['date'] = pd.to_datetime(df['date'])
        df = df.set_index('date').resample('D').sum().reset_index()

        # Normalise every feature column with the notebook's norm() helper.
        data_dict = {}
        for col in feature_columns:
            normalized = norm(df[col])
            # norm() may return either (scaled, norm_value) or the bare column;
            # keep only the column values in both cases.
            if isinstance(normalized, tuple):
                normalized = normalized[0]
            data_dict[col] = normalized

        data_result = pd.DataFrame(data_dict)

        # Use the date as the index of the normalised frame again.
        data_result['date'] = df['date']
        data_result = data_result.set_index('date')

        # info() prints its report itself and returns None, so it is called
        # directly rather than wrapped in print().
        data_result.info()


A
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 154 entries, 2024-07-08 to 2024-12-08
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   NY_Dow               154 non-null    float64
 1   SP_500               154 non-null    float64
 2   content_concern      154 non-null    float64
 3   content_despair      154 non-null    float64
 4   content_excitement   154 non-null    float64
 5   content_optimism     154 non-null    float64
 6   content_stability    154 non-null    float64
 7   headline_concern     154 non-null    float64
 8   headline_despair     154 non-null    float64
 9   headline_excitement  154 non-null    float64
 10  headline_optimism    154 non-null    float64
 11  headline_stability   154 non-null    float64
 12  value                154 non-null    float64
 13  vix                  154 non-null    float64
dtypes: float64(14)
memory usage: 18.0 KB
None
AAPL


KeyboardInterrupt: 