In [1]:
import os
import glob
from datetime import datetime
import warnings
import gc

# import japanize_matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import json
import seaborn as sns
from dateutil import tz
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

warnings.simplefilter("ignore")

In [2]:
# Root folder (on the mounted Google Drive) that holds the raw J-Quants API files.
RAW = "/content/drive/MyDrive/日本株/raw/J-QuantsAPI"
# CSV stored directly under RAW.
# NOTE(review): presumably the J-Quants login credentials - confirm against the cell that reads it.
PATH_ID = RAW + "/id.csv"

# NOTE(review): presumably switches between a full re-download and an incremental
# update - confirm against the cells that read this flag.
FETCH_ALL_DATA = False

# 2024-02-10: code paths for the Light plan were added.
# Caution: only the "list" and "price" fetches support the Light plan.
LIGHT_PLAN = True

In [3]:
def reduce_mem_usage(df):
    """Downcast the numeric columns of ``df`` to smaller dtypes to save memory.

    The frame is modified in place and is also returned. Memory usage before
    and after the conversion is printed.

    Only plain NumPy integer and float columns are converted:

    * signed integers go to the narrowest of int8/int16/int32 that holds the
      column's min and max (bounds inclusive); otherwise they are left as is;
    * unsigned integers are treated the same way with uint8/uint16/uint32;
    * floats go to float32 when min and max lie strictly inside the float32
      range, otherwise to float64 (this also covers all-NaN and +/-inf data).

    Every other column (object, bool, datetime, category and other pandas
    extension dtypes) is left untouched.

    Args:
        df: DataFrame to shrink.

    Returns:
        The same DataFrame object, with converted columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    signed_ladder = (np.int8, np.int16, np.int32)
    unsigned_ladder = (np.uint8, np.uint16, np.uint32)
    float32_info = np.finfo(np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        # Skip everything that is not a NumPy int/uint/float column. Bool and
        # datetime columns must not be pushed through the float branch, and
        # extension dtypes (category, nullable ints, ...) may not support
        # min()/astype() the way this function needs.
        if not isinstance(col_type, np.dtype) or col_type.kind not in "iuf":
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == "f":
            # float16 is deliberately not used as a target.
            if c_min > float32_info.min and c_max < float32_info.max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)
            continue

        ladder = signed_ladder if col_type.kind == "i" else unsigned_ladder
        for candidate in ladder:
            bounds = np.iinfo(candidate)
            # For an empty column min()/max() are NaN, both comparisons are
            # False and the column keeps its dtype.
            if bounds.min <= c_min and c_max <= bounds.max:
                df[col] = df[col].astype(candidate)
                break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the percentage against a zero-byte starting size.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

In [4]:
# Capture the current local time once and split out its calendar fields.
today = datetime.now()
year, month, day = today.year, today.month, today.day
# Zero-padded YYYYMMDD tag for this run.
# NOTE(review): presumably used to name saved files - confirm against later cells.
save_date = "{}{:02}{:02}".format(year, month, day)
save_date

'20240228'

# J-Quants API アクセスクライアント作成

- ラッパー群　J-Quants API の各APIエンドポイントに対応しています。
  - get_id_token
  - get_listed_info
  - get_listed_sections
  - get_market_segments
  - get_prices_daily_quotes
  - get_fins_statements
  - get_fins_announcement
- ユーティリティー群　日付範囲を指定して一括でデータ取得して、取得したデータを結合して返すようなユーティリティーメソッドです。
  - get_list_range
  - get_price_range
  - get_statements_range

In [5]:
class JQuantsAPIDemo:
  """
  Client that fetches data from the J-Quants API.
  ref. https://jpx.gitbook.io/j-quants-api/
  """
  # Base URL that every endpoint path in this class is appended to.
  JQUANTS_API_BASE = "https://api.jquants.com/v1"

  def __init__(self, address: str, passcode: str) -> None:
    """
    Args:
        address : J-Quantsにログインするのに使うアドレス
        passcode: J-Quantsにログインするのに使うパスワード
    """
    self.address = address
    self.passcode = passcode
    self._refresh_token = ""
    self._id_token = ""
    self._id_token_expire = pd.Timestamp.utcnow()

  def _base_headers(self) -> dict:
    """
    J-Quants API にアクセスする際にヘッダーにIDトークンを設定
    """
    if not self._refresh_token:
      self.get_refresh_token()
    headers = {"Authorization": f"Bearer {self.get_id_token()}"}
    return headers

  def _request_session(
    self,
    status_forcelist=[429, 500, 502, 503, 504],
    method_whitelist=["HEAD", "GET", "OPTIONS"]
  ):
    """
    Create a requests session with automatic retries configured.

    Args:
        status_forcelist: HTTP status codes passed to ``Retry`` as
            ``status_forcelist``
        method_whitelist: HTTP methods passed to ``Retry`` as
            ``allowed_methods``
    Returns:
        requests.Session: session whose http:// and https:// adapters retry
        up to 3 times
    """
    session = requests.Session()
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=3,
            status_forcelist=status_forcelist,
            allowed_methods=method_whitelist,
        )
    )
    # The same adapter serves both URL schemes.
    for scheme in ("https://", "http://"):
      session.mount(scheme, retrying_adapter)
    return session

  def _get(self, url: str, params: dict = None) -> requests.Response:
    """
    requests の get 用ラッパー

    ヘッダーにアクセストークンを設定
    タイムアウトを設定

    Args:
        url: アクセスするURL
        params: パラメーター

    Returns:
        requests.Response: レスポンス
    """
    s = self._request_session()

    headers = self._base_headers()
    ret = s.get(url, params=params, headers=headers, timeout=30)
    ret.raise_for_status()
    return ret

  def _post(self, url: str, payload: json = None, headers: dict = None) -> requests.Response:
    """
    requests の get 用ラッパー

    ヘッダーにアクセストークンを設定
    タイムアウトを設定

    Args:
        url: アクセスするURL
        params: パラメーター

    Returns:
        requests.Response: レスポンス
    """
    s = self._request_session(method_whitelist=["POST"])

    ret = s.post(url, data=payload, headers=headers, timeout=30)
    ret.raise_for_status()
    return ret

  def get_refresh_token(self) -> None:
    """
    Log in with the stored credentials and store the returned refresh token.

    The credentials are POSTed as a JSON body to ``/token/auth_user``; the
    ``refreshToken`` field of the reply is kept in ``self._refresh_token``.
    """
    credentials = json.dumps({"mailaddress": self.address, "password": self.passcode})
    endpoint = "{}/token/auth_user".format(self.JQUANTS_API_BASE)
    reply = self._post(endpoint, payload=credentials)
    self._refresh_token = reply.json()["refreshToken"]

  def get_id_token(self) -> str:
    """
    Return a valid ID token, requesting a new one when the cached one expired.

    The cached token is reused until ``self._id_token_expire``. Otherwise a
    new one is requested from ``/token/auth_refresh`` with the refresh token,
    logging in first when no refresh token is stored yet.

    Returns:
        str: the ID token
    """
    if self._id_token_expire > pd.Timestamp.utcnow():
      return self._id_token

    # Without this, calling get_id_token() directly on a fresh client would
    # send an empty refresh token to the API.
    if not self._refresh_token:
      self.get_refresh_token()

    url = f"{self.JQUANTS_API_BASE}/token/auth_refresh?refreshtoken={self._refresh_token}"
    ret = self._post(url)
    self._id_token = ret.json()["idToken"]
    # Cache for 23 hours.
    # NOTE(review): presumably a safety margin below the API's ID-token lifetime - confirm.
    self._id_token_expire = pd.Timestamp.utcnow() + pd.Timedelta(23, unit="hour")
    return self._id_token


  def get_listed_info(
          self,
          code: str = "",
          date: str = "",
          light_plan = False
  ) -> pd.DataFrame:
    """
    Fetch the list of listed issues from ``/listed/info``.

    Args:
        code: issue code (optional; left out of the request when empty)
        date: reference date (optional; left out of the request when empty)
        light_plan: when False, ``MarginCode`` and ``MarginCodeName`` are
            selected as well (per the original author's note these are only
            available on the Standard plan and above)

    Returns:
        pd.DataFrame: the selected columns sorted by ``Code`` with ``Date``
        parsed to datetime; an empty frame with the same columns when the
        API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/listed/info"
    params = {}
    if code:
      params["code"] = code
    if date:
      params["date"] = date
    d = self._get(url, params).json()
    df = pd.DataFrame.from_dict(d["info"])

    # CompanyName, Sector17CodeName and Sector33CodeName are intentionally
    # not part of the returned columns.
    cols = [
      "Date",
      "Code",
      "CompanyNameEnglish",
      "Sector17Code",
      "Sector33Code",
      "ScaleCategory",
      "MarketCode",
      "MarketCodeName",
    ]
    if not light_plan:
      cols += [
        "MarginCode",
        "MarginCodeName"
      ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)

    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    return df.sort_values("Code")[cols]

  def get_list_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo")),
    light_plan: bool = True
  ) -> pd.DataFrame:
    """
    全銘柄情報を日付範囲指定して取得（7日ごと）

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 銘柄情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="W-MON")
    counter = 1
    for s in dates:
      df = self.get_listed_info(
          date=s.strftime("%Y%m%d"),
          light_plan=light_plan
      )
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_prices_daily_quotes(
      self,
      code: str = "",
      from_yyyymmdd: str = "",
      to_yyyymmdd: str = "",
      date_yyyymmdd: str = "",
      premium_plan: bool = True
  ) -> pd.DataFrame:
    """
    Fetch daily stock prices from ``/prices/daily_quotes``.

    Args:
        code: issue code (left out of the request when empty)
        from_yyyymmdd: first date of the range
        to_yyyymmdd: last date of the range
        date_yyyymmdd: single date; when given, from/to are ignored
        premium_plan: when True, the Morning*/Afternoon* session columns
            are selected as well
    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and ``Code``
        with a fresh index and ``Date`` parsed to datetime; an empty frame
        with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/prices/daily_quotes"
    params = {}
    if code:
      params["code"] = code
    if date_yyyymmdd != "":
      params["date"] = date_yyyymmdd
    else:
      if from_yyyymmdd != "":
        params["from"] = from_yyyymmdd
      if to_yyyymmdd != "":
        params["to"] = to_yyyymmdd

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Each page is parsed only once.
    page = self._get(url, params).json()
    data = page["daily_quotes"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["daily_quotes"]

    df = pd.DataFrame.from_dict(data)
    cols = [
      "Date",
      "Code",
      "Open",
      "High",
      "Low",
      "Close",
      "UpperLimit",
      "LowerLimit",
      "Volume",
      "TurnoverValue",
      "AdjustmentFactor",
      "AdjustmentOpen",
      "AdjustmentHigh",
      "AdjustmentLow",
      "AdjustmentClose",
      "AdjustmentVolume",
    ]
    if premium_plan:
      cols += [
        "MorningOpen",
        "MorningHigh",
        "MorningLow",
        "MorningClose",
        "MorningUpperLimit",
        "MorningLowerLimit",
        "MorningVolume",
        "MorningTurnoverValue",
        "MorningAdjustmentOpen",
        "MorningAdjustmentHigh",
        "MorningAdjustmentLow",
        "MorningAdjustmentClose",
        "MorningAdjustmentVolume",
        "AfternoonOpen",
        "AfternoonHigh",
        "AfternoonLow",
        "AfternoonClose",
        "AfternoonUpperLimit",
        "AfternoonLowerLimit",
        "AfternoonVolume",
        "AfternoonAdjustmentOpen",
        "AfternoonAdjustmentHigh",
        "AfternoonAdjustmentLow",
        "AfternoonAdjustmentClose",
        "AfternoonAdjustmentVolume",
      ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values(["Date", "Code"]).reset_index(drop=True)
    return df[cols]

  def get_price_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo")),
    premium_plan: bool = True
  ) -> pd.DataFrame:
    """
    全銘柄の株価情報を日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 株価情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      df = self.get_prices_daily_quotes(
          date_yyyymmdd=s.strftime("%Y%m%d"),
          premium_plan=premium_plan
      )
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_fins_statements(
      self,
      code: str = "",
      date_yyyymmdd: str = ""
  ) -> pd.DataFrame:
    """
    Fetch financial statement data from ``/fins/statements``.

    Args:
        code: issue code
        date_yyyymmdd: date (YYYYMMDD or YYYY-MM-DD)

    Returns:
        pd.DataFrame: the selected columns sorted by ``DisclosedDate`` and
        ``DisclosedTime``; the date columns are parsed to datetime and
        ``DisclosureNumber`` to a number (unparseable values become NaN).
        An empty frame with the same columns when the API returns no rows.
    """
    url = f"{self.JQUANTS_API_BASE}/fins/statements"
    params = {
        "code": code,
        "date": date_yyyymmdd,
    }
    # Follow pagination_key like the other paged getters of this class do,
    # so a multi-page response is not silently truncated to its first page.
    page = self._get(url, params).json()
    data = page["statements"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["statements"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'DisclosedDate',
        'DisclosedTime',
        'LocalCode',
        'DisclosureNumber',
        'TypeOfDocument',
        'TypeOfCurrentPeriod',
        'CurrentPeriodStartDate',
        'CurrentPeriodEndDate',
        'CurrentFiscalYearStartDate',
        'CurrentFiscalYearEndDate',
        'NextFiscalYearStartDate',
        'NextFiscalYearEndDate',
        'NetSales',
        'OperatingProfit',
        'OrdinaryProfit',
        'Profit',
        'EarningsPerShare',
        'DilutedEarningsPerShare',
        'TotalAssets',
        'Equity',
        'EquityToAssetRatio',
        'BookValuePerShare',
        'CashFlowsFromOperatingActivities',
        'CashFlowsFromInvestingActivities',
        'CashFlowsFromFinancingActivities',
        'CashAndEquivalents',
        'ResultDividendPerShare1stQuarter',
        'ResultDividendPerShare2ndQuarter',
        'ResultDividendPerShare3rdQuarter',
        'ResultDividendPerShareFiscalYearEnd',
        'ResultDividendPerShareAnnual',
        'DistributionsPerUnit(REIT)',
        'ResultTotalDividendPaidAnnual',
        'ResultPayoutRatioAnnual',
        'ForecastDividendPerShare1stQuarter',
        'ForecastDividendPerShare2ndQuarter',
        'ForecastDividendPerShare3rdQuarter',
        'ForecastDividendPerShareFiscalYearEnd',
        'ForecastDividendPerShareAnnual',
        'ForecastDistributionsPerUnit(REIT)',
        'ForecastTotalDividendPaidAnnual',
        'ForecastPayoutRatioAnnual',
        'NextYearForecastDividendPerShare1stQuarter',
        'NextYearForecastDividendPerShare2ndQuarter',
        'NextYearForecastDividendPerShare3rdQuarter',
        'NextYearForecastDividendPerShareFiscalYearEnd',
        'NextYearForecastDividendPerShareAnnual',
        'NextYearForecastDistributionsPerUnit(REIT)',
        'NextYearForecastPayoutRatioAnnual',
        'ForecastNetSales2ndQuarter',
        'ForecastOperatingProfit2ndQuarter',
        'ForecastOrdinaryProfit2ndQuarter',
        'ForecastProfit2ndQuarter',
        'ForecastEarningsPerShare2ndQuarter',
        'NextYearForecastNetSales2ndQuarter',
        'NextYearForecastOperatingProfit2ndQuarter',
        'NextYearForecastOrdinaryProfit2ndQuarter',
        'NextYearForecastProfit2ndQuarter',
        'NextYearForecastEarningsPerShare2ndQuarter',
        'ForecastNetSales',
        'ForecastOperatingProfit',
        'ForecastOrdinaryProfit',
        'ForecastProfit',
        'ForecastEarningsPerShare',
        'NextYearForecastNetSales',
        'NextYearForecastOperatingProfit',
        'NextYearForecastOrdinaryProfit',
        'NextYearForecastProfit',
        'NextYearForecastEarningsPerShare',
        'MaterialChangesInSubsidiaries',
        'ChangesBasedOnRevisionsOfAccountingStandard',
        'ChangesOtherThanOnesBasedOnRevisionsOfAccountingStandard',
        'ChangesInAccountingEstimates',
        'RetrospectiveRestatement',
        'NumberOfIssuedAndOutstandingSharesAtTheEndOfFiscalYearIncludingTreasuryStock',
        'NumberOfTreasuryStockAtTheEndOfFiscalYear',
        'AverageNumberOfShares',
        'NonConsolidatedNetSales',
        'NonConsolidatedOperatingProfit',
        'NonConsolidatedOrdinaryProfit',
        'NonConsolidatedProfit',
        'NonConsolidatedEarningsPerShare',
        'NonConsolidatedTotalAssets',
        'NonConsolidatedEquity',
        'NonConsolidatedEquityToAssetRatio',
        'NonConsolidatedBookValuePerShare',
        'ForecastNonConsolidatedNetSales2ndQuarter',
        'ForecastNonConsolidatedOperatingProfit2ndQuarter',
        'ForecastNonConsolidatedOrdinaryProfit2ndQuarter',
        'ForecastNonConsolidatedProfit2ndQuarter',
        'ForecastNonConsolidatedEarningsPerShare2ndQuarter',
        'NextYearForecastNonConsolidatedNetSales2ndQuarter',
        'NextYearForecastNonConsolidatedOperatingProfit2ndQuarter',
        'NextYearForecastNonConsolidatedOrdinaryProfit2ndQuarter',
        'NextYearForecastNonConsolidatedProfit2ndQuarter',
        'NextYearForecastNonConsolidatedEarningsPerShare2ndQuarter',
        'ForecastNonConsolidatedNetSales',
        'ForecastNonConsolidatedOperatingProfit',
        'ForecastNonConsolidatedOrdinaryProfit',
        'ForecastNonConsolidatedProfit',
        'ForecastNonConsolidatedEarningsPerShare',
        'NextYearForecastNonConsolidatedNetSales',
        'NextYearForecastNonConsolidatedOperatingProfit',
        'NextYearForecastNonConsolidatedOrdinaryProfit',
        'NextYearForecastNonConsolidatedProfit',
        'NextYearForecastNonConsolidatedEarningsPerShare'
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)

    # Plain column assignment so the columns really change dtype: under
    # pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    date_cols = [
        "DisclosedDate",
        "CurrentPeriodStartDate",
        "CurrentPeriodEndDate",
        "CurrentFiscalYearStartDate",
        "CurrentFiscalYearEndDate",
        "NextFiscalYearStartDate",
        "NextFiscalYearEndDate",
    ]
    for date_col in date_cols:
      df[date_col] = pd.to_datetime(df[date_col], format="%Y-%m-%d")
    df["DisclosureNumber"] = pd.to_numeric(df["DisclosureNumber"], errors="coerce")
    return df.sort_values(["DisclosedDate", "DisclosedTime"])[cols]

  def get_fins_announcement(self) -> pd.DataFrame:
    """
    Fetch the earnings announcements scheduled for the next day from
    ``/fins/announcement``.

    Args:
        N/A

    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and ``Code``
        with ``Date`` parsed to datetime; an empty frame with the same
        columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/fins/announcement"
    # Follow pagination_key like the other paged getters of this class do,
    # so a multi-page response is not silently truncated to its first page.
    page = self._get(url).json()
    data = page["announcement"]
    while "pagination_key" in page:
      page = self._get(url, {"pagination_key": page["pagination_key"]}).json()
      data += page["announcement"]

    df = pd.DataFrame.from_dict(data)
    cols = [
      "Date",
      "Code",
      "CompanyName",
      "FiscalYear",
      "SectorName",
      "FiscalQuarter",
      "Section",
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    return df.sort_values(["Date", "Code"])[cols]

  def get_statements_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo")),
    cache_dir: str = "",
  ) -> pd.DataFrame:
    """
    財務情報を日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 財務情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      # fetch data via API or cache file
      cache_file = f"fins_statements_{s.strftime('%Y%m%d')}.csv.gz"
      if (cache_dir != "") and os.path.isfile(f"{cache_dir}/{s.strftime('%Y')}/{cache_file}"):
        df = pd.read_csv(f"{cache_dir}/{s.strftime('%Y')}/{cache_file}")
      else:
        df = self.get_fins_statements(date_yyyymmdd=s.strftime("%Y%m%d"))
        if cache_dir != "":
          # create year directory
          os.makedirs(f"{cache_dir}/{s.strftime('%Y')}", exist_ok=True)
          # write cache file
          df.to_csv(f"{cache_dir}/{s.strftime('%Y')}/{cache_file}", index=False)

      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_options(
      self,
      date_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch index option data from ``/option/index_option``.

    Args:
        date_yyyymmdd: date to fetch
    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and ``Code``
        with a fresh index and ``Date`` parsed to datetime; an empty frame
        with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/option/index_option"
    params = {"date": date_yyyymmdd}

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Follow-up pages are read from the
    # same "index_option" key as the first page.
    page = self._get(url, params).json()
    data = page["index_option"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["index_option"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'Date',
        'Code',
        'WholeDayOpen',
        'WholeDayHigh',
        'WholeDayLow',
        'WholeDayClose',
        'NightSessionOpen',
        'NightSessionHigh',
        'NightSessionLow',
        'NightSessionClose',
        'DaySessionOpen',
        'DaySessionHigh',
        'DaySessionLow',
        'DaySessionClose',
        'Volume',
        'OpenInterest',
        'TurnoverValue',
        'ContractMonth',
        'StrikePrice',
        'Volume(OnlyAuction)',
        'EmergencyMarginTriggerDivision',
        'PutCallDivision',
        'LastTradingDay',
        'SpecialQuotationDay',
        'SettlementPrice',
        'TheoreticalPrice',
        'BaseVolatility',
        'UnderlyingPrice',
        'ImpliedVolatility',
        'InterestRate'
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values(["Date", "Code"]).reset_index(drop=True)
    return df[cols]

  def get_options_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo"))
  ) -> pd.DataFrame:
    """
    全銘柄のオプション情報を日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: オプション情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      df = self.get_options(date_yyyymmdd=s.strftime("%Y%m%d"))
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_markets(
      self,
      from_yyyymmdd: str = "",
      to_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch trading statistics by investor type from ``/markets/trades_spec``.

    Args:
        from_yyyymmdd: start of the range (leave empty to fetch the whole period)
        to_yyyymmdd  : end of the range (leave empty to fetch the whole period)
    Returns:
        pd.DataFrame: the selected columns sorted by ``PublishedDate`` with
        a fresh index and the three date columns parsed to datetime; an
        empty frame with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/markets/trades_spec"
    # Each bound is sent only when it is actually given, so a lone `to` is
    # no longer dropped and a lone `from` no longer sends an empty `to`.
    params = {}
    if from_yyyymmdd:
      params["from"] = from_yyyymmdd
    if to_yyyymmdd:
      params["to"] = to_yyyymmdd

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Each page is parsed only once.
    page = self._get(url, params).json()
    data = page['trades_spec']
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page['trades_spec']

    df = pd.DataFrame.from_dict(data)
    cols = [
        'PublishedDate',
        'StartDate',
        'EndDate',
        'Section',
        'ProprietarySales',
        'ProprietaryPurchases',
        'ProprietaryTotal',
        'ProprietaryBalance',
        'BrokerageSales',
        'BrokeragePurchases',
        'BrokerageTotal',
        'BrokerageBalance',
        'TotalSales',
        'TotalPurchases',
        'TotalTotal',
        'TotalBalance',
        'IndividualsSales',
        'IndividualsPurchases',
        'IndividualsTotal',
        'IndividualsBalance',
        'ForeignersSales',
        'ForeignersPurchases',
        'ForeignersTotal',
        'ForeignersBalance',
        'SecuritiesCosSales',
        'SecuritiesCosPurchases',
        'SecuritiesCosTotal',
        'SecuritiesCosBalance',
        'InvestmentTrustsSales',
        'InvestmentTrustsPurchases',
        'InvestmentTrustsTotal',
        'InvestmentTrustsBalance',
        'BusinessCosSales',
        'BusinessCosPurchases',
        'BusinessCosTotal',
        'BusinessCosBalance',
        'OtherCosSales',
        'OtherCosPurchases',
        'OtherCosTotal',
        'OtherCosBalance',
        'InsuranceCosSales',
        'InsuranceCosPurchases',
        'InsuranceCosTotal',
        'InsuranceCosBalance',
        'CityBKsRegionalBKsEtcSales',
        'CityBKsRegionalBKsEtcPurchases',
        'CityBKsRegionalBKsEtcTotal',
        'CityBKsRegionalBKsEtcBalance',
        'TrustBanksSales',
        'TrustBanksPurchases',
        'TrustBanksTotal',
        'TrustBanksBalance',
        'OtherFinancialInstitutionsSales',
        'OtherFinancialInstitutionsPurchases',
        'OtherFinancialInstitutionsTotal',
        'OtherFinancialInstitutionsBalance'
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the columns really become datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    for date_col in ('PublishedDate', 'StartDate', 'EndDate'):
      df[date_col] = pd.to_datetime(df[date_col], format="%Y-%m-%d")
    df = df.sort_values(['PublishedDate']).reset_index(drop=True)
    return df[cols]

  def get_weekly_interest(
      self,
      date_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch weekly margin trading balances from
    ``/markets/weekly_margin_interest``.

    Args:
        date_yyyymmdd: date to fetch
    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and ``Code``
        with a fresh index and ``Date`` parsed to datetime; an empty frame
        with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/markets/weekly_margin_interest"
    params = {"date": date_yyyymmdd}

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Each page is parsed only once.
    page = self._get(url, params).json()
    data = page["weekly_margin_interest"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["weekly_margin_interest"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'Date',
        'Code',
        "ShortMarginTradeVolume",
        "LongMarginTradeVolume",
        "ShortNegotiableMarginTradeVolume",
        "LongNegotiableMarginTradeVolume",
        "ShortStandardizedMarginTradeVolume",
        "LongStandardizedMarginTradeVolume",
        "IssueType"
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values(["Date", "Code"]).reset_index(drop=True)
    return df[cols]

  def get_weekly_interest_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo"))
  ) -> pd.DataFrame:
    """
    全銘柄の信用取引週末残高情報を日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 信用取引週末残高情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      df = self.get_weekly_interest(date_yyyymmdd=s.strftime("%Y%m%d"))
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_short_selling(
      self,
      date_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch short-selling figures by sector from ``/markets/short_selling``.

    Args:
        date_yyyymmdd: date to fetch
    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and
        ``Sector33Code`` with a fresh index and ``Date`` parsed to datetime;
        an empty frame with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/markets/short_selling"
    params = {"date": date_yyyymmdd}

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Each page is parsed only once.
    page = self._get(url, params).json()
    data = page["short_selling"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["short_selling"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'Date',
        'Sector33Code',
        "SellingExcludingShortSellingTurnoverValue",
        "ShortSellingWithRestrictionsTurnoverValue",
        "ShortSellingWithoutRestrictionsTurnoverValue",
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values(["Date", "Sector33Code"]).reset_index(drop=True)
    return df[cols]

  def get_short_selling_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo"))
  ) -> pd.DataFrame:
    """
    全銘柄の業種別空売り比率情報を日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 業種別空売り比率情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      df = self.get_short_selling(date_yyyymmdd=s.strftime("%Y%m%d"))
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_markets_breakdown(
      self,
      date_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch trading breakdown data from ``/markets/breakdown``.

    Args:
        date_yyyymmdd: date to fetch
    Returns:
        pd.DataFrame: the selected columns sorted by ``Date`` and ``Code``
        with a fresh index and ``Date`` parsed to datetime; an empty frame
        with the same columns when the API returns no rows
    """
    url = f"{self.JQUANTS_API_BASE}/markets/breakdown"
    params = {"date": date_yyyymmdd}

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are returned in pages: keep requesting as long as the
    # response carries a pagination_key. Each page is parsed only once.
    page = self._get(url, params).json()
    data = page["breakdown"]
    while "pagination_key" in page:
      params["pagination_key"] = page["pagination_key"]
      page = self._get(url, params).json()
      data += page["breakdown"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'Date',
        'Code',
        "LongSellValue",
        "ShortSellWithoutMarginValue",
        "MarginSellNewValue",
        "MarginSellCloseValue",
        "LongBuyValue",
        "MarginBuyNewValue",
        "MarginBuyCloseValue",
        "LongSellVolume",
        "ShortSellWithoutMarginVolume",
        "MarginSellNewVolume",
        "MarginSellCloseVolume",
        "LongBuyVolume",
        "MarginBuyNewVolume",
        "MarginBuyCloseVolume"
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Plain column assignment so the column really becomes datetime64:
    # under pandas 2.x `df.loc[:, col] = ...` writes in place and leaves an
    # object-dtype column.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values(["Date", "Code"]).reset_index(drop=True)
    return df[cols]

  def get_markets_breakdown_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = datetime.now(tz.gettz("Asia/Tokyo"))
  ) -> pd.DataFrame:
    """
    全銘柄の売買内訳データを日付範囲指定して取得

    Args:
        start_dt: 取得開始日
        end_dt: 取得終了日

    Returns:
        pd.DataFrame: 売買内訳データ情報
    """
    buff = []
    dates = pd.date_range(start_dt, end_dt, freq="D")
    counter = 1
    for s in dates:
      df = self.get_markets_breakdown(date_yyyymmdd=s.strftime("%Y%m%d"))
      buff.append(df)
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")
      counter += 1
    if len(buff)==0:
        return None
    if len(buff)==1:
        df_return = buff[0]
        if len(df_return)==0:
            return None
        return df_return
    return pd.concat(buff).reset_index(drop=True)

  def get_topix(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = None
  ) -> pd.DataFrame:
    """
    Fetch TOPIX index data for a date range.

    Args:
        start_dt: first date to fetch
        end_dt: last date to fetch; when omitted (None), the current time in
            Asia/Tokyo at call time is used

    Returns:
        pd.DataFrame: columns Date, Open, High, Low, Close sorted by Date.
        An empty frame with those columns when the API returns no rows.
    """
    if end_dt is None:
      # Resolve "now" on every call: a datetime.now() default value would be
      # evaluated only once, when the class body is executed.
      end_dt = datetime.now(tz.gettz("Asia/Tokyo"))

    url = f"{self.JQUANTS_API_BASE}/indices/topix"
    params = {}
    params["from"] = start_dt.strftime("%Y%m%d")
    params["to"] = end_dt.strftime("%Y%m%d")
    # Parse each response body once and reuse it.
    payload = self._get(url, params).json()
    data = payload["topix"]

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are split into pages: keep requesting while the response
    # carries a pagination_key.
    while "pagination_key" in payload:
      params["pagination_key"] = payload["pagination_key"]
      payload = self._get(url, params).json()
      data += payload["topix"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'Date',
        'Open',
        'High',
        'Low',
        'Close'
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Replace the column instead of writing through .loc[:, ...]: on recent
    # pandas the latter sets values in place and can keep the object dtype.
    df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
    df = df.sort_values("Date").reset_index(drop=True)
    return df[cols]

  def get_dividend(
      self,
      date_yyyymmdd: str = "",
  ) -> pd.DataFrame:
    """
    Fetch dividend information for one date.

    Args:
        date_yyyymmdd: date to fetch, sent as the ``date`` query parameter
    Returns:
        pd.DataFrame: dividend rows sorted by AnnouncementDate and Code.
        An empty frame with the expected columns when the API returns no rows.
    """
    url = f"{self.JQUANTS_API_BASE}/fins/dividend"
    params = {}
    params["date"] = date_yyyymmdd
    # Parse each response body once and reuse it.
    payload = self._get(url, params).json()
    data = payload["dividend"]

    # https://jpx.gitbook.io/j-quants-ja/api-reference#resuponsunopjingunitsuite
    # Large results are split into pages: keep requesting while the response
    # carries a pagination_key.
    while "pagination_key" in payload:
      params["pagination_key"] = payload["pagination_key"]
      payload = self._get(url, params).json()
      # Follow-up pages use the same "dividend" key as the first page
      # (previously read from "breakdown", which raised KeyError).
      data += payload["dividend"]

    df = pd.DataFrame.from_dict(data)
    cols = [
        'AnnouncementDate',
        'AnnouncementTime',
        'Code',
        'ReferenceNumber',
        'StatusCode',
        'BoardMeetingDate',
        'InterimFinalCode',
        'ForecastResultCode',
        'InterimFinalTerm',
        'GrossDividendRate',
        'RecordDate',
        'ExDate',
        'ActualRecordDate',
        'PayableDate',
        'CAReferenceNumber',
        'DistributionAmount',
        'RetainedEarnings',
        'DeemedDividend',
        'DeemedCapitalGains',
        'NetAssetDecreaseRatio',
        'CommemorativeSpecialCode',
        'CommemorativeDividendRate',
        'SpecialDividendRate'
    ]
    if len(df) == 0:
      return pd.DataFrame([], columns=cols)
    # Replace the column instead of writing through .loc[:, ...]: on recent
    # pandas the latter sets values in place and can keep the object dtype.
    df["AnnouncementDate"] = pd.to_datetime(df["AnnouncementDate"], format="%Y-%m-%d")
    df = df.sort_values(["AnnouncementDate", "Code"]).reset_index(drop=True)
    return df[cols]

  def get_dividend_range(
    self,
    start_dt: datetime = datetime(2008, 1, 1, tzinfo=tz.gettz("Asia/Tokyo")),
    end_dt: datetime = None
  ) -> pd.DataFrame:
    """
    Fetch dividend information over a date range.

    Calls ``get_dividend`` once per daily step produced by
    ``pd.date_range(start_dt, end_dt, freq="D")`` and concatenates the results.

    Args:
        start_dt: first date to fetch
        end_dt: last date to fetch; when omitted (None), the current time in
            Asia/Tokyo at call time is used

    Returns:
        pd.DataFrame: the concatenated dividend rows. None when the range
        contains no days, or when it contains exactly one day and that day
        returned no rows.
    """
    if end_dt is None:
      # Resolve "now" on every call: a datetime.now() default value would be
      # evaluated only once, when the class body is executed.
      end_dt = datetime.now(tz.gettz("Asia/Tokyo"))

    dates = pd.date_range(start_dt, end_dt, freq="D")
    buff = []
    for counter, s in enumerate(dates, start=1):
      buff.append(self.get_dividend(date_yyyymmdd=s.strftime("%Y%m%d")))
      # progress log
      if (counter % 100) == 0:
        print(f"{counter} / {len(dates)}")

    if len(buff) == 0:
      return None
    if len(buff) == 1:
      # A single-day request with no rows is reported as "no data" (None).
      df_return = buff[0]
      if len(df_return) == 0:
        return None
      return df_return
    return pd.concat(buff).reset_index(drop=True)

In [6]:
# Read the login credentials from the id CSV (first column is the row label)
# and build the API client from them.
df_id = pd.read_csv(PATH_ID, index_col=0)
address = df_id.loc["address", "value"]
passcode = df_id.loc["pass", "value"]
jqapi = JQuantsAPIDemo(
    address=address,
    passcode=passcode,
)

# 銘柄情報の更新

In [None]:
if not FETCH_ALL_DATA:
    # Locate the existing snapshot; it is deleted only after the new one has been saved.
    if LIGHT_PLAN:
        path_list = glob.glob(RAW + f"/market_data/20240213_ライトプラン/list_*.pkl")
    else:
        path_list = glob.glob(RAW + f"/market_data/list_*.pkl")
    assert len(path_list) == 1
    df_list = pd.read_pickle(path_list[0])
    # 2024-02-10: Code can now contain non-numeric strings, so keep it as str.
    df_list["Code"] = df_list["Code"].astype(str)
    # Resume from the last date already stored.
    start_dt = pd.Timestamp(df_list["Date"].iloc[-1], tz="Asia/Tokyo")
else:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")

# NOTE(review): save_date is not defined in this cell; it must already exist
# in the kernel when this cell runs — confirm where it is set.
if LIGHT_PLAN:
    newFileName_list = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/20240213_ライトプラン/list_20080101_{save_date}_light.pkl"
    df_l = jqapi.get_list_range(start_dt=start_dt, end_dt=end_dt, light_plan=True)
else:
    newFileName_list = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/list_20080101_{save_date}.pkl"
    df_l = jqapi.get_list_range(start_dt=start_dt, end_dt=end_dt)

if not FETCH_ALL_DATA:
    if df_l is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        df_l["Code"] = df_l["Code"].astype(str)
        display(df_l.head(3))
        # Append the new rows; on duplicate (Code, Date) the newly fetched row wins.
        df_list = pd.concat((df_list, df_l)).drop_duplicates(subset=["Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_list = reduce_mem_usage(df_list)
        df_list.to_pickle(newFileName_list)
        print(f"save file: {newFileName_list}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run with the same save_date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_list[0]) != os.path.abspath(newFileName_list):
            os.remove(path_list[0])
            print(f"removed old file: {path_list[0]}")
        else:
            print(f"kept file (new file has the same path): {path_list[0]}")
        del df_list

else:
    df_l = reduce_mem_usage(df_l)
    df_l.to_pickle(newFileName_list)
    print(f"save file: {newFileName_list}")

There's no new data. (20240213 to 20240215)


In [None]:
# Show the listing data fetched above (df_l is None when nothing new was fetched).
df_l

In [None]:
# Drop the reference to the fetched listing data and run a garbage-collection pass.
del df_l
gc.collect()

0

# 株価データの更新

In [None]:
### TODO: also fetch UpperLimit and LowerLimit

if not FETCH_ALL_DATA:
    # Locate the existing price snapshot.
    if LIGHT_PLAN:
        path_price = glob.glob(RAW + f"/market_data/20240213_ライトプラン/price_20080101*.pkl")
    else:
        path_price = glob.glob(RAW + f"/market_data/price_20080101*.pkl")
    assert len(path_price) == 1
    df_price = pd.read_pickle(path_price[0])
    # 2024-02-10: Code can now contain non-numeric strings, so keep it as str.
    df_price["Code"] = df_price["Code"].astype(str)
    if LIGHT_PLAN:
        cols = [
            "Date",
            "Code",
            "Open",
            "High",
            "Low",
            "Close",
            "Volume",
            "TurnoverValue",
            "AdjustmentFactor",
            "AdjustmentOpen",
            "AdjustmentHigh",
            "AdjustmentLow",
            "AdjustmentClose",
            "AdjustmentVolume",
        ]
        # The light plan lacks some columns, so keep only the ones listed above.
        df_price = df_price[cols]
    # Resume from the last date already stored.
    start_dt = pd.Timestamp(df_price["Date"].iloc[-1], tz="Asia/Tokyo")
else:
    start_dt = pd.Timestamp(year=2015, month=1, day=1, tz="Asia/Tokyo")

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt -= pd.Timedelta(1, unit="D")

# NOTE(review): save_date is not defined in this cell; it must already exist
# in the kernel when this cell runs — confirm where it is set.
if LIGHT_PLAN:
    newFileName_price = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/20240213_ライトプラン/price_20080101_{save_date}_light.pkl"
    df_p = jqapi.get_price_range(start_dt=start_dt, end_dt=end_dt, premium_plan=False)
    # To be revised later: limit columns are dropped for now.
    cols_del = [
        "UpperLimit",
        "LowerLimit"
    ]
else:
    newFileName_price = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/price_20080101_{save_date}.pkl"
    df_p = jqapi.get_price_range(start_dt=start_dt, end_dt=end_dt)
    # To be revised later: limit columns are dropped for now.
    cols_del = [
        "UpperLimit",
        "LowerLimit",
        "MorningUpperLimit",
        "MorningLowerLimit",
        "AfternoonUpperLimit",
        "AfternoonLowerLimit",
    ]

# df_p is None when nothing was fetched; only touch it when there is data
# (the drop used to run unconditionally and failed on None).
if df_p is not None:
    df_p.drop(cols_del, axis=1, inplace=True)
    for col in df_p.columns:
        if col in ["Date", "Code"]:
            continue
        # Values that cannot be parsed as numbers become NaN.
        df_p[col] = pd.to_numeric(df_p[col], errors = 'coerce')
    display(df_p.head(3))

Unnamed: 0,Date,Code,Open,High,Low,Close,Volume,TurnoverValue,AdjustmentFactor,AdjustmentOpen,AdjustmentHigh,AdjustmentLow,AdjustmentClose,AdjustmentVolume
0,2024-02-14,13010,3650.0,3675.0,3550.0,3550.0,1266200.0,4589907000.0,1.0,3650.0,3675.0,3550.0,3550.0,1266200.0
1,2024-02-14,13050,2746.5,2746.5,2721.0,2734.0,165230.0,451936300.0,1.0,2746.5,2746.5,2721.0,2734.0,165230.0
2,2024-02-14,13060,2718.0,2718.5,2693.0,2703.0,1342040.0,3629221000.0,1.0,2718.0,2718.5,2693.0,2703.0,1342040.0


In [None]:
if not FETCH_ALL_DATA:
    if df_p is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate (Code, Date) the newly fetched row wins.
        df_price = pd.concat((df_price, df_p)).drop_duplicates(subset=["Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_price = reduce_mem_usage(df_price)
        df_price.to_pickle(newFileName_price)
        print(f"save file: {newFileName_price}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run with the same save_date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_price[0]) != os.path.abspath(newFileName_price):
            os.remove(path_price[0])
            print(f"removed old file: {path_price[0]}")
        else:
            print(f"kept file (new file has the same path): {path_price[0]}")
        del df_price

else:
    df_p = reduce_mem_usage(df_p)
    df_p.to_pickle(newFileName_price)
    print(f"save file: {newFileName_price}")

Memory usage of dataframe is 1426.95 MB
Memory usage after optimization is: 815.40 MB
Decreased by 42.9%
save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/20240213_ライトプラン/price_20080101_20240215_light.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/20240213_ライトプラン/price_20080101_20240214_light.pkl


In [None]:
# Show the price rows fetched above (df_p is None when nothing new was fetched).
df_p

Unnamed: 0,Date,Code,Open,High,Low,Close,Volume,TurnoverValue,AdjustmentFactor,AdjustmentOpen,AdjustmentHigh,AdjustmentLow,AdjustmentClose,AdjustmentVolume
0,2024-02-14,13010,3650.0,3675.0,3550.0,3550.0,1266200.0,4.589907e+09,1.0,3650.0,3675.0,3550.0,3550.0,1266200.0
1,2024-02-14,13050,2746.5,2746.5,2721.0,2734.0,165230.0,4.519363e+08,1.0,2746.5,2746.5,2721.0,2734.0,165230.0
2,2024-02-14,13060,2718.0,2718.5,2693.0,2703.0,1342040.0,3.629221e+09,1.0,2718.0,2718.5,2693.0,2703.0,1342040.0
3,2024-02-14,13080,2685.0,2685.0,2660.0,2671.0,233560.0,6.243198e+08,1.0,2685.0,2685.0,2660.0,2671.0,233560.0
4,2024-02-14,13090,37500.0,38250.0,37100.0,38250.0,137.0,5.158420e+06,1.0,37500.0,38250.0,37100.0,38250.0,137.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8672,2024-02-15,99930,1265.0,1269.0,1262.0,1262.0,8000.0,1.011250e+07,1.0,1265.0,1269.0,1262.0,1262.0,8000.0
8673,2024-02-15,99940,3335.0,3335.0,3250.0,3250.0,17100.0,5.604150e+07,1.0,3335.0,3335.0,3250.0,3250.0,17100.0
8674,2024-02-15,99950,748.0,749.0,747.0,747.0,460500.0,3.443827e+08,1.0,748.0,749.0,747.0,747.0,460500.0
8675,2024-02-15,99960,1980.0,1980.0,1940.0,1957.0,1600.0,3.149200e+06,1.0,1980.0,1980.0,1940.0,1957.0,1600.0


In [None]:
# Drop the reference to the fetched price rows and run a garbage-collection pass.
del df_p
gc.collect()

31

# 財務データの更新

In [None]:
# Choose the fetch window for financial statements.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2015, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its
    # last DisclosedDate.
    path_states = glob.glob(RAW + "/market_data/statements_20080101*.pkl")
    assert len(path_states) == 1
    df_states = pd.read_pickle(path_states[0])
    start_dt = pd.Timestamp(
        df_states["DisclosedDate"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_states = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/statements_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

cache_dir = "/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/raw_statements"
df_s = jqapi.get_statements_range(
    start_dt=start_dt,
    end_dt=end_dt,
    cache_dir=cache_dir,
)

In [None]:
#### TODO (later): fix up the numeric data columns here

if not FETCH_ALL_DATA:
    if df_s is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        df_states = pd.concat((df_states, df_s))
        # Some values end up as object dtype (possibly because of the cache?), so normalise them.
        df_states["DisclosureNumber"] = pd.to_numeric(df_states["DisclosureNumber"], errors="coerce")
        df_states["DisclosedDate"] = pd.to_datetime(df_states["DisclosedDate"], format="%Y-%m-%d")
        # On duplicate DisclosureNumber the newly fetched row wins.
        df_states = df_states.drop_duplicates(subset="DisclosureNumber", keep="last").sort_values(by="DisclosedDate").reset_index(drop=True)
        df_states.to_pickle(newFileName_states)
        print(f"save file: {newFileName_states}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        same_path = os.path.abspath(path_states[0]) == os.path.abspath(newFileName_states)
        if not same_path:
            os.remove(path_states[0])
        print(len(df_states["DisclosureNumber"].unique()))
        print(len(df_states))
        if not same_path:
            print(f"removed old file: {path_states[0]}")
        else:
            print(f"kept file (new file has the same path): {path_states[0]}")
        del df_states

else:
    df_s.to_pickle(newFileName_states)
    print(f"save file: {newFileName_states}")
    print(len(df_s["DisclosureNumber"].unique()))
    print(len(df_s))

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/statements_20080101_20230801.pkl
303541
303541
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/statements_20080101_20230729.pkl


In [None]:
# Drop the reference to the fetched statements and run a garbage-collection pass.
del df_s
gc.collect()

0

# オプションデータの更新

In [None]:
# Choose the fetch window for option data.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2015, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its last Date.
    path_options = glob.glob(RAW + "/market_data/options_20080101*.pkl")
    assert len(path_options) == 1
    df_options = pd.read_pickle(path_options[0])
    start_dt = pd.Timestamp(
        df_options["Date"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_options = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/options_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_o = jqapi.get_options_range(
    start_dt=start_dt,
    end_dt=end_dt,
)

In [None]:
if not FETCH_ALL_DATA:
    if df_o is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate (Code, Date) the newly fetched row wins.
        df_options = pd.concat((df_options, df_o)).drop_duplicates(subset=["Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_options.to_pickle(newFileName_options)
        print(f"save file: {newFileName_options}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_options[0]) != os.path.abspath(newFileName_options):
            os.remove(path_options[0])
            print(f"removed old file: {path_options[0]}")
        else:
            print(f"kept file (new file has the same path): {path_options[0]}")
        del df_options

else:
    df_o.to_pickle(newFileName_options)
    print(f"save file: {newFileName_options}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/options_20080101_20230801.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/options_20080101_20230729.pkl


In [None]:
# Drop the reference to the fetched option data and run a garbage-collection pass.
del df_o
gc.collect()

0

# 投資部門別情報の取得

全期間取得する

In [None]:
if not FETCH_ALL_DATA:
    # Locate the existing snapshot before writing the new one, so the glob
    # cannot pick up the file saved below.
    path_trades_spec = glob.glob(RAW + f"/market_data/trades_spec_*.pkl")
    assert len(path_trades_spec) == 1

# The whole history is fetched every time, so the new file fully replaces the old one.
df_trades = jqapi.get_markets()
end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
newFileName_trades = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/trades_spec_{end_dt.strftime('%Y%m%d')}.pkl"
df_trades.to_pickle(newFileName_trades)
print(f"save file: {newFileName_trades}")

if not FETCH_ALL_DATA:
    # Delete the old snapshot only after the new one is safely on disk (a
    # failed fetch used to leave no file at all), and never when both names
    # refer to the same path.
    if os.path.abspath(path_trades_spec[0]) != os.path.abspath(newFileName_trades):
        os.remove(path_trades_spec[0])
        print(f"removed old file: {path_trades_spec[0]}")
    else:
        print(f"kept file (new file has the same path): {path_trades_spec[0]}")

removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/trades_spec_20230730.pkl
save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/trades_spec_20230801.pkl


In [None]:
# Preview the first rows of the fetched trades-by-investor-type data.
df_trades.head()

Unnamed: 0,PublishedDate,StartDate,EndDate,Section,ProprietarySales,ProprietaryPurchases,ProprietaryTotal,ProprietaryBalance,BrokerageSales,BrokeragePurchases,...,CityBKsRegionalBKsEtcTotal,CityBKsRegionalBKsEtcBalance,TrustBanksSales,TrustBanksPurchases,TrustBanksTotal,TrustBanksBalance,OtherFinancialInstitutionsSales,OtherFinancialInstitutionsPurchases,OtherFinancialInstitutionsTotal,OtherFinancialInstitutionsBalance
0,2008-01-16,2008-01-04,2008-01-04,TokyoNagoya,536890700.0,338191000.0,875081700.0,-198699702.0,1257939000.0,1445994000.0,...,4585894.0,3025604.0,10702321.0,24622811.0,35325132.0,13920490.0,601599.0,2627530.0,3229129.0,2025931.0
1,2008-01-16,2008-01-04,2008-01-04,TSE1st,511187300.0,322802100.0,833989400.0,-188385246.0,1165763000.0,1341989000.0,...,4518098.0,2958192.0,10386990.0,23960842.0,34347832.0,13573852.0,601599.0,2513675.0,3115274.0,1912076.0
2,2008-01-16,2008-01-04,2008-01-04,TSEMothers,1250579.0,1421602.0,2672181.0,171023.0,22077000.0,21814760.0,...,135.0,135.0,16884.0,91333.0,108217.0,74449.0,0.0,141.0,141.0,141.0
3,2008-01-16,2008-01-04,2008-01-04,TSE2nd,195690.0,196453.0,392143.0,763.0,4034952.0,4055644.0,...,9172.0,8788.0,82701.0,94015.0,176716.0,11314.0,0.0,7740.0,7740.0,7740.0
4,2008-01-21,2008-01-07,2008-01-11,TokyoNagoya,3899717000.0,3444936000.0,7344653000.0,-454781888.0,9613852000.0,10055580000.0,...,19668030.0,11622192.0,415672498.0,535492334.0,951164832.0,119819836.0,8775056.0,10661666.0,19436722.0,1886610.0


In [None]:
# Drop the reference to the fetched data and run a garbage-collection pass.
del df_trades
gc.collect()

0

# 信用取引週末残高の更新

In [None]:
# Choose the fetch window for weekly margin-interest data.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its last Date.
    path_weekly_interest = glob.glob(RAW + "/market_data/weekly_interest_20080101*.pkl")
    assert len(path_weekly_interest) == 1
    df_weekly_interest = pd.read_pickle(path_weekly_interest[0])
    start_dt = pd.Timestamp(
        df_weekly_interest["Date"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_weekly_interest = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/weekly_interest_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_wi = jqapi.get_weekly_interest_range(
    start_dt=start_dt,
    end_dt=end_dt,
)

In [None]:
if not FETCH_ALL_DATA:
    if df_wi is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate (Code, Date) the newly fetched row wins.
        df_weekly_interest = pd.concat((df_weekly_interest, df_wi)).drop_duplicates(subset=["Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_weekly_interest.to_pickle(newFileName_weekly_interest)
        print(f"save file: {newFileName_weekly_interest}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_weekly_interest[0]) != os.path.abspath(newFileName_weekly_interest):
            os.remove(path_weekly_interest[0])
            print(f"removed old file: {path_weekly_interest[0]}")
        else:
            print(f"kept file (new file has the same path): {path_weekly_interest[0]}")
        del df_weekly_interest

else:
    df_wi.to_pickle(newFileName_weekly_interest)
    print(f"save file: {newFileName_weekly_interest}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/weekly_interest_20080101_20230801.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/weekly_interest_20080101_20230729.pkl


In [None]:
# Drop the reference to the fetched weekly-interest data and run a garbage-collection pass.
del df_wi
gc.collect()

0

# 業種別空売り比率の更新

In [None]:
# Choose the fetch window for short-selling ratio data.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its last Date.
    path_short_selling = glob.glob(RAW + "/market_data/short_selling_20080101*.pkl")
    assert len(path_short_selling) == 1
    df_short_selling = pd.read_pickle(path_short_selling[0])
    start_dt = pd.Timestamp(
        df_short_selling["Date"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_short_selling = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/short_selling_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_ss = jqapi.get_short_selling_range(
    start_dt=start_dt,
    end_dt=end_dt,
)

In [None]:
if not FETCH_ALL_DATA:
    if df_ss is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate (Sector33Code, Date) the newly fetched row wins.
        df_short_selling = pd.concat((df_short_selling, df_ss)).drop_duplicates(subset=["Sector33Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_short_selling.to_pickle(newFileName_short_selling)
        print(f"save file: {newFileName_short_selling}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_short_selling[0]) != os.path.abspath(newFileName_short_selling):
            os.remove(path_short_selling[0])
            # Log the removed path itself (index 0), not the whole glob result list.
            print(f"removed old file: {path_short_selling[0]}")
        else:
            print(f"kept file (new file has the same path): {path_short_selling[0]}")
        del df_short_selling

else:
    df_ss.to_pickle(newFileName_short_selling)
    print(f"save file: {newFileName_short_selling}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/short_selling_20080101_20230801.pkl
removed old file: ['/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/short_selling_20080101_20230729.pkl']


In [None]:
# Drop the reference to the fetched short-selling data and run a garbage-collection pass.
del df_ss
gc.collect()

0

# 売買内訳データの更新

In [None]:
# Choose the fetch window for trading-breakdown data.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its last Date.
    path_markets_breakdown = glob.glob(RAW + "/market_data/markets_breakdown_20080101*.pkl")
    assert len(path_markets_breakdown) == 1
    df_markets_breakdown = pd.read_pickle(path_markets_breakdown[0])
    start_dt = pd.Timestamp(
        df_markets_breakdown["Date"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_markets_breakdown = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/markets_breakdown_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_mb = jqapi.get_markets_breakdown_range(
    start_dt=start_dt,
    end_dt=end_dt,
)

In [None]:
if not FETCH_ALL_DATA:
    if df_mb is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Only merge and save when something was fetched (this branch used to
        # run even after the "no new data" message).
        # On duplicate (Code, Date) the newly fetched row wins.
        df_markets_breakdown = pd.concat((df_markets_breakdown, df_mb)).drop_duplicates(subset=["Code", "Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_markets_breakdown.to_pickle(newFileName_markets_breakdown)
        print(f"save file: {newFileName_markets_breakdown}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_markets_breakdown[0]) != os.path.abspath(newFileName_markets_breakdown):
            os.remove(path_markets_breakdown[0])
            print(f"removed old file: {path_markets_breakdown[0]}")
        else:
            print(f"kept file (new file has the same path): {path_markets_breakdown[0]}")
        del df_markets_breakdown

else:
    df_mb.to_pickle(newFileName_markets_breakdown)
    print(f"save file: {newFileName_markets_breakdown}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/markets_breakdown_20080101_20230801.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/markets_breakdown_20080101_20230729.pkl


In [None]:
# Drop the reference to the fetched breakdown data and run a garbage-collection pass.
del df_mb
gc.collect()

0

# TOPIX指数の更新

In [7]:
# Choose the fetch window for TOPIX index data.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its last Date.
    path_topix = glob.glob(RAW + "/market_data/topix_20080101*.pkl")
    assert len(path_topix) == 1
    df_topix = pd.read_pickle(path_topix[0])
    start_dt = pd.Timestamp(
        df_topix["Date"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_topix = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/topix_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_to = jqapi.get_topix(
    start_dt=start_dt,
    end_dt=end_dt,
)

In [8]:
if not FETCH_ALL_DATA:
    if df_to is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate Date the newly fetched row wins.
        df_topix = pd.concat((df_topix, df_to)).drop_duplicates(subset=["Date"], keep="last").sort_values(by="Date").reset_index(drop=True)
        df_topix.to_pickle(newFileName_topix)
        print(f"save file: {newFileName_topix}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_topix[0]) != os.path.abspath(newFileName_topix):
            os.remove(path_topix[0])
            print(f"removed old file: {path_topix[0]}")
        else:
            print(f"kept file (new file has the same path): {path_topix[0]}")
    # The loaded snapshot is released whether or not new data arrived.
    del df_topix

else:
    df_to.to_pickle(newFileName_topix)
    print(f"save file: {newFileName_topix}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/topix_20080101_20240228.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/topix_20080101_20230801.pkl


In [9]:
# Drop the reference to the fetched TOPIX data and run a garbage-collection pass.
del df_to
gc.collect()

0

# 配当金情報の更新

In [None]:
# Choose the fetch window for dividend information.
if FETCH_ALL_DATA:
    start_dt = pd.Timestamp(year=2008, month=1, day=1, tz="Asia/Tokyo")
else:
    # Incremental run: load the single existing snapshot and resume from its
    # last AnnouncementDate.
    path_dividend = glob.glob(RAW + "/market_data/dividend_20080101*.pkl")
    assert len(path_dividend) == 1
    df_dividend = pd.read_pickle(path_dividend[0])
    start_dt = pd.Timestamp(
        df_dividend["AnnouncementDate"].iloc[-1],
        tz="Asia/Tokyo",
    )

end_dt = pd.Timestamp.now(tz="Asia/Tokyo")
if end_dt.hour < 19:
    # Before the data-update time: shift the end date back by one day.
    end_dt = end_dt - pd.Timedelta(days=1)
newFileName_dividend = f"/content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/dividend_20080101_{end_dt.strftime('%Y%m%d')}.pkl"

df_di = jqapi.get_dividend_range(
    start_dt=start_dt,
    end_dt=end_dt,
)

# Show the fetched rows as the cell output.
df_di

Unnamed: 0,AnnouncementDate,AnnouncementTime,Code,ReferenceNumber,StatusCode,BoardMeetingDate,InterimFinalCode,ForecastResultCode,InterimFinalTerm,GrossDividendRate,...,PayableDate,CAReferenceNumber,DistributionAmount,RetainedEarnings,DeemedDividend,DeemedCapitalGains,NetAssetDecreaseRatio,CommemorativeSpecialCode,CommemorativeDividendRate,SpecialDividendRate
0,2023-07-28,15:11,17240,202307281B00019,1,2023-07-28,1,1,2023-06,6.0,...,2023-08-30,202307281B00019,,,,,,0,,
1,2023-07-28,15:08,22110,202307281B00016,2,2023-07-28,2,2,2023-12,30.0,...,-,202302131B00069,,,,,,0,,
2,2023-07-28,15:08,22110,202307281B00017,1,2023-07-28,1,1,2023-06,0.0,...,,202307281B00017,,,,,,0,,
3,2023-07-28,15:30,22180,202307281B00034,2,2023-07-28,2,1,2023-03,15.0,...,2023-06-30,202305301B00003,,,,,,0,,
4,2023-07-28,15:30,22180,202307281B00035,1,2023-07-28,2,2,2024-03,15.0,...,-,202307281B00035,,,,,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,2023-08-01,16:03,79620,202308011B00018,1,2023-08-01,2,1,2023-06,7.0,...,2023-09-15,202308011B00018,,,,,,0,,
141,2023-08-01,08:57,88870,202308011B00002,1,2023-07-31,1,2,2023-11,20.0,...,-,202308011B00002,,,,,,0,,
142,2023-08-01,08:57,88870,202308011B00003,1,2023-07-31,2,2,2024-05,20.0,...,-,202308011B00003,,,,,,0,,
143,2023-08-01,08:57,88870,202308011B00001,1,2023-07-31,2,1,2023-05,20.0,...,2023-08-30,202308011B00001,,,,,,0,,


In [None]:
if not FETCH_ALL_DATA:
    if df_di is None:
        print(f"There's no new data. ({start_dt.strftime('%Y%m%d')} to {end_dt.strftime('%Y%m%d')})")
    else:
        # Append the new rows; on duplicate ReferenceNumber the newly fetched row wins.
        df_dividend = pd.concat((df_dividend, df_di)).drop_duplicates(subset=["ReferenceNumber"], keep="last").sort_values(by="AnnouncementDate").reset_index(drop=True)
        df_dividend.to_pickle(newFileName_dividend)
        print(f"save file: {newFileName_dividend}")
        # Remove the old snapshot, unless the new file was written to the very
        # same path (re-run for the same end date): removing it then would
        # delete the data that was just saved.
        if os.path.abspath(path_dividend[0]) != os.path.abspath(newFileName_dividend):
            os.remove(path_dividend[0])
            print(f"removed old file: {path_dividend[0]}")
        else:
            print(f"kept file (new file has the same path): {path_dividend[0]}")
        del df_dividend

else:
    df_di.to_pickle(newFileName_dividend)
    print(f"save file: {newFileName_dividend}")

save file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/dividend_20080101_20230801.pkl
removed old file: /content/drive/MyDrive/日本株/raw/J-QuantsAPI/market_data/dividend_20080101_20230729.pkl


In [None]:
# Drop the reference to the fetched dividend data and run a garbage-collection pass.
del df_di
gc.collect()

15