In [1]:
import pandas as pd
import tqdm

In [2]:
import os
import json
import requests
import time

class JQuants():
    """Small client for the REST API served at ``https://api.jpx-jquants.com/``.

    On construction the refresh token is read from the ``REFRESH_TOKEN``
    environment variable and exchanged for an ID token. Before every API call
    the ID token is renewed if it is about to expire.

    Attributes set on the instance:
        rtoken:    refresh token taken from the environment.
        itoken:    current ID token, sent as the ``Authorization`` header.
        expiresin: ``expiresIn`` value returned by the refresh endpoint.
        ref_time:  ``time.time()`` of the last successful token refresh.
    """

    # Root URL shared by every endpoint.
    BASE_URL = "https://api.jpx-jquants.com/"
    # Seconds to wait for the server; without a timeout ``requests`` can block
    # forever on a stalled connection, which would freeze a long download loop.
    TIMEOUT = 60
    # The ID token is renewed this many seconds before its reported expiry.
    EXPIRY_MARGIN = 10

    def __init__(self):
        self.rtoken = os.environ["REFRESH_TOKEN"]
        self.itoken, self.expiresin = self.get_idtoken()

    def get_idtoken(self):
        """Exchange the refresh token for a new ID token.

        Returns:
            tuple: ``(idToken, expiresIn)`` taken from the JSON response.

        Raises:
            KeyError: if the response does not contain both fields; the
                message includes the decoded response body.
        """
        headers = {"accept": "application/json"}
        data = {"refresh-token": self.rtoken}

        response = requests.post(
            self.BASE_URL + "refresh",
            headers=headers,
            data=json.dumps(data),
            timeout=self.TIMEOUT,
        )

        resjson = response.json()
        if "idToken" not in resjson or "expiresIn" not in resjson:
            raise KeyError(f"Error: token refresh failed: {resjson}")

        # Only move the reference time once the refresh actually succeeded, so
        # a failed refresh is retried on the next call instead of the stale
        # token being treated as fresh.
        self.ref_time = time.time()
        return resjson["idToken"], resjson["expiresIn"]

    def _call_api(self, params, apitype, code=None):
        """Send one GET request to ``BASE_URL + apitype[/code]``.

        Args:
            params: dict of query options. Recognised keys are ``datefrom``,
                ``dateto``, ``date``, ``includedetails`` (defaults to
                ``"false"``), ``keyword``, ``headline``, ``code`` and
                ``nextToken``; other keys are ignored.
            apitype: endpoint path appended to the base URL.
            code: optional value appended to the URL as an extra path segment.

        Returns:
            The decoded JSON body of the response.
        """
        elapsed = time.time() - self.ref_time
        if elapsed > (self.expiresin - self.EXPIRY_MARGIN):
            self.itoken, self.expiresin = self.get_idtoken()

        headers = {"accept": "application/json", "Authorization": self.itoken}

        # Entries whose value is None are left out of the query string by
        # ``requests``, so unset options are simply not sent.
        query = {
            "from": params.get("datefrom"),
            "to": params.get("dateto"),
            "includeDetails": params.get("includedetails", "false"),
            "nextToken": params.get("nextToken"),
            "date": params.get("date"),
            "keyword": params.get("keyword"),
            "headline": params.get("headline"),
            "code": params.get("code"),
        }

        url = self.BASE_URL + apitype
        if code:
            url += "/" + str(code)

        r = requests.get(url, params=query, headers=headers, timeout=self.TIMEOUT)
        return r.json()

    def get_dataframe(self, params, apitype, content_key, code=None, max_iters=100, sleep=1):
        """Fetch every page of an endpoint and return the rows as one DataFrame.

        Pages are requested until a response no longer contains ``nextToken``
        or ``max_iters`` requests have been made (a warning is issued in the
        latter case). The caller's ``params`` dict is not modified.

        Args:
            params: query options, see ``_call_api``.
            apitype: endpoint path.
            content_key: key of the JSON response that holds the records.
            code: optional extra path segment.
            max_iters: maximum number of page requests.
            sleep: seconds to pause between two page requests.

        Returns:
            pandas.DataFrame: the pages concatenated row-wise with columns
            sorted (each page keeps its own index). An empty DataFrame is
            returned when no page was fetched (``max_iters <= 0``).

        Raises:
            KeyError: if a response does not contain ``content_key``.
        """
        import warnings

        pages = []
        params = params.copy()  # the pagination token is written into the copy only
        for _ in range(max_iters):
            resjson = self._call_api(params, apitype, code)
            if content_key not in resjson:
                raise KeyError(f"Error: {params} {resjson}")
            pages.append(pd.DataFrame.from_dict(resjson[content_key]))

            if "nextToken" not in resjson:
                break
            params["nextToken"] = resjson["nextToken"]

            time.sleep(sleep)
        else:
            warnings.warn("reached to max_iters")

        if not pages:
            # pd.concat raises on an empty list; hand back an empty frame instead.
            return pd.DataFrame()
        return pd.concat(pages, axis=0, sort=True)
            



In [3]:
# Create the API client. The constructor reads the REFRESH_TOKEN environment
# variable and immediately requests an ID token over the network.
jq = JQuants()

In [4]:
# Download the "lists" endpoint with details enabled and show the table size.
paramdict = {"includedetails": "True"}

ls_df = jq.get_dataframe(params=paramdict, apitype="lists", content_key="list")
ls_df.shape

(11139, 14)

In [5]:
#ls_df["prediction_target"] = ls_df.prediction_target.astype("bool")

In [6]:
# Preview the first rows of the table fetched from the "lists" endpoint.
ls_df.head()

Unnamed: 0,17 Sector(Code),17 Sector(name),33 Sector(Code),33 Sector(name),Effective Date,IssuedShareEquityQuote AccountingStandard,IssuedShareEquityQuote IssuedShare,IssuedShareEquityQuote ModifyDate,Local Code,Name (English),Section/Products,Size (New Index Series),Size Code (New Index Series),prediction_target
0,10.0,"IT & SERVICES, OTHERS",5250.0,Information & Communication,20201230,ConsolidatedJP,129268000.0,2020/10/26,3635,"KOEI TECMO HOLDINGS CO.,LTD.",First Section (Domestic),TOPIX Mid400,4,True
1,10.0,"IT & SERVICES, OTHERS",5250.0,Information & Communication,20210129,ConsolidatedJP,129268000.0,2021/01/25,3635,"KOEI TECMO HOLDINGS CO.,LTD.",First Section (Domestic),TOPIX Mid400,4,True
2,10.0,"IT & SERVICES, OTHERS",5250.0,Information & Communication,20210226,ConsolidatedJP,129268000.0,2021/01/25,3635,"KOEI TECMO HOLDINGS CO.,LTD.",First Section (Domestic),TOPIX Mid400,4,True
3,14.0,RETAIL TRADE,6100.0,Retail Trade,20201230,ConsolidatedJP,29859900.0,2020/10/30,9990,SAC'S BAR HOLDINGS INC.,First Section (Domestic),TOPIX Small 2,7,True
4,14.0,RETAIL TRADE,6100.0,Retail Trade,20210129,ConsolidatedJP,29859900.0,2021/01/29,9990,SAC'S BAR HOLDINGS INC.,First Section (Domestic),TOPIX Small 2,7,True


In [7]:
# Every Monday-to-Friday date in the download window, as "YYYY-MM-DD" strings.
dates = pd.bdate_range("2016-01-01", "2021-03-26").astype("str")
dates

Index(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07',
       '2016-01-08', '2016-01-11', '2016-01-12', '2016-01-13', '2016-01-14',
       ...
       '2021-03-15', '2021-03-16', '2021-03-17', '2021-03-18', '2021-03-19',
       '2021-03-22', '2021-03-23', '2021-03-24', '2021-03-25', '2021-03-26'],
      dtype='object', length=1366)

In [8]:
# Dataset name -> (endpoint path, key of the JSON response holding the records).
api_conts = {
    "stock_labels": ("stocklabels", "labels"),
    "stock_fin": ("stockfins", "stockfin"),
    "stock_price": ("prices", "prices"),
}

# Gather the per-date frames in plain lists and concatenate once per dataset
# afterwards. DataFrame.append was removed in pandas 2.0, and growing a frame
# inside the loop copies all previously collected rows on every iteration.
# This loop issues one request series per date and dataset; the run recorded
# in this notebook took about eight hours.
frames = {k: [] for k in api_conts}

for date in tqdm.tqdm(dates):
    for k, (apitype, content_key) in api_conts.items():
        frames[k].append(
            jq.get_dataframe({"includedetails": "True", "date": date}, apitype, content_key)
        )

# One DataFrame per dataset with a fresh 0..n-1 index; an empty frame is kept
# for a dataset when there were no dates to download.
dfs = {
    k: pd.concat(v, ignore_index=True) if v else pd.DataFrame()
    for k, v in frames.items()
}

100%|██████████| 1366/1366 [8:10:06<00:00, 24.22s/it]


In [9]:
# Add the listing table to the dict so it is stored together with the
# per-date datasets.
dfs["stock_list"] = ls_df

In [10]:
# Display the frame collected from the "stockfins" endpoint.
dfs["stock_fin"]

Unnamed: 0,Forecast_Dividend AnnualDividendPerShare,Forecast_Dividend FiscalPeriodEnd,Forecast_Dividend FiscalYear,Forecast_Dividend ModifyDate,Forecast_Dividend QuarterlyDividendPerShare,Forecast_Dividend RecordDate,Forecast_Dividend ReportType,Forecast_FinancialStatement AccountingStandard,Forecast_FinancialStatement ChangeOfFiscalYearEnd,Forecast_FinancialStatement CompanyType,...,Result_FinancialStatement FiscalYear,Result_FinancialStatement ModifyDate,Result_FinancialStatement NetAssets,Result_FinancialStatement NetIncome,Result_FinancialStatement NetSales,Result_FinancialStatement OperatingIncome,Result_FinancialStatement OrdinaryIncome,Result_FinancialStatement ReportType,Result_FinancialStatement TotalAssets,base_date
0,90,2016/03,2016,2016/01/04,45,2016/03/31,Annual,ConsolidatedJP,False,GB,...,2016,2016/01/04,16962,1645,22354,2391,2466,Q3,21251,2016/01/04
1,65,2016/02,2016,2016/01/04,32.5,2016/02/29,Annual,ConsolidatedJP,False,GB,...,2016,2016/01/04,7145,697,21550,1038,1053,Q3,24949,2016/01/04
2,0,2016/03,2016,2016/01/04,0,2016/03/31,Annual,NonConsolidated,False,GB,...,,,,,,,,,,2016/01/04
3,0,2016/03,2016,2016/01/04,0,2016/03/31,Annual,ConsolidatedJP,False,GB,...,,,,,,,,,,2016/01/04
4,18,2016/03,2016,2016/01/04,18,2016/03/31,Annual,ConsolidatedJP,False,GB,...,2016,2016/01/04,30980,2685,14307,4518,4136,Q3,38498,2016/01/04
5,25,2016/05,2016,2016/01/05,14,2016/05/31,Annual,ConsolidatedJP,False,GB,...,2016,2016/01/05,15629,354,24881,491,546,Q2,33055,2016/01/05
6,36,2016/02,2016,2016/01/05,36,2016/02/29,Annual,ConsolidatedJP,False,GB,...,2016,2016/01/05,92277,6212,128716,10206,10451,Q3,118788,2016/01/05
7,0,2015/12,2015,2016/01/05,0,2015/12/31,Annual,ConsolidatedJP,False,GB,...,,,,,,,,,,2016/01/05
8,60,2016/03,2016,2016/01/05,60,2016/03/31,Annual,ConsolidatedJP,False,GB,...,,,,,,,,,,2016/01/05
9,12.5,2016/02,2016,2016/01/05,6.25,2016/02/29,Annual,NonConsolidated,False,GB,...,2016,2016/01/05,38984,-183,119395,118,872,Q3,96633,2016/01/05


In [11]:
import joblib

# Persist the whole dict of DataFrames as one compressed file; the value
# returned by the call (the list of written paths) is the cell output.
joblib.dump(dfs, "../tmp/dfs.joblib", compress=True)

['../tmp/dfs.joblib']