In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import glob
import time
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from preprocess import *
from utils import *
from edinet import *

In [None]:
# Raise pandas' display limits (rows, columns, console width) for this notebook.
for option_name, option_value in [
    ('display.max_rows', 1500),
    ('display.max_columns', 100),
    ('display.width', 1000),
]:
    pd.set_option(option_name, option_value)

In [None]:
# Hand-picked securities codes, listed in ascending order.
tickers = [
    1407, 2471, 2491, 3038, 3150, 3697, 3854, 3923,
    4356, 4880, 4970, 6099, 6200, 6254, 6532, 6544,
    6920, 6951, 6966, 7033, 7094, 8919,
]

# Names of the financial-summary fields of interest.
labels = [
    # year-over-year change rates
    '売上高_変化率(前年比)',
    '経常利益_変化率(前年比)',
    '純資産額_変化率(前年比)',
    'ROE（自己資本利益率）_変化率(前年比)',
    '期末残高_変化率(前年比)',
    # balance-sheet and per-share figures
    '自己資本比率',
    'BPS（１株当たり純資産額）',
    'EPS（１株当たり当期純利益）',
    # cash flows
    '営業CF',
    '投資CF',
    '財務CF',
    '営業CFマージン',
    # prices
    '理論株価',
    '株価',
]

In [None]:
from edinet_xbrl.edinet_xbrl_parser import EdinetXbrlParser
from utils import read_ticker
from xbrl_to_csv import *
from taxonomy import *
from check_xbrl import *
from arelle import Cntlr


In [None]:
# Pick one securities code and list the XBRL files stored under its
# 四半期報告書 directory; the parser instance is reused by later cells.
sec_code = 1407
xbrl_paths = glob.glob(
    f"../data/raw/edinet/xbrl/四半期報告書/{sec_code}/*xbrl"
)
parser = EdinetXbrlParser()

In [None]:
# Load the document summary, then divide secCode by 10 and cast it to int.
# NOTE(review): presumably this strips the trailing digit of a 5-digit code so
# it matches the 4-digit tickers used elsewhere — confirm against the CSV.
df_doc_summary = pd.read_csv("../data/preprocess/edinet/doc_all_summary.csv")
df_doc_summary["secCode"] = (df_doc_summary["secCode"] / 10).astype(int)

In [None]:
# For each document type, locate the XBRL files of the filings listed in
# df_doc_summary.
# NOTE: both `break` statements are unconditional, so only the first filing of
# the first securities code of the first document type ('140') is globbed.
# The loop variables are notebook globals and stay bound after this cell;
# later cells read doc_type_code, doc_type_name, save_dir, sec_code and
# glob_path.
# NOTE(review): this loop rebinds `sec_code`, which an earlier cell set to 1407
# and used to build xbrl_paths — confirm the two still refer to the same company.
doc_type_codes=['140','150']

for doc_type_code in doc_type_codes:
    # doc_type_code_dict is not defined in this notebook; presumably it comes
    # from one of the star imports — TODO confirm.
    doc_type_name = doc_type_code_dict[doc_type_code]
    print(doc_type_name)
    save_dir = f"../data/raw/edinet/xbrl/{doc_type_name}"
    # docTypeCode is compared as an int although the codes above are strings.
    df_doc = df_doc_summary[df_doc_summary["docTypeCode"]==int(doc_type_code)]
    # Unique securities codes, ascending.
    sec_codes = list(set(df_doc["secCode"]))
    sec_codes.sort()
    for sec_code in sec_codes:
        df_sec = df_doc[df_doc["secCode"]==sec_code]
        for i in df_sec.index:
            # First 10 characters of submitDateTime split on '-'; assumes a
            # 'YYYY-MM-DD...' layout — TODO confirm.
            date = df_sec.loc[i,'submitDateTime'][:10].split('-')
            # Documents are stored under <year>/<month>/<day>/<docID>/XBRL/PublicDoc/.
            xbrl_path = f"../data/raw/edinet/document/{date[0]}/{date[1]}/{date[2]}/{df_sec.loc[i,'docID']}/XBRL/PublicDoc/*.xbrl"
            glob_path = glob.glob(xbrl_path)
            print(glob_path)
            break
    break

In [None]:
# Display the paths matched by the last glob executed in the loop above.
glob_path

In [None]:
# Build one table of financial-summary values from the XBRL files in
# `xbrl_paths`, with one column per reporting period while accumulating.
#
# Relies on names bound by earlier cells: parser, xbrl_paths, doc_type_code,
# doc_type_name, sec_code and save_dir.
# NOTE(review): sec_code is rebound by the loop in the document-lookup cell, so
# it may no longer be the code that was used to glob xbrl_paths — confirm.
cnt = 0  # becomes 1 once the first usable file has been merged
consolidated_type = "Consolidated"
for xbrl_path in xbrl_paths:
    edinet_xbrl_object = parser.parse_file(xbrl_path)
    accounting_standards, current_period_end_date, type_of_current_period = get_accounting_data(
        edinet_xbrl_object, doc_type_code, sec_code
    )
    # Skip files that lack the accounting standard or the period end date.
    if not accounting_standards or not current_period_end_date:
        continue

    # The period end date is split on '-' into year and month parts.
    year_text, month = current_period_end_date.split('-')[:2]
    year = int(year_text)

    # The taxonomy factory is chosen by accounting standard, document type and
    # consolidation type; only its second return value is used here.
    _, taxonomy_dict = taxonomy_functions_dict[
        f"{accounting_standards}_{doc_type_name}_{consolidated_type}"
    ]()
    financial_summary_dict = {
        label: get_values_from_xbrl_by_some_keys(
            edinet_xbrl_object, entry['key'], entry['context_ref']
        )
        for label, entry in taxonomy_dict.items()
    }
    df_finance = pd.DataFrame(financial_summary_dict).T

    # Five columns, oldest first: (year-4) ... year, each suffixed with the
    # month and the period type.
    cols = [f'{year - i}_{month}{type_of_current_period}' for i in reversed(range(0, 5))]
    df_finance.columns = cols

    if cnt == 0:
        df_finances = df_finance
        cnt += 1
    else:
        # Columns already present are replaced by the values of the file
        # processed later.
        delete_cols = [c for c in df_finances.columns if c in cols]
        df_finances = df_finances.drop(columns=delete_cols).join(df_finance)

if cnt >= 1:
    # company_name was not assigned anywhere before this cell in notebook
    # order, so the original raised NameError on a fresh kernel. Resolve it
    # from the code -> name lookup that the later cells also use.
    company_name = read_code_to_company_name()[sec_code]
    # NOTE: save_path is only computed here; nothing is written to disk.
    save_path = os.path.join(save_dir, consolidated_type, f'{sec_code}_{company_name}.csv')
    # Transpose so that each reporting period becomes a row.
    df_finances = df_finances.T

In [None]:
# save_path = f"../data/preprocess/edinet/etc/xbrl_summary/四半期報告書/{sec_code}.csv"
# get_data_from_xbrl(xbrl_path, save_path)

### Analysis stock price

In [None]:
# Load the stock price summary (first CSV column becomes the index) and the
# securities-code -> company-name lookup.
df_price = pd.read_csv(
    "../data/preprocess/stock_price/stock_price_summary.csv",
    index_col=0,
)
code_to_company_name = read_code_to_company_name()

In [None]:
# Select the rows of df_price whose 'rate_of_increase_2017_min_2021_max'
# value exceeds 1000.
# Alternative screens tried previously (column > threshold):
#   rate_of_increase_2020_min_2021_max > 400
#   rate_of_increase_2019_min_2021_max > 800
#   rate_of_increase_2018_min_2021_max > 1600
df_tmp = df_price[df_price['rate_of_increase_2017_min_2021_max'] > 1000]
# Print the match count only after df_tmp exists: printing it first raised
# NameError on a fresh kernel, or reported the length of a stale frame.
print(len(df_tmp))

# The index of the filtered frame is used as the ticker list by later cells.
tickers = df_tmp.index

# Optional: plot a price chart for each selected ticker.
# save_dir = "../result/chart/"
# for ticker in df_tmp.index:
#     company_name = code_to_company_name[ticker]
#     print(ticker, company_name, df_tmp.loc[ticker,'rate_of_increase_2019_min_2021_max'])
#     plot_stock_price(ticker, company_name, save_dir=save_dir)

In [None]:
# Show the screening metric for the selected rows.
df_tmp.loc[:, 'rate_of_increase_2017_min_2021_max']

In [None]:
# Company information table, indexed by the CSV's second column.
df_company = pd.read_csv(
    "../data/preprocess/etc/company_info.csv",
    index_col=1,
)

In [None]:
# Keep the company rows for the selected tickers (this rebinds df_tmp).
df_tmp = df_company.loc[tickers, :]

In [None]:
# Rows whose '17業種区分' value equals the literal below (note its trailing space).
df_tmp.loc[df_tmp['17業種区分'].eq('情報通信・サービスその他 ')]

### Analysis Financial Summary

In [None]:
# Plot the financial summary of every selected ticker into save_dir,
# titled "<ticker>_<company name>".
save_dir = "../result/financial_summary/"
for ticker in tickers:
    company_name = code_to_company_name[ticker]
    title = f"{ticker}_{company_name}"
    df = read_financial_summary_by_ticker(ticker)
    # plot_stock_price(ticker, company_name)
    plot_financial_summary(df, title, save_dir=save_dir)

### Evaluate Company

In [None]:
def all_rates_exceed(rates, threshold=100, min_periods=3):
    """Return True when every non-missing rate is greater than ``threshold``.

    Args:
        rates: pandas Series of rate values; missing entries are ignored.
        threshold: Value each remaining rate must strictly exceed.
        min_periods: Minimum number of non-missing rates required.

    Returns:
        bool: True if at least ``min_periods`` values remain after dropping
        missing entries and all of them are greater than ``threshold``.
    """
    # dropna() returns a new Series, so the source frame is never mutated.
    valid = rates.dropna()
    return len(valid) >= min_periods and bool((valid > threshold).all())


# Collect the ticker of every summary CSV whose '売上高_変化率(前年比)' column
# passes the screen above.
tickers = []
# Sorted so the resulting ticker order does not depend on directory order.
paths = sorted(glob.glob("../data/preprocess/edinet/有価証券報告書/Summary/*.csv"))
for i, path in enumerate(paths):
    if i != 0 and i % 1000 == 0:
        print(i)  # coarse progress indicator
    df = pd.read_csv(path, index_col=0)
    if all_rates_exceed(df["売上高_変化率(前年比)"]):
        # The ticker is the file-name part before the first '_'. basename()
        # is used instead of split('/') so this also works where glob returns
        # backslash-separated paths.
        ticker = os.path.basename(path).split('_')[0]
        tickers.append(int(ticker))

In [None]:
# Report how many tickers passed the screen, then plot each one's price chart.
print(f"total tickers: {len(tickers)}")
for ticker in tickers:
    code = int(ticker)
    company_name = code_to_company_name[code]
    plot_stock_price(code, company_name)