In [1]:
# Mount Google Drive at /content/drive so later cells can read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive




#### NASDAQ, NYSE, AMEX (stocks only)

In [3]:
import calendar
import json
import os
import ssl
import time
from datetime import datetime, date
from urllib.error import HTTPError
from urllib.request import urlopen

import certifi
import numpy as np
import pandas as pd
from tqdm import tqdm

In [4]:
def get_adata_annual(adata, data_name):
    """Build a date x ticker table of ``data_name`` whose values only change on June rows.

    Args:
        adata: Long-format frame with 'date', 'ticker' and ``data_name`` columns.
        data_name: Name of the value column to pivot.

    Returns:
        DataFrame indexed by date with one float column per ticker. Each June
        row's value is carried forward for up to 11 following rows; rows that
        are empty for every ticker are removed.
    """
    wide = adata.pivot_table(index='date', columns='ticker', values=data_name).astype('float')
    # Let each observation persist for at most 11 further rows before sampling June.
    wide = wide.ffill(limit=11)

    # Keep the June rows only, then lay them back out over the full date index.
    june_labels = [stamp for stamp in wide.index if stamp.month == 6]
    june_only = wide.loc[june_labels]
    annual = june_only.reindex(wide.index)

    # Hold every June value until just before the next June row.
    annual = annual.ffill(limit=11)
    return annual.dropna(how='all')

def get_last_quarter_end(today=None):
    """Return the last day of the calendar quarter preceding the one that contains ``today``.

    Args:
        today: Reference ``datetime.date``. Defaults to ``date.today()``; pass an
            explicit date to reproduce an earlier run or to test the function.

    Returns:
        ``datetime.date`` for 31 Mar, 30 Jun, 30 Sep or 31 Dec. Any date in
        January-March (including 31 March itself) maps to 31 December of the
        previous year.
    """
    if today is None:
        today = date.today()

    # Zero-based index of the quarter containing `today` (0 = Q1 ... 3 = Q4).
    current_quarter = (today.month - 1) // 3

    if current_quarter == 0:
        # During Q1 the previous quarter is Q4 of the prior year.
        year, month = today.year - 1, 12
    else:
        # The previous quarter ends in month 3, 6 or 9 of the same year.
        year, month = today.year, current_quarter * 3

    # monthrange() returns (weekday_of_first_day, days_in_month).
    last_day = calendar.monthrange(year, month)[1]
    return date(year, month, last_day)

def get_last_month_end(today=None):
    """Return the last day of the month before ``today`` as a 'YYYY-MM-DD' string.

    Args:
        today: Reference ``datetime.date``. Defaults to ``date.today()``; pass an
            explicit date to reproduce an earlier run or to test the function.

    Returns:
        str formatted as ``%Y-%m-%d``.
    """
    if today is None:
        today = date.today()

    if today.month > 1:
        year, month = today.year, today.month - 1
    else:
        # January rolls back to December of the previous year.
        year, month = today.year - 1, 12

    # monthrange() returns (weekday_of_first_day, days_in_month); handles leap years.
    last_day = calendar.monthrange(year, month)[1]
    return date(year, month, last_day).strftime("%Y-%m-%d")

In [5]:
# Helper that downloads a URL and parses the response body as JSON.
def get_jsonparsed_data(url, timeout=30):
    """Fetch ``url`` over HTTPS and return the decoded JSON payload.

    Args:
        url: Address to request.
        timeout: Seconds to wait on a blocking socket operation before giving
            up. Without a timeout a single stalled connection would hang a
            loop over thousands of tickers indefinitely.

    Returns:
        Whatever ``json.loads`` produces for the UTF-8 decoded response body.

    Raises:
        urllib.error.HTTPError / urllib.error.URLError: on HTTP or connection
            failures (including a timeout while connecting).
        TimeoutError: if reading the response stalls for ``timeout`` seconds.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    # Verify the server certificate against certifi's CA bundle.
    context = ssl.create_default_context(cafile=certifi.where())
    with urlopen(url, timeout=timeout, context=context) as response:
        return json.loads(response.read().decode("utf-8"))

# Fetch the full symbol list from Financial Modeling Prep.
# The API key is read from the FMP_API_KEY environment variable so the
# credential is not stored in (or leaked through) the notebook file.
fmp_api_key = os.environ["FMP_API_KEY"]
url = f"https://financialmodelingprep.com/api/v3/stock/list?apikey={fmp_api_key}"
stock_list = get_jsonparsed_data(url)

# One row per listed symbol.
stock_listed = pd.DataFrame(stock_list)

# Keep rows listed on NASDAQ / NYSE / AMEX whose type is 'stock'.
# Both masks are built on stock_listed, so they share its index and can be
# combined directly without any reindexing.
is_us_exchange = stock_listed['exchangeShortName'].isin(['NASDAQ', 'NYSE', 'AMEX'])
is_stock = stock_listed['type'] == 'stock'

us_stock_info = stock_listed[is_us_exchange & is_stock].copy()

# 결과 확인
# print(us_stock_info.head())



In [None]:
# Unique symbols from the screened frame; the bare len() shows how many there are as the cell output.
ticker_list = us_stock_info['symbol'].unique().tolist()
len(ticker_list)

11513

# 업종 데이터 및 기업 일반정보

In [None]:
# Download the company profile for every screened ticker and stack the results into `info`.

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

RATE_LIMIT_WAIT_SECONDS = 60   # pause after an HTTP 429 before trying the same ticker again
MAX_RATE_LIMIT_RETRIES = 3     # stop retrying a ticker after this many 429 responses
REQUEST_DELAY_SECONDS = 0.2    # spacing between tickers to stay under the rate limit

info_list = []

for tick in tqdm(ticker_list, desc="Downloading stock profiles"):
    url = f"https://financialmodelingprep.com/api/v3/profile/{tick}?apikey={fmp_api_key}"

    for attempt in range(MAX_RATE_LIMIT_RETRIES + 1):
        try:
            info_raw = get_jsonparsed_data(url)
        except HTTPError as e:
            if e.code == 429 and attempt < MAX_RATE_LIMIT_RETRIES:
                # Rate limited: wait, then retry this same ticker instead of
                # skipping it (skipping would silently drop it from `info`).
                print(f"Rate limit exceeded at {tick}. Sleeping for {RATE_LIMIT_WAIT_SECONDS} seconds...")
                time.sleep(RATE_LIMIT_WAIT_SECONDS)
                continue
            print(f"HTTPError for {tick}: {e}")
            break
        except Exception as e:
            print(f"Error fetching {tick}: {e}")
            break

        # An empty payload means there is no profile for this symbol; skip it.
        if info_raw:
            info_list.append(pd.DataFrame(info_raw))
        break

    time.sleep(REQUEST_DELAY_SECONDS)

# pd.concat raises on an empty list, so fall back to an empty frame.
if info_list:
    info = pd.concat(info_list, ignore_index=True)
else:
    info = pd.DataFrame()


Downloading stock profiles: 100%|██████████| 11513/11513 [3:22:59<00:00,  1.06s/it]
  info = pd.concat(info_list, ignore_index=True)


In [None]:
# Rebuild `info` from the per-ticker frames in `info_list` (the same concat the download cell performs)
# and show it as the cell output.
# NOTE(review): unlike the download cell, this call is not guarded by `if info_list:` —
# confirm info_list is non-empty before running this cell.
info = pd.concat(info_list, ignore_index=True)
info

  info = pd.concat(info_list, ignore_index=True)


Unnamed: 0,symbol,price,beta,volAvg,mktCap,lastDiv,range,changes,companyName,currency,...,zip,dcfDiff,dcf,image,ipoDate,defaultImage,isEtf,isActivelyTrading,isAdr,isFund
0,DXYZ,47.8700,2.887793,543466.0,520821052,0.00000,7.75-77.35,1.1200,Destiny Tech100 Inc.,USD,...,,45.61320,2.256801,https://images.financialmodelingprep.com/symbo...,2024-03-26,False,False,True,False,False
1,ACVA,16.8900,1.721000,2548348.0,3047445827,0.00000,11.88-23.456,0.3700,ACV Auctions Inc.,USD,...,14203,16.97098,-0.080979,https://images.financialmodelingprep.com/symbo...,2021-03-24,False,False,True,False,False
2,TPZ,20.0300,0.830000,35778.0,117980105,0.99447,15.1-22.3,0.1000,"Tortoise Power and Energy Infrastructure Fund,...",USD,...,66211-1938,23.47011,-3.440112,https://images.financialmodelingprep.com/symbo...,2009-07-29,False,False,True,False,True
3,NVR,7210.6500,1.018000,23165.0,21082714790,0.00000,6562.85-9964.77,144.2600,"NVR, Inc.",USD,...,20190,-2829.96375,10040.613752,https://images.financialmodelingprep.com/symbo...,1985-07-22,False,False,True,False,False
4,SMR,35.5200,1.629000,9023990.0,4737906240,0.00000,6.61-35.77,5.2800,NuScale Power Corporation,USD,...,97224,,0.000000,https://images.financialmodelingprep.com/symbo...,2022-03-01,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11508,VHNAW,0.1229,0.000000,0.0,0,0.00000,0.1102-0.17,0.0129,Vahanna Tech Edge Acquisition I Corp.,USD,...,10020,,0.000000,https://images.financialmodelingprep.com/symbo...,,True,False,False,False,False
11509,GOGN-WT,1.0500,0.000000,0.0,0,0.00000,1.05-1.19,0.0500,GoGreen Investments Corporation,USD,...,,,0.000000,https://images.financialmodelingprep.com/symbo...,,True,False,False,False,False
11510,TENKR,0.4800,0.000000,0.0,3199680,0.00000,0.48-0.5,-0.0291,TenX Keane Acquisition Right,USD,...,,,0.000000,https://images.financialmodelingprep.com/symbo...,,False,False,False,False,False
11511,XPAXW,0.0850,0.000000,0.0,0,0.00000,0.08-0.088,0.0130,XPAC Acquisition Corp.,USD,...,,,0.000000,https://images.financialmodelingprep.com/symbo...,,True,False,False,False,False


In [6]:
# Initial setup: date stamps shared by the cells below.

# Today's date as YYYY-MM-DD (used in output file names).
today_date = format(datetime.now(), "%Y-%m-%d")

# Most recently completed quarter end, as a date object and as a YYYY-MM-DD string.
last_quarter_end = get_last_quarter_end()
print(last_quarter_end)
dt = last_quarter_end
quarter_date = format(dt, "%Y-%m-%d")

# Last day of the previous month (already a YYYY-MM-DD string).
last_month_date = get_last_month_end()
print(last_month_date)

2025-03-31
2025-04-30


In [None]:
# import os

# path = f'/content/drive/MyDrive/Stock_Investment_Strategy/Factor_Model/Data/US_stock_prc/ticker_list_{today_date}.csv'

# if os.path.exists(path):
#     info = pd.read_csv(path)
#     if 'Unnamed: 0' in info.columns:
#         info = info.drop(columns='Unnamed: 0')
#     if 'ticker' in info.columns:
#         tic_list = info['ticker'].unique().tolist()
#         print(tic_list[:5])  # 일부 확인
#     else:
#         print("❌ 'ticker' column not found.")
# else:
#     print("❌ File not found:", path)

In [None]:
# Narrow the profile table to the identifying columns needed for the sector screen.
info_df = info[['symbol', 'sector', 'industry']]

# Sectors left out of the universe; the empty string removes rows whose sector is blank.
excluded_sectors = ['Financial Services', 'Real Estate', 'Utilities', '']
keep_row = ~info_df['sector'].isin(excluded_sectors)

info_re = info_df[keep_row].copy()
tics_list = info_re['symbol'].unique().tolist()

# Today's date as YYYY-MM-DD, used to stamp the exported file name.
today_date = datetime.now().strftime("%Y-%m-%d")

# Save the surviving tickers as a single-column CSV on Drive.
ticker_frame = pd.DataFrame(tics_list, index=None, columns=['ticker'])
ticker_frame.to_csv('/content/drive/MyDrive/Stock_Investment_Strategy/Factor_Model/Data/US_stock_prc/ticker_list_{}.csv'.format(today_date))

# ticker_data = pd.read_csv('/content/drive/MyDrive/Stock_Investment_Strategy/Factor_Model/Data/US_stock_prc/ticker_list_{}.csv'.format(today_date))
# tic_list = ticker_data['ticker'].unique().tolist()

In [8]:
# Reload a previously saved ticker list and take its unique 'ticker' values.
# NOTE(review): the file name is pinned to the 2025-05-28 export rather than built from
# today_date — confirm this is intentional.
path = '/content/drive/MyDrive/Stock_Investment_Strategy/Factor_Model/Data/US_stock_prc/ticker_list_2025-05-28.csv'
ticker_data = pd.read_csv(path)
tic_list = ticker_data['ticker'].unique().tolist()

# 연간 데이터

#### 1. IS data 연결

In [None]:
# Download quarterly income statements for every ticker and expand each one onto a
# month-end calendar, forward-filling the latest report into the following months.

# Target (Compustat-style) column names ...
compustat_is = ['report_date', 'ticker', 'period', 'sale', 'cogs', 'gp', 'xrd', 'xsga', 'idit', 'xint', 'dp', 'ebitda', 'xopr', 'opiti', 'opir', 'pi', 'pir', 'txt', 'ni', 'nir', 'eps', 'epsdi', 'shrout', 'shroutdi']
# ... and the API fields they are taken from, in the same order.
fmp_is_fields = ['date', 'symbol', 'period', 'revenue', 'costOfRevenue', 'grossProfit', 'researchAndDevelopmentExpenses', 'sellingGeneralAndAdministrativeExpenses',
                 'interestIncome', 'interestExpense', 'depreciationAndAmortization', 'ebitda', 'operatingExpenses', 'operatingIncome',
                 'operatingIncomeRatio', 'incomeBeforeTax', 'incomeBeforeTaxRatio', 'incomeTaxExpense', 'netIncome',
                 'netIncomeRatio', 'eps', 'epsdiluted', 'weightedAverageShsOut', 'weightedAverageShsOutDil']

# Month-end calendar every ticker is aligned to.
# NOTE: pandas 2.2+ deprecates the 'M' alias in favour of 'ME'; 'M' is kept for older installs.
dates_list = pd.date_range('2004-01-31', last_month_date, freq='M')
date_df = pd.DataFrame(dates_list, columns=['date'])  # loop-invariant, so built once

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

is_list = []
error_list = []

for tick in tqdm(tic_list, desc="Processing Tickers"):
    try:
        url = f"https://financialmodelingprep.com/api/v3/income-statement/{tick}?period=quarter&apikey={fmp_api_key}"
        fs_raw = get_jsonparsed_data(url)
        temp_df = pd.DataFrame(fs_raw)

        # Selecting the fields raises KeyError when the response is empty or malformed.
        ticker_is = temp_df[fmp_is_fields].copy()
        ticker_is.columns = compustat_is

        is_df_sorted = ticker_is.sort_values(by='report_date')
        is_df_sorted['report_date'] = pd.to_datetime(is_df_sorted['report_date'])
        # Reduce the report date to 'YYYY-MM' ...
        is_df_sorted['date_month'] = is_df_sorted['report_date'].dt.to_period('M').astype(str)
        # ... and snap it to that month's last day so it lines up with the calendar.
        is_df_sorted['date'] = pd.to_datetime(is_df_sorted['date_month']) + pd.offsets.MonthEnd(0)

        temp_is = pd.merge(date_df, is_df_sorted, on=['date'], how='left').ffill()
    except Exception as e:
        # Record the failure and move on. Without the `continue`, the code below
        # would re-append the previous ticker's frame under this iteration.
        # `Exception` (not a bare except) keeps Ctrl-C able to stop the loop.
        print(f"Error for {tick}: {e}")
        error_list.append(tick)
        continue

    is_list.append(temp_is)

is_df = pd.concat(is_list)


In [None]:
# Drop the calendar rows that have no ticker (NaN in 'ticker'), then save the income-statement panel to CSV.
# NOTE(review): the target is a hard-coded Windows path, while other cells in this notebook
# write under /content/drive — confirm the intended location.
is_df = is_df[is_df['ticker'].notna()]
is_df.to_csv(fr'C:\Users\82108\OneDrive\바탕 화면\investment\investment_strategy/us_is_{today_date}.csv')

In [None]:
# Preview the first five rows of the income-statement panel.
is_df.head(5)

Unnamed: 0,date,report_date,ticker,period,sale,cogs,gp,xrd,xsga,idit,...,pi,pir,txt,ni,nir,eps,epsdi,shrout,shroutdi,date_month
182,2019-03-31,2019-03-31,ACVA,Q1,26711750.0,20694500.0,6017250.0,0.0,25837750.0,0.0,...,-19297250.0,-0.722426,6750.0,-19304000.0,-0.722678,-0.14,-0.14,133749588.0,133749588.0,2019-03
183,2019-04-30,2019-03-31,ACVA,Q1,26711750.0,20694500.0,6017250.0,0.0,25837750.0,0.0,...,-19297250.0,-0.722426,6750.0,-19304000.0,-0.722678,-0.14,-0.14,133749588.0,133749588.0,2019-03
184,2019-05-31,2019-03-31,ACVA,Q1,26711750.0,20694500.0,6017250.0,0.0,25837750.0,0.0,...,-19297250.0,-0.722426,6750.0,-19304000.0,-0.722678,-0.14,-0.14,133749588.0,133749588.0,2019-03
185,2019-06-30,2019-06-30,ACVA,Q2,26711750.0,20694500.0,6017250.0,0.0,25837750.0,0.0,...,-19297250.0,-0.722426,6750.0,-19304000.0,-0.722678,-0.14,-0.14,133749588.0,133749588.0,2019-06
186,2019-07-31,2019-06-30,ACVA,Q2,26711750.0,20694500.0,6017250.0,0.0,25837750.0,0.0,...,-19297250.0,-0.722426,6750.0,-19304000.0,-0.722678,-0.14,-0.14,133749588.0,133749588.0,2019-06


#### 2. BS data 연결

In [None]:
# Download quarterly balance sheets for every ticker and expand each one onto a
# month-end calendar, forward-filling the latest report into the following months.

# NOTE(review): compustat_bs is defined here but not applied in this cell, so bs_df keeps
# the API's own column names — confirm whether a rename (as in the IS/CF cells) is intended.
compustat_bs = ['report_date', 'ticker', 'at', 'ca', 'rec', 'cash', 'invt', 'intan', 'ivao', 'ppen',
                'ao', 'lt', 'lo', 'debtst', 'ap', 'txp', 'debtlt', 'pstk', 'be', 'debt', 'netdebt']

error_list = []
bs_list = []

# Month-end calendar every ticker is aligned to.
# NOTE: pandas 2.2+ deprecates the 'M' alias in favour of 'ME'; 'M' is kept for older installs.
dates_list = pd.date_range('2004-01-31', last_month_date, freq='M')
date_df = pd.DataFrame(dates_list, columns=['date'])  # loop-invariant, so built once

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

for tick in tqdm(tic_list, desc="Processing Tickers"):
    try:
        url = f"https://financialmodelingprep.com/api/v3/balance-sheet-statement/{tick}?period=quarter&apikey={fmp_api_key}"
        fs_raw = get_jsonparsed_data(url)

        # Skip tickers whose response is empty or not a list of records.
        if not fs_raw or not isinstance(fs_raw, list):
            error_list.append(tick)
            continue

        temp_df = pd.DataFrame(fs_raw)

        # The report date is required for the calendar alignment below.
        if 'date' not in temp_df.columns:
            error_list.append(tick)
            continue

        temp_df = temp_df.rename(columns={'date': 'report_date'})
        temp_df['report_date'] = pd.to_datetime(temp_df['report_date'])

        # Sort by report date and snap each report to the last day of its month.
        bs_df_sorted = temp_df.sort_values(by='report_date')
        bs_df_sorted['date_month'] = bs_df_sorted['report_date'].dt.to_period('M').astype(str)
        bs_df_sorted['date'] = pd.to_datetime(bs_df_sorted['date_month']) + pd.offsets.MonthEnd(0)

        # Align to the calendar and carry the latest report forward.
        temp_bs = pd.merge(date_df, bs_df_sorted, on='date', how='left').ffill()
        bs_list.append(temp_bs)

    except Exception as e:
        print(f"Error for {tick}: {e}")
        error_list.append(tick)

# Stack all tickers; pd.concat raises on an empty list, so fall back to an empty frame.
if bs_list:
    bs_df = pd.concat(bs_list, ignore_index=True)
else:
    bs_df = pd.DataFrame()


In [None]:
# is_df.to_csv(f'/content/drive/MyDrive/Stock_Investment_Strategy/Factor_Model/Data/US_IS/us_is_{today_date}.csv')
# Save the balance-sheet panel to CSV, stamped with today_date.
# NOTE(review): the target is a hard-coded Windows path, while other cells in this notebook
# write under /content/drive — confirm the intended location.
bs_df.to_csv(fr'C:\Users\82108\OneDrive\바탕 화면\investment\investment_strategy/us_bs_{today_date}.csv')

#### 3. CF data 연결

In [None]:
# Download quarterly cash-flow statements for every ticker and expand each one onto a
# month-end calendar, forward-filling the latest report into the following months.

# Target (Compustat-style) column names ...
compustat_cf = ['report_date', 'ticker', 'capx', 'ocf', 'eqbb', 'eqis', 'dstnetis', 'dltnetis', 'fincf', 'fcf' ]
# ... and the API fields they are taken from, in the same order.
# ('otherFinancingActivites' is spelled exactly as the original request used it.)
fmp_cf_fields = ['date', 'symbol', 'capitalExpenditure', 'operatingCashFlow', 'commonStockRepurchased',
                 'commonStockIssued', 'debtRepayment', 'otherFinancingActivites', 'netCashUsedProvidedByFinancingActivities', 'freeCashFlow']

# Month-end calendar every ticker is aligned to.
# NOTE: pandas 2.2+ deprecates the 'M' alias in favour of 'ME'; 'M' is kept for older installs.
dates_list = pd.date_range('2004-01-31', last_month_date, freq='M')
date_df = pd.DataFrame(dates_list, columns=['date'])  # loop-invariant, so built once

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

cf_error_list = []
cf_list = []

for tick in tqdm(tic_list, desc="Processing Tickers"):
    try:
        url = f"https://financialmodelingprep.com/api/v3/cash-flow-statement/{tick}?period=quarter&apikey={fmp_api_key}"
        fs_raw = get_jsonparsed_data(url)
        temp_df = pd.DataFrame(fs_raw)

        # Selecting the fields raises KeyError when the response is empty or malformed.
        ticker_cf = temp_df[fmp_cf_fields].copy()
        ticker_cf.columns = compustat_cf

        cf_df_sorted = ticker_cf.sort_values(by='report_date')
        cf_df_sorted['report_date'] = pd.to_datetime(cf_df_sorted['report_date'])
        # Reduce the report date to 'YYYY-MM' ...
        cf_df_sorted['date_month'] = cf_df_sorted['report_date'].dt.to_period('M').astype(str)
        # ... and snap it to that month's last day so it lines up with the calendar.
        cf_df_sorted['date'] = pd.to_datetime(cf_df_sorted['date_month']) + pd.offsets.MonthEnd(0)

        temp_cf = pd.merge(date_df, cf_df_sorted, on=['date'], how='left').ffill()
    except Exception as e:
        # Record the failure and move on. Without the `continue`, the code below
        # would re-append the previous ticker's frame under this iteration.
        # `Exception` (not a bare except) keeps Ctrl-C able to stop the loop.
        print(f"Error for {tick}: {e}")
        cf_error_list.append(tick)
        continue

    cf_list.append(temp_cf)

cf_df = pd.concat(cf_list)

# June-sampled operating cash flow, one column per ticker.
ocf = get_adata_annual(cf_df, 'ocf')

ocf

In [None]:
# Save the cash-flow panel to CSV, stamped with today_date.
# NOTE(review): the target is a hard-coded Windows path, while other cells in this notebook
# write under /content/drive — confirm the intended location.
cf_df.to_csv(fr'C:\Users\82108\OneDrive\바탕 화면\investment\investment_strategy/us_cf_{today_date}.csv')

# Ratio data

In [None]:
# Download annual financial ratios for the listed tickers (currently only 'MMM')
# and stack them into ratio_raw.

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

ratio_list = []

for tick in ['MMM']:
    url = f"https://financialmodelingprep.com/api/v3/ratios/{tick}?period=annual&apikey={fmp_api_key}"
    fs_raw = get_jsonparsed_data(url)
    temp_df = pd.DataFrame(fs_raw)
    ratio_list.append(temp_df)

ratio_raw = pd.concat(ratio_list)




# Price Ratio data

In [None]:
# Download trailing-twelve-month ratios for the listed tickers (currently only 'MMM')
# and stack them into prc_ratio_raw.

# The API key is read from the environment so the credential is not stored in the notebook.
fmp_api_key = os.environ["FMP_API_KEY"]

price_ratio_list = []

for tick in ['MMM']:
    url = f"https://financialmodelingprep.com/api/v3/ratios-ttm/{tick}?apikey={fmp_api_key}"
    fs_raw = get_jsonparsed_data(url)
    temp_df = pd.DataFrame(fs_raw)
    price_ratio_list.append(temp_df)

prc_ratio_raw = pd.concat(price_ratio_list)

In [None]:
# Display the TTM ratio table as the cell output.
prc_ratio_raw

Unnamed: 0,dividendYielTTM,dividendYielPercentageTTM,peRatioTTM,pegRatioTTM,payoutRatioTTM,currentRatioTTM,quickRatioTTM,cashRatioTTM,daysOfSalesOutstandingTTM,daysOfInventoryOutstandingTTM,...,priceToSalesRatioTTM,priceEarningsRatioTTM,priceToFreeCashFlowsRatioTTM,priceToOperatingCashFlowsRatioTTM,priceCashFlowRatioTTM,priceEarningsToGrowthRatioTTM,priceSalesRatioTTM,enterpriseValueMultipleTTM,priceFairValueTTM,dividendPerShareTTM
0,0.029194,2.919421,74.630863,130.088807,3.050901,1.360404,1.077379,0.702893,42.666678,86.207097,...,2.282703,74.630863,16.322634,12.381581,12.381581,130.088807,2.282703,26.148115,17.97163,3.71
