In [1]:
pip install -U finance-datareader

Note: you may need to restart the kernel to use updated packages.


In [3]:
import FinanceDataReader as fdr
import pandas as pd

# Download the KRX listing (a pandas DataFrame, per the type check this cell used to contain).
df_krx = fdr.StockListing('KRX')

# Keep only the ticker code and company name, as a list of [code, name] pairs.
df_code_name = df_krx[['Code', 'Name']]
code_name_list = df_code_name.values.tolist()
print(code_name_list[:10])

[['005930', '삼성전자'], ['000660', 'SK하이닉스'], ['373220', 'LG에너지솔루션'], ['207940', '삼성바이오로직스'], ['005380', '현대차'], ['005935', '삼성전자우'], ['000270', '기아'], ['105560', 'KB금융'], ['068270', '셀트리온'], ['035420', 'NAVER']]


In [None]:
import requests
from datetime import datetime

import os

# Basic API configuration: mock-trading domain plus the quotation endpoint.
MOCK_DOMAIN = "https://openapivts.koreainvestment.com:29443"
API_ENDPOINT = "/uapi/domestic-stock/v1/quotations/inquire-time-itemconclusion"
api_url = MOCK_DOMAIN + API_ENDPOINT

# Credentials come from the environment so they are never stored in the notebook.
# The variable names below are this notebook's own convention; export them
# before starting the kernel.
APP_KEY = os.environ.get("KIS_APP_KEY", "")
APP_SECRET = os.environ.get("KIS_APP_SECRET", "")
ACCESS_TOKEN = os.environ.get("KIS_ACCESS_TOKEN", "")

_missing_credentials = [
    env_name
    for env_name, value in (
        ("KIS_APP_KEY", APP_KEY),
        ("KIS_APP_SECRET", APP_SECRET),
        ("KIS_ACCESS_TOKEN", ACCESS_TOKEN),
    )
    if not value
]
if _missing_credentials:
    # Warn early; otherwise the problem only shows up later as API error responses.
    print(f"Warning: missing environment variables: {', '.join(_missing_credentials)}")

def get_stock_data(stock_code, timeout=10):
    """Fetch the current price and accumulated volume for one stock.

    Sends a GET request to ``api_url`` using the module-level credentials,
    passing the current local time (``HHMMSS``) as the query hour.

    Args:
        stock_code: Ticker code sent as ``FID_INPUT_ISCD``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        On HTTP 200, a dict with the keys "종목 코드", "현재가" and "누적 거래량"
        (the latter two are ``None`` when the response does not carry them).
        On any other status, a dict with the keys "종목 코드", "오류 코드" and
        "오류 메시지".

    Raises:
        requests.RequestException: On connection failures or timeouts.
        ValueError: If a 200 response body is not valid JSON.
    """
    current_time = datetime.now().strftime("%H%M%S")

    headers = {
        "content-type": "application/json; charset=utf-8",
        "authorization": f"Bearer {ACCESS_TOKEN}",
        "appkey": APP_KEY,
        "appsecret": APP_SECRET,
        "tr_id": "FHPST01060000"
    }

    # API request parameters
    params = {
        "FID_COND_MRKT_DIV_CODE": "J",  # market division code
        "FID_INPUT_ISCD": stock_code,   # stock code
        "FID_INPUT_HOUR_1": current_time  # current time
    }

    # Issue the API request
    response = requests.get(api_url, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        return {
            "종목 코드": stock_code,
            "오류 코드": response.status_code,
            "오류 메시지": response.text
        }

    data = response.json()
    # `or {}` also covers a payload where "output1" is present but null.
    output1 = data.get("output1") or {}
    return {
        "종목 코드": stock_code,
        "현재가": output1.get("stck_prpr"),
        "누적 거래량": output1.get("acml_vol"),
    }

# Example usage: query a single ticker and report either the quote or the error.
stock_code = "000660"  # SK hynix
result = get_stock_data(stock_code)

if "현재가" not in result:
    # Error responses carry no price key; show the whole payload instead.
    print(f"오류 발생: {result}")
else:
    print(f"\n종목 코드: {result['종목 코드']}")
    print(f"현재가: {result['현재가']}")
    print(f"누적 거래량: {result['누적 거래량']}")



종목 코드: 000660
현재가: 176700
누적 거래량: 3919973


In [7]:
import csv
from datetime import datetime
import time

# Collect one price/volume record per listed stock.
REQUEST_DELAY_SEC = 0.08   # pause after every request to stay under the API rate limit
RETRY_BACKOFF_SEC = 1.0    # extra pause before retrying a failed request (scaled by attempt)
MAX_ATTEMPTS = 3           # total tries per stock before it is recorded as failed

# Successful records, and the stocks that could not be fetched (with the reason).
all_stock_data = []
failed_stocks = []

for code, name in code_name_list:
    stock_info = None
    last_error = None

    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            candidate = get_stock_data(code)
        except Exception as e:  # network failure, timeout, malformed JSON, ...
            last_error = str(e)
        else:
            if "현재가" in candidate:
                stock_info = candidate
            else:
                # Error responses have no price key; keep the payload for reporting.
                last_error = candidate
        finally:
            # Always throttle, including after a failure, so a rate-limit
            # error is not followed immediately by another request.
            time.sleep(REQUEST_DELAY_SEC)

        if stock_info is not None:
            break
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_BACKOFF_SEC * attempt)

    if stock_info is None:
        failed_stocks.append({'종목명': name, '종목코드': code, '오류': last_error})
        print(f"Error processing {name} ({code}): {last_error}")
        continue

    # Prepare and store the row for this stock
    all_stock_data.append({
        '종목명': name,
        '종목코드': code,
        '현재가': stock_info['현재가'],
        '누적거래량': stock_info['누적 거래량']
    })

    # Print progress
    print(f"Processed {name} ({code})")

Processed 삼성전자 (005930)
Processed SK하이닉스 (000660)
Processed LG에너지솔루션 (373220)
Processed 삼성바이오로직스 (207940)
Processed 현대차 (005380)
Processed 삼성전자우 (005935)
Processed 기아 (000270)
Processed KB금융 (105560)
Processed 셀트리온 (068270)
Processed NAVER (035420)
Processed 신한지주 (055550)
Processed POSCO홀딩스 (005490)
Error processing 현대모비스 (012330): '현재가'
Processed 삼성물산 (028260)
Processed 삼성생명 (032830)
Processed HD현대중공업 (329180)
Processed LG화학 (051910)
Processed 메리츠금융지주 (138040)
Processed 고려아연 (010130)
Processed 삼성SDI (006400)
Processed 삼성화재 (000810)
Processed 하나금융지주 (086790)
Processed SK이노베이션 (096770)
Processed 한화에어로스페이스 (012450)
Processed HMM (011200)
Processed 카카오 (035720)
Processed 알테오젠 (196170)
Processed LG전자 (066570)
Processed 한국전력 (015760)
Processed KT&G (033780)
Processed 크래프톤 (259960)
Processed HD한국조선해양 (009540)
Processed 두산에너빌리티 (034020)
Processed 포스코퓨처엠 (003670)
Processed 에코프로비엠 (247540)
Processed HD현대일렉트릭 (267260)
Processed 우리금융지주 (316140)
Processed SK텔레콤 (017670)
Processed LG (003550)
Proce

In [9]:
# Turn the list of per-stock records into a table.
df = pd.DataFrame(all_stock_data)

# Persist the snapshot as CSV (UTF-8 with BOM).
df.to_csv('stock_data.csv', encoding='utf-8-sig', index=False)
print("CSV file has been created successfully!")

# Persist the same snapshot as Parquet via pyarrow.
df.to_parquet('stock_data.parquet', index=False, engine='pyarrow')
print("Parquet file has been created successfully!")

# Report how many rows were written and show the first few of them.
print(
    "\nData Summary:",
    f"Total records processed: {len(df)}",
    "\nFirst few records:",
    df.head(),
    sep="\n",
)

CSV file has been created successfully!
Parquet file has been created successfully!

Data Summary:
Total records processed: 2759

First few records:
        종목명    종목코드     현재가     누적거래량
0      삼성전자  005930   56000  15281543
1    SK하이닉스  000660  176700   3919973
2  LG에너지솔루션  373220  405000    142194
3  삼성바이오로직스  207940  934000     79868
4       현대차  005380  217000    419056


In [13]:
from datetime import datetime, timedelta

def get_monthly_stock_data(stock_code, stock_name, days=30):
    """Fetch recent price history for one stock and tag it with code and name.

    Args:
        stock_code: Ticker code passed to ``fdr.DataReader``.
        stock_name: Name stored in the ``Name`` column of the result.
        days: Length of the look-back window ending now, in days.
            Defaults to 30, the window this notebook originally hard-coded.

    Returns:
        A DataFrame with the reader's index turned into a regular column and
        ``Code`` / ``Name`` columns appended, or ``None`` if fetching failed
        (the error is printed).
    """
    try:
        # Date range covering the last `days` days up to now
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        # Get stock data
        history = fdr.DataReader(stock_code, start_date, end_date)

        # Append the identifying columns without mutating the frame returned
        # by the reader, then move the index into a column.
        return history.assign(Code=stock_code, Name=stock_name).reset_index()
    except Exception as e:
        print(f"Error getting data for {stock_name} ({stock_code}): {str(e)}")
        return None

In [17]:
# One DataFrame of recent price history per listed stock.
all_stock_monthly_data = []

# Process each stock (you might want to limit the number of stocks for testing,
# e.g. by iterating over df_krx.head(20) instead).
for idx, row in df_krx.iterrows():
    print(f"Processing {row['Name']} ({row['Code']})")

    # Get monthly data for the stock
    stock_df = get_monthly_stock_data(row['Code'], row['Name'])

    # Failed fetches return None and are skipped.
    if stock_df is not None:
        all_stock_monthly_data.append(stock_df)

# Preview the first ten collected frames (was a NameError: `all_stock_monthly`).
print(all_stock_monthly_data[:10])

Processing 삼성전자 (005930)
Processing SK하이닉스 (000660)
Processing LG에너지솔루션 (373220)
Processing 삼성바이오로직스 (207940)
Processing 현대차 (005380)
Processing 삼성전자우 (005935)
Processing 기아 (000270)
Processing KB금융 (105560)
Processing 셀트리온 (068270)
Processing NAVER (035420)
Processing 신한지주 (055550)
Processing POSCO홀딩스 (005490)
Processing 현대모비스 (012330)
Processing 삼성물산 (028260)
Processing 삼성생명 (032830)
Processing HD현대중공업 (329180)
Processing LG화학 (051910)
Processing 메리츠금융지주 (138040)
Processing 고려아연 (010130)
Processing 삼성SDI (006400)
Processing 삼성화재 (000810)
Processing 하나금융지주 (086790)
Processing SK이노베이션 (096770)
Processing 한화에어로스페이스 (012450)
Processing HMM (011200)
Processing 카카오 (035720)



KeyboardInterrupt



In [21]:
# Preview the first ten per-stock frames collected so far.
preview_frames = all_stock_monthly_data[:10]
print(preview_frames)

[         Date   Open   High    Low  Close    Volume    Change    Code  Name
0  2024-10-28  55700  58500  55700  58100  27775009  0.039356  005930  삼성전자
1  2024-10-29  58000  59600  57300  59600  28369314  0.025818  005930  삼성전자
2  2024-10-30  59100  59800  58600  59100  19838511 -0.008389  005930  삼성전자
3  2024-10-31  58500  61200  58300  59200  35809196  0.001692  005930  삼성전자
4  2024-11-01  59000  59600  58100  58300  19083180 -0.015203  005930  삼성전자
5  2024-11-04  58600  59400  58400  58700  15586947  0.006861  005930  삼성전자
6  2024-11-05  57800  58100  57200  57600  17484474 -0.018739  005930  삼성전자
7  2024-11-06  57600  58000  56300  57300  22092218 -0.005208  005930  삼성전자
8  2024-11-07  56900  58100  56800  57500  17043102  0.003490  005930  삼성전자
9  2024-11-08  58000  58300  57000  57000  13877396 -0.008696  005930  삼성전자
10 2024-11-11  56700  56800  55000  55000  29811326 -0.035088  005930  삼성전자
11 2024-11-12  54600  54600  53000  53000  37962881 -0.036364  005930  삼성전자
12 2024-11-

In [25]:
# Stack every per-stock frame into one long table, renumbering rows from zero.
final_df = pd.concat(objs=all_stock_monthly_data, ignore_index=True)

# Write the combined history as CSV (UTF-8 with BOM) and as Parquet.
final_df.to_csv('monthly_stock_data.csv', encoding='utf-8-sig', index=False)
final_df.to_parquet('monthly_stock_data.parquet', index=False)

In [None]:
import boto3

def upload_s3():
    """Upload the generated CSV/Parquet files to the project S3 bucket.

    No access keys are passed explicitly: boto3 resolves credentials through
    its default chain (environment variables, shared credentials file, ...),
    so secrets never need to appear in the notebook.

    Each file is uploaded independently; a failure is printed and the
    remaining files are still attempted. If the client cannot be created,
    nothing is uploaded.
    """
    bucket = "devcourse-project3"
    # (local file, destination key) pairs
    uploads = [
        ("stock_data.csv", "domestic/stock_price_data.csv"),
        ("stock_data.parquet", "domestic/stock_price_data.parquet"),
        ("monthly_stock_data.csv", "domestic/monthly_stock_data.csv"),
        ("monthly_stock_data.parquet", "domestic/monthly_stock_data.parquet"),
    ]

    try:
        # Create the S3 client
        s3 = boto3.client(
            service_name="s3",
            region_name="us-west-2",
        )
    except Exception as e:
        print(e)
        # Without a client there is nothing to upload with.
        return

    print("s3 bucket connected!")

    for local_path, key in uploads:
        try:
            s3.upload_file(local_path, bucket, key)
        except Exception as e:
            print(f"Failed to upload {local_path}: {e}")


upload_s3()

s3 bucket connected!
