In [None]:
# Make /Data the working directory: the cells below read their raw CSVs via
# relative paths and build their output folders from os.getcwd().
%cd '/Data'

# S&P 500 (StockNet)

In [None]:
import yfinance as yf
import pandas as pd
import os
import pickle

def create_stock_data(ticker, start, end, interval):
    """Download one ticker's history, add a 14-period RSI and save it as CSV.

    The file is written to ``<cwd>/Stock/<interval>-final/<ticker>.csv`` with
    the columns ``Date, Close, Volume, RSI``. Rows in which any of those
    columns is NaN (this includes the RSI warm-up rows) are dropped.

    Args:
        ticker: Symbol passed to ``yf.download``; also the CSV file name.
        start: Start of the range, forwarded to ``yf.download``.
        end: End of the range, forwarded to ``yf.download``.
        interval: Bar interval forwarded to ``yf.download``; also used to
            name the output folder.

    Returns:
        None. If the download is empty a warning is printed and no file is
        written.
    """
    print("ticker:", ticker, " start:", start, " end:", end, " interval:", interval)
    df = yf.download(ticker, start=start, end=end, interval=interval)
    if df.empty:
        print(f"[Warning] {ticker} has no data in given period.")
        return

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker. Keep only the field level so df['Close'] is a
    # Series and the CSV gets a single header row.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df['Date'] = pd.to_datetime(df.index)

    # RSI from simple 14-row rolling means of gains and losses. The first
    # diff is NaN and is counted as "no gain / no loss" by the where() calls.
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Build a fresh frame instead of dropping rows in place on a column
    # subset, which triggers pandas' SettingWithCopyWarning.
    result = (
        df[['Date', 'Close', 'Volume', 'RSI']]
        .dropna()
        .reset_index(drop=True)
    )

    output_dir = os.path.join(os.getcwd(), "Stock", interval + "-final")
    os.makedirs(output_dir, exist_ok=True)

    csv_path = os.path.join(output_dir, ticker + '.csv')
    result.to_csv(csv_path, index=False)


# Load the ticker list and run the download for every symbol.
# NOTE(review): pickle.load executes whatever the file contains -- only point
# this at a pickle you trust.
with open('/SIMPC/Traditional_pattern/sp500tickers.pickle', 'rb') as f:
    tickers = pickle.load(f)

# Skip every symbol that contains ".B".
tickers = [symbol for symbol in tickers if ".B" not in symbol]

for ticker in tickers:
    create_stock_data(
        ticker,
        start='2008-01-01',
        end='2021-07-31',
        interval='1d',
    )


In [None]:
import yfinance as yf
import pandas as pd
import os
import pickle


def create_stock_data(ticker, period, interval):
    """Download one ticker's history, add RSI and moving-average features, save as CSV.

    The file is written to ``<cwd>/Stock/<interval>-final/<ticker>.csv`` with
    the columns ``Date, Close, Volume, RSI, MA5, MA20, MA50, MA200``. Each
    ``MA<n>`` column holds the n-row rolling mean of ``Close`` minus ``Close``
    (the same definition ``create_BTC_data`` uses). Rows with a NaN in any
    output column (the warm-up rows of the rolling windows) are dropped.

    Note: an earlier cell defines a function with the same name that takes
    ``start``/``end`` instead of ``period``; running this cell rebinds the name.

    Args:
        ticker: Symbol passed to ``yf.download``; also the CSV file name.
        period: Look-back period forwarded to ``yf.download``.
        interval: Bar interval forwarded to ``yf.download``; also used to
            name the output folder.

    Returns:
        None. If the download is empty a warning is printed and no file is
        written.
    """
    print("ticker: ", ticker, " period: ", period, " interval: ", interval)
    df = yf.download(ticker, period=period, interval=interval)
    if df.empty:
        # Same guard as the start/end variant: nothing to compute or save.
        print(f"[Warning] {ticker} has no data in given period.")
        return

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker. Keep only the field level so df['Close'] is a
    # Series and the CSV gets a single header row.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df['Date'] = pd.to_datetime(df.index)  # Expose the index as a datetime column

    close = df['Close']

    # The output selects these columns, so they must be computed here;
    # previously they were never created and the selection raised KeyError.
    for window in (5, 20, 50, 200):
        df[f'MA{window}'] = close.rolling(window=window).mean() - close

    # RSI from simple 14-row rolling means of gains and losses. The first
    # diff is NaN and is counted as "no gain / no loss" by the where() calls.
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # Build a fresh frame instead of dropping rows in place on a column
    # subset, which triggers pandas' SettingWithCopyWarning.
    result = (
        df[['Date', 'Close', 'Volume', 'RSI', 'MA5', 'MA20', 'MA50', 'MA200']]
        .dropna()
        .reset_index(drop=True)
    )

    # Create the folder the CSV is actually written to. The old code created
    # "Stock/<interval>" but saved into "Stock/<interval>-final".
    output_dir = os.path.join(os.getcwd(), "Stock", interval + "-final")
    os.makedirs(output_dir, exist_ok=True)

    csv_path = os.path.join(output_dir, ticker + '.csv')
    result.to_csv(csv_path, index=False)


# Load the ticker list and run the download for every symbol.
# NOTE(review): pickle.load executes whatever the file contains -- only point
# this at a pickle you trust.
with open('/SIMPC/Traditional_pattern/sp500tickers.pickle', 'rb') as f:
    tickers = pickle.load(f)

# Skip every symbol that contains ".B".
tickers = [symbol for symbol in tickers if ".B" not in symbol]

for ticker in tickers:
    create_stock_data(
        ticker,
        period='10y',
        interval='1d',
    )

# BTC/USD 1d

In [None]:
import pandas as pd
import os


def create_BTC_data(df):
    """Derive RSI and moving-average features from a price frame and save them.

    The input must provide the columns ``date`` (strings in ``%Y-%m-%d``
    form), ``close`` and ``v``. The caller's frame is left untouched; a copy
    is sorted by date and extended with:

    * ``MA5/MA20/MA50/MA200`` -- rolling mean of ``close`` minus ``close``
    * ``RSI`` -- computed from 14-row rolling means of gains and losses

    Rows with a NaN in any output column are removed and the result is
    written to ``<cwd>/BTC/BTC.csv``.

    Args:
        df: DataFrame with ``date``, ``close`` and ``v`` columns.

    Returns:
        None.
    """
    frame = df.copy()
    frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d')
    frame = frame.sort_values('date')  # chronological order for the rolling windows

    close = frame['close']

    # Moving-average features: distance of each rolling mean from the close.
    for column, window in (('MA5', 5), ('MA20', 20), ('MA50', 50), ('MA200', 200)):
        frame[column] = close.rolling(window=window).mean() - close

    # RSI: the first diff is NaN and counts as neither gain nor loss.
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    frame['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    output_columns = ['date', 'close', 'v', 'RSI', 'MA5', 'MA20', 'MA50', 'MA200']
    result = frame[output_columns].dropna()

    target_dir = os.path.join(os.getcwd(), "BTC")
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    result.to_csv(os.path.join(target_dir, 'BTC.csv'), index=False)
    
    
# Read the raw BTC/USD export from the working directory and build BTC/BTC.csv.
df = pd.read_csv(
    './BTC_USD_raw.csv',
)
create_BTC_data(df)

In [None]:
import pandas as pd
import os

# Normalize volume units
def clean_volume(val):
    """Convert a volume value with an optional B/M/K suffix to a number in millions.

    Strings are stripped and upper-cased first, then:

    * ``"<x>B"`` -> ``x * 1000``  (1B = 1000M)
    * ``"<x>M"`` -> ``x``         (already in millions)
    * ``"<x>K"`` -> ``x / 1000``  (1K = 0.001M)
    * anything else is parsed with ``float`` as-is

    Non-string values are returned unchanged.

    Raises:
        ValueError: if the numeric part of a string cannot be parsed by ``float``.
    """
    if not isinstance(val, str):
        return val  # already numeric: pass through

    text = val.strip().upper()
    to_millions = {
        'B': lambda amount: amount * 1000,
        'M': lambda amount: amount,
        'K': lambda amount: amount / 1000,
    }
    convert = to_millions.get(text[-1:])
    if convert is None:
        return float(text)  # plain numeric string without a suffix
    return convert(float(text[:-1]))

# NOTE(review): `df` is not created in this cell -- this line works on whatever
# `df` an earlier cell left behind (the BTC cell binds it), and the result is
# discarded when `df` is re-read from f'./{STOCK}_raw.csv' further down, where
# the same cleaning is applied again. Confirm whether this line is still needed.
df['v'] = df['v'].apply(clean_volume)


def create_STOCK_data(df, STOCK):
    """Add a 14-period RSI to a price frame and save it under the stock's name.

    The input must provide the columns ``date`` (strings in ``%Y-%m-%d``
    form), ``close`` and ``v``. The caller's frame is left untouched; a copy
    is sorted by date, ``RSI`` is computed from 14-row rolling means of gains
    and losses, rows with a NaN in any output column are removed, and the
    columns ``date, close, v, RSI`` are written to
    ``<cwd>/<STOCK>/<STOCK>.csv``.

    Args:
        df: DataFrame with ``date``, ``close`` and ``v`` columns.
        STOCK: Name used for both the output folder and the CSV file.

    Returns:
        None.
    """
    frame = df.copy()
    frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d')
    frame = frame.sort_values('date')  # chronological order for the rolling windows

    # RSI: the first diff is NaN and counts as neither gain nor loss.
    change = frame['close'].diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    frame['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    result = frame[['date', 'close', 'v', 'RSI']].dropna()

    target_dir = os.path.join(os.getcwd(), STOCK)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    result.to_csv(os.path.join(target_dir, f'{STOCK}.csv'), index=False)
    
# Symbol to process; the raw file ./<STOCK>_raw.csv must exist in the working directory.
STOCK = 'BRKb'  # AAPL, XOM, BRKb

# Read the raw export, normalize the volume column, then build <STOCK>/<STOCK>.csv.
df = pd.read_csv(f'./{STOCK}_raw.csv')
df['v'] = df['v'].apply(clean_volume)
create_STOCK_data(
    df,
    STOCK,
)