In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import yfinance as yf
import datetime
import warnings
import pandas_datareader as pdr
from sklearn.linear_model import Ridge

warnings.filterwarnings('ignore')
sns.set(style="whitegrid")


def _download_prices(symbol, start, end, fields, rename=None):
    """Download daily prices for ``symbol`` and keep ``Date`` plus ``fields``.

    Columns are selected by name instead of being relabelled by position, so
    the result does not depend on the column order (or on the presence of an
    ``Adj Close`` column) in the frame returned by ``yf.download``.

    Args:
        symbol: Ticker symbol passed to ``yf.download``.
        start: Start of the download window.
        end: End of the download window.
        fields: Price field names to keep, e.g. ``['Close', 'Open']``.
        rename: Optional ``{old: new}`` mapping applied to the kept columns.

    Returns:
        DataFrame with a date-only ``Date`` column followed by the requested
        (optionally renamed) columns.
    """
    prices = yf.download(symbol, start=start, end=end)
    if isinstance(prices.columns, pd.MultiIndex):
        # Flatten to whichever level carries the price field names.
        for level in range(prices.columns.nlevels):
            labels = prices.columns.get_level_values(level)
            if 'Close' in labels:
                prices.columns = labels
                break
    prices = prices.reset_index()
    # Round-trip through a date string to strip any time-of-day component.
    prices['Date'] = pd.to_datetime(prices['Date'].dt.strftime('%Y-%m-%d'))
    prices = prices[['Date', *fields]]
    return prices.rename(columns=rename) if rename else prices


def stock_predict(ticker, start_date='2010-01-01'):
    """Fit a random forest on price and macro features and predict a Close.

    Downloads the ticker's daily Open/Close, four FRED macro series, and the
    closes of crude oil, EUR/USD and gold futures; derives rolling features;
    trains a ``RandomForestRegressor`` on a shuffled 80% split; then predicts
    the Close for a row built from the second-to-last feature row combined
    with the most recent Open of ``ticker``.

    Args:
        ticker: Symbol to model, e.g. ``'AAPL'``.
        start_date: First date of history to download.

    Returns:
        Tuple ``(open_series, predicted_close, direction)`` where
        ``open_series`` is the one-row ``Open`` Series of the prediction row,
        ``predicted_close`` is the model output, and ``direction`` is ``'UP'``
        when the prediction exceeds the last Close in the data, else ``'Down'``.

    Raises:
        RuntimeError: If any FRED indicator cannot be downloaded, or if no
            latest-day price is available for ``ticker``.
    """
    end_date = datetime.datetime.now()

    stock_df = _download_prices(ticker, start_date, end_date, ['Close', 'Open'])

    indicators = {
        'InterestRate': 'FEDFUNDS',  # federal funds rate (FRED)
        'VIX': 'VIXCLS',  # VIX volatility index (FRED)
        'TEDSpread': 'TEDRATE',  # TED spread (FRED)
        'EFFR': 'EFFR',  # effective federal funds rate (FRED)
    }

    # Download each indicator, remembering which ones failed.
    macro_data = {}
    failed = []
    for name, code in indicators.items():
        try:
            macro_data[name] = pdr.get_data_fred(code, start_date, end_date)
            print(f"{name} 데이터 가져오기 성공!")
        except Exception as e:
            print(f"{name} 데이터 가져오기 실패: {e}")
            failed.append(name)
    if failed:
        # Every indicator is used as a feature below, so fail with a clear
        # message instead of a column-count mismatch later on.
        raise RuntimeError(f"Could not download FRED indicators: {', '.join(failed)}")

    for name, frame in macro_data.items():
        if frame is not None and not frame.empty:
            print(f"\n{name} 데이터:\n", frame.head())
        else:
            print(f"{name} 데이터가 없습니다.")

    # The dict keys become the outer column level, so they serve directly as
    # the feature names.
    macro_df = pd.concat(macro_data, axis=1)
    macro_df.columns = macro_df.columns.get_level_values(0)
    macro_df = macro_df.ffill().reset_index()
    macro_df = macro_df.rename(columns={macro_df.columns[0]: 'Date'})
    # Drop the first row of the window, as the original analysis did.
    macro_df = macro_df.drop(index=0).reset_index(drop=True)
    # 0.09 is the fallback for TED spread values still missing after ffill.
    macro_df['TEDSpread'] = macro_df['TEDSpread'].fillna(0.09)
    macro_df['Date'] = pd.to_datetime(macro_df['Date'])

    df_oil = _download_prices('CL=F', start_date, end_date, ['Close'], {'Close': 'Oil'})
    # The symbol downloaded here is EUR/USD.
    df_fx = _download_prices('EURUSD=X', start_date, end_date, ['Close'], {'Close': 'ExchangeRate'})
    df_gold = _download_prices('GC=F', start_date, end_date, ['Close'], {'Close': 'Gold'})

    # Keep only the dates present in every macro/commodity series.
    merged_df = (
        macro_df.merge(df_oil, on='Date', how='inner')
                .merge(df_fx, on='Date', how='inner')
                .merge(df_gold, on='Date', how='inner')
    )

    merged_df_end = pd.merge(stock_df, merged_df, on='Date', how='outer')
    close = merged_df_end['Close']
    merged_df_end['Close_InterestRate_Corr'] = close.rolling(252).corr(merged_df_end['InterestRate'])
    merged_df_end['Close_VIX_Corr'] = close.rolling(252).corr(merged_df_end['VIX'])
    merged_df_end['Rolling_Volatility'] = close.rolling(window=30).std()
    merged_df_end['Daily_Return'] = close.pct_change()  # fractional change from the previous row
    merged_df_end['Rolling_Mean_Close'] = close.rolling(window=30).mean()

    merged_df_end = merged_df_end.set_index('Date').ffill().fillna(0)

    # Features and target. Infinite values are treated as missing.
    # NOTE(review): Daily_Return and the rolling statistics are computed from
    # the same Close column used as the target, and the split below is
    # shuffled, so a held-out score would likely be optimistic - confirm
    # whether that is intended.
    X = (
        merged_df_end.drop(columns=['Close'])
                     .replace([np.inf, -np.inf], np.nan)
                     .ffill()
                     .fillna(0)
    )
    y = merged_df_end['Close']

    # Only the training part is used here; the split is kept so the model is
    # fitted on the same rows as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Prediction row: features of the second-to-last row combined with the
    # most recent Open of the requested ticker.
    latest = yf.download(ticker, period="1d")
    if latest.empty:
        raise RuntimeError(f"No latest-day price data available for {ticker}")
    # ravel() handles both a Series and a one-column DataFrame for 'Open'.
    predict_open = float(np.ravel(latest['Open'].to_numpy())[0])

    predict_data = X.iloc[[-2]].copy()  # copy so the assignment cannot touch X
    predict_data['Open'] = predict_open
    predict_scaled = scaler.transform(predict_data)

    rf = RandomForestRegressor(n_estimators=8, max_depth=32, min_samples_leaf=1, random_state=42)
    rf.fit(X_train_scaled, y_train)

    predict_close = rf.predict(predict_scaled)

    print(f'Predicted Close price for {ticker} on Next day: {predict_close[0]}')

    # Positional access via .iloc: y is indexed by date, not by integer.
    predict = 'UP' if (predict_close[0] - y.iloc[-1]) > 0 else 'Down'

    return predict_data['Open'], predict_close[0], predict

# Run the pipeline for Apple; the result is the tuple returned by stock_predict:
# (Open of the prediction row, predicted Close, 'UP' or 'Down').
predict = stock_predict('AAPL')
# Bare expression so the notebook shows the tuple as the cell output.
predict


[*********************100%***********************]  1 of 1 completed


InterestRate 데이터 가져오기 성공!
VIX 데이터 가져오기 성공!
TEDSpread 데이터 가져오기 성공!
EFFR 데이터 가져오기 성공!

InterestRate 데이터:
             FEDFUNDS
DATE                
2010-01-01      0.11
2010-02-01      0.13
2010-03-01      0.16
2010-04-01      0.20
2010-05-01      0.20

VIX 데이터:
             VIXCLS
DATE              
2010-01-01     NaN
2010-01-04   20.04
2010-01-05   19.35
2010-01-06   19.16
2010-01-07   19.06

TEDSpread 데이터:
             TEDRATE
DATE               
2010-01-01      NaN
2010-01-04     0.17
2010-01-05     0.18
2010-01-06     0.19
2010-01-07     0.20

EFFR 데이터:
             EFFR
DATE            
2010-01-01   NaN
2010-01-04  0.12
2010-01-05  0.12
2010-01-06  0.12
2010-01-07  0.10


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Predicted Close price for AAPL on Next day: 223.19999885559082





(Date
 2024-11-05    222.610001
 Name: Open, dtype: float64,
 np.float64(223.19999885559082),
 'UP')

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import yfinance as yf
import datetime
import warnings
import pandas_datareader as pdr
from sklearn.linear_model import Ridge

warnings.filterwarnings('ignore')
sns.set(style="whitegrid")


# Cell-level (unwrapped) version of the stock_predict pipeline. It leaves the
# intermediate objects (X, y, scaler, rf, X_train_scaled, predict_scaled, ...)
# in the notebook namespace for the cells that follow.
ticker = 'AAPL'
end_date = datetime.datetime.now()
start_date = '2010-01-01'


def _tidy_prices(raw):
    """Return a copy of a yf.download frame with flat columns and a date-only ``Date`` column.

    Flattening by field name lets callers select columns by name instead of
    relabelling them by position.
    """
    tidy = raw.copy()
    if isinstance(tidy.columns, pd.MultiIndex):
        # Use whichever level carries the price field names.
        for level in range(tidy.columns.nlevels):
            labels = tidy.columns.get_level_values(level)
            if 'Close' in labels:
                tidy.columns = labels
                break
    tidy = tidy.reset_index()
    # Round-trip through a date string to strip any time-of-day component.
    tidy['Date'] = pd.to_datetime(tidy['Date'].dt.strftime('%Y-%m-%d'))
    return tidy


stock_data = yf.download(ticker, start=start_date, end=end_date)
stock_df = _tidy_prices(stock_data)[['Date', 'Close', 'Open']]

indicators = {
    'InterestRate': 'FEDFUNDS',  # federal funds rate (FRED)
    'VIX': 'VIXCLS',  # VIX volatility index (FRED)
    'TEDSpread': 'TEDRATE',  # TED spread (FRED)
    'EFFR': 'EFFR',  # effective federal funds rate (FRED)
}

# Download each indicator, remembering which ones failed.
macro_data = {}
failed_indicators = []
for name, code in indicators.items():
    try:
        macro_data[name] = pdr.get_data_fred(code, start_date, end_date)
        print(f"{name} 데이터 가져오기 성공!")
    except Exception as e:
        print(f"{name} 데이터 가져오기 실패: {e}")
        failed_indicators.append(name)
if failed_indicators:
    # Every indicator is used as a feature below, so stop with a clear message
    # instead of a column-count mismatch later on.
    raise RuntimeError(f"Could not download FRED indicators: {', '.join(failed_indicators)}")

for name, data in macro_data.items():
    if data is not None and not data.empty:
        print(f"\n{name} 데이터:\n", data.head())
    else:
        print(f"{name} 데이터가 없습니다.")

# The dict keys become the outer column level, so they serve directly as the
# feature names.
macro_df = pd.concat(macro_data, axis=1)
macro_df.columns = macro_df.columns.get_level_values(0)
macro_df = macro_df.ffill().reset_index()
macro_df = macro_df.rename(columns={macro_df.columns[0]: 'Date'})
# Drop the first row (2010-01-01), where the daily series have no value.
macro_df = macro_df.drop(index=0).reset_index(drop=True)
# 0.09 is the fallback for TED spread values still missing after ffill.
macro_df['TEDSpread'] = macro_df['TEDSpread'].fillna(0.09)
macro_df['Date'] = pd.to_datetime(macro_df['Date'])

df_oil = _tidy_prices(yf.download('CL=F', start=start_date, end=end_date))[['Date', 'Close']]
# Despite the variable name, the symbol downloaded here is EUR/USD.
df_usdkrw = _tidy_prices(yf.download('EURUSD=X', start=start_date, end=end_date))[['Date', 'Close']]
df_gold = _tidy_prices(yf.download('GC=F', start=start_date, end=end_date))[['Date', 'Close']]

# Merge on Date, keeping only dates present in every series. The first two
# 'Close' columns collide and receive the _x/_y suffixes; the third does not.
merged_df = (
    macro_df.merge(df_oil, on='Date', how='inner')
            .merge(df_usdkrw, on='Date', how='inner')
            .merge(df_gold, on='Date', how='inner')
)
merged_df = merged_df.rename(columns={'Close_x': 'Oil', 'Close_y': 'ExchangeRate', 'Close': 'Gold'})

merged_df_end = pd.merge(stock_df, merged_df, on='Date', how='outer')
merged_df_end['Close_InterestRate_Corr'] = merged_df_end['Close'].rolling(252).corr(merged_df_end['InterestRate'])
merged_df_end['Close_VIX_Corr'] = merged_df_end['Close'].rolling(252).corr(merged_df_end['VIX'])
merged_df_end['Rolling_Volatility'] = merged_df_end['Close'].rolling(window=30).std()
merged_df_end['Daily_Return'] = merged_df_end['Close'].pct_change()  # fractional change from the previous row
merged_df_end['Rolling_Mean_Close'] = merged_df_end['Close'].rolling(window=30).mean()

merged_df_end = merged_df_end.set_index('Date').ffill().fillna(0)

# Features and target. Infinite values are treated as missing.
# NOTE(review): Daily_Return and the rolling statistics are computed from the
# same Close column used as the target, and the split below is shuffled, so
# held-out scores are likely optimistic - confirm whether that is intended.
X = (
    merged_df_end.drop(columns=['Close'])
                 .replace([np.inf, -np.inf], np.nan)
                 .ffill()
                 .fillna(0)
)
y = merged_df_end['Close']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics from the training rows only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Prediction row: features of the second-to-last row combined with the most
# recent Open of the ticker being modelled.
data = yf.download(ticker, period="1d")
if data.empty:
    raise RuntimeError(f"No latest-day price data available for {ticker}")
# ravel() handles both a Series and a one-column DataFrame for 'Open'.
predict_open = float(np.ravel(data['Open'].to_numpy())[0])

predict_data = X.iloc[[-2]].copy()  # copy so the assignment cannot touch X
predict_data['Open'] = predict_open
predict_scaled = scaler.transform(predict_data)

# Train the random forest
rf = RandomForestRegressor(n_estimators=8, max_depth=32, min_samples_leaf=1, random_state=42)
rf.fit(X_train_scaled, y_train)

# Predicted close for the prediction row
predict_close = rf.predict(predict_scaled)

# Report the prediction
print(f'Predicted Close price for {ticker} on Next day: {predict_close[0]}')

print(predict_data['Open'])
print(predict_close[0])

# Positional access via .iloc: y is indexed by date, not by integer.
predict = 'UP' if (predict_close[0] - y.iloc[-1]) > 0 else 'Down'
print(f'오늘은 {ticker} 종가가 {predict}할 것으로 예상됩니다.')


[*********************100%***********************]  1 of 1 completed


InterestRate 데이터 가져오기 성공!
VIX 데이터 가져오기 성공!
TEDSpread 데이터 가져오기 성공!
EFFR 데이터 가져오기 성공!

InterestRate 데이터:
             FEDFUNDS
DATE                
2010-01-01      0.11
2010-02-01      0.13
2010-03-01      0.16
2010-04-01      0.20
2010-05-01      0.20

VIX 데이터:
             VIXCLS
DATE              
2010-01-01     NaN
2010-01-04   20.04
2010-01-05   19.35
2010-01-06   19.16
2010-01-07   19.06

TEDSpread 데이터:
             TEDRATE
DATE               
2010-01-01      NaN
2010-01-04     0.17
2010-01-05     0.18
2010-01-06     0.19
2010-01-07     0.20

EFFR 데이터:
             EFFR
DATE            
2010-01-01   NaN
2010-01-04  0.12
2010-01-05  0.12
2010-01-06  0.12
2010-01-07  0.10


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

0.9996980572331626
0.999738967918224
Predicted Close price for AAPL on Next day: 223.19999885559082
Date
2024-11-05    222.610001
Name: Open, dtype: float64
223.19999885559082
오늘은 AAPL 종가가 UP할 것으로 예상됩니다.





In [4]:
# Hyperparameter grid for the random forest.
parameters = {
    'n_estimators': [8, 50, 100, 200, 300],
    'max_depth': [None, 10, 20, 32],
    'min_samples_leaf': [1, 2, 4]
}

# Grid search with 5-fold cross-validation; refit=True retrains the best
# configuration on the full training set so gscv can predict directly.
# Relies on rf, X_train_scaled, y_train, predict_scaled, predict_data, y and
# ticker defined by the preceding pipeline cell.
from sklearn.model_selection import GridSearchCV
gscv = GridSearchCV(rf, param_grid=parameters, cv=5, refit=True)

# Fit every parameter combination, then predict with the best estimator.
gscv.fit(X_train_scaled, y_train)
predict_close_gscv = gscv.predict(predict_scaled)

print('Best parameters found: ', gscv.best_params_)

print(predict_data['Open'])
print(predict_close_gscv[0])

# Positional access via .iloc: y is indexed by date, not by integer.
predict = 'UP' if (predict_close_gscv[0] - y.iloc[-1]) > 0 else 'Down'
print(f'오늘은 {ticker} 종가가 {predict}할 것으로 예상됩니다.')

Best parameters found:  {'max_depth': None, 'min_samples_leaf': 2, 'n_estimators': 300}
Date
2024-11-05    222.610001
Name: Open, dtype: float64
223.45981735637008
오늘은 AAPL 종가가 UP할 것으로 예상됩니다.


In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import lightgbm as lgb
import xgboost as xgb
import warnings
# Train a linear-regression baseline on the scaled training features.
# Relies on X_train_scaled, X_test_scaled, y_train and y_test defined by the
# pipeline cell earlier in the notebook.
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)

# Predict on the held-out rows.
y_pred_lr = lr.predict(X_test_scaled)

# Evaluate the model. RMSE is computed as sqrt(MSE) rather than through the
# `squared=False` argument, which newer scikit-learn releases no longer accept.
# NOTE(review): the features include statistics derived from the same-day
# Close and the split is shuffled, so these scores are likely optimistic.
print("Linear Regression R2 Score:", r2_score(y_test, y_pred_lr))
print("Linear Regression RMSE:", np.sqrt(mean_squared_error(y_test, y_pred_lr)))

Linear Regression R2 Score: 0.9997581689332079
Linear Regression RMSE: 0.9918378611099604
