# 문제 정의: 적당한 주가 예측의 어려움

In [1]:
import pandas as pd

In [2]:
stock_info = pd.read_csv("../00_data/stock_daily_prices.csv", index_col="Date")
stock_info

Unnamed: 0_level_0,AAPL,BA,T,MGM,AMZN,IBM,TSLA,GOOG,sp500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2012-01-12,60.198570,75.510002,30.120001,12.130000,175.929993,180.550003,28.250000,313.644379,1295.500000
2012-01-13,59.972858,74.599998,30.070000,12.350000,178.419998,179.160004,22.790001,311.328064,1289.089966
2012-01-17,60.671429,75.239998,30.250000,12.250000,181.660004,180.000000,26.600000,313.116364,1293.670044
2012-01-18,61.301430,75.059998,30.330000,12.730000,189.440002,181.070007,26.809999,315.273285,1308.040039
2012-01-19,61.107143,75.559998,30.420000,12.800000,194.449997,180.520004,26.760000,318.590851,1314.500000
...,...,...,...,...,...,...,...,...,...
2020-08-05,440.250000,174.279999,29.850000,16.719999,3205.030029,125.449997,1485.020020,1473.609985,3327.770020
2020-08-06,455.609985,172.199997,29.840000,18.459999,3225.000000,126.120003,1489.579956,1500.099976,3349.159912
2020-08-07,444.450012,170.020004,30.020000,19.030001,3167.459961,124.959999,1452.709961,1494.489990,3351.280029
2020-08-10,450.910004,179.410004,30.200001,21.650000,3148.159912,127.110001,1418.569946,1496.099976,3360.469971


In [3]:
# 기초 통계 계산
average_prices = stock_info.mean()
print(f"각 종목의 평균 주가 : \n {average_prices}")

print()
std_dev_prices = stock_info.std()
print(f"각 종목의 변동성 (표준편차) : \n{std_dev_prices}")

각 종목의 평균 주가 : 
 AAPL      140.819823
BA        189.942700
T          35.162899
MGM        23.105743
AMZN      915.665665
IBM       161.853001
TSLA      259.600815
GOOG      783.712512
sp500    2218.749554
dtype: float64

각 종목의 변동성 (표준편차) : 
AAPL      70.827601
BA       103.678586
T          3.207490
MGM        6.963847
AMZN     697.838905
IBM       25.561938
TSLA     210.988003
GOOG     334.448057
sp500    537.321727
dtype: float64


In [4]:
# 특정 날짜의 종목 비교

specific_day_prices = stock_info.loc["2012-01-25"]

# 2012-01-25에 가장 높은 주가를 기록한 종목
max_price_stock = specific_day_prices.idxmax()
max_price_value = specific_day_prices.max()
print(f"2012-01-25에 가장 높은 주가를 기록한 종목: {max_price_stock}, 주가: {max_price_value}")

2012-01-25에 가장 높은 주가를 기록한 종목: sp500, 주가: 1326.060059


In [5]:
daily_returns = stock_info.pct_change() * 100

max_return_stock = daily_returns.idxmax().max()
max_return_value = daily_returns.max().max()

print(f"가장 높은 수익률을 기록한 종목: {max_return_stock}, 주가: {max_return_value:.3f}%")
print(f"가장 높은 수익률을 기록한 날의 수익률 데이터:\n{daily_returns.max()}")

가장 높은 수익률을 기록한 종목: 2020-03-25, 주가: 33.115%
가장 높은 수익률을 기록한 날의 수익률 데이터:
AAPL     11.980826
BA       24.318606
T        10.022346
MGM      33.114754
AMZN     15.745701
IBM      11.301051
TSLA     24.395054
GOOG     16.052431
sp500     9.382774
dtype: float64


### 주식간의 상관관계 분석

In [6]:
stock_info_cp = stock_info.copy()
stock_info_cp.reset_index(inplace=True)
correlation_matrix = stock_info_cp.drop(columns="Date").corr()
correlation_matrix

Unnamed: 0,AAPL,BA,T,MGM,AMZN,IBM,TSLA,GOOG,sp500
AAPL,1.0,0.650275,-0.208947,0.390912,0.938721,-0.726157,0.844056,0.910145,0.892584
BA,0.650275,1.0,-0.138619,0.746731,0.7723,-0.636949,0.333445,0.822399,0.857329
T,-0.208947,-0.138619,1.0,0.252347,-0.243089,0.202868,-0.194363,-0.083084,-0.056531
MGM,0.390912,0.746731,0.252347,1.0,0.468135,-0.47906,0.27163,0.653775,0.713081
AMZN,0.938721,0.7723,-0.243089,0.468135,1.0,-0.772448,0.760451,0.957678,0.923259
IBM,-0.726157,-0.636949,0.202868,-0.47906,-0.772448,1.0,-0.610686,-0.813388,-0.795667
TSLA,0.844056,0.333445,-0.194363,0.27163,0.760451,-0.610686,1.0,0.735988,0.704308
GOOG,0.910145,0.822399,-0.083084,0.653775,0.957678,-0.813388,0.735988,1.0,0.978315
sp500,0.892584,0.857329,-0.056531,0.713081,0.923259,-0.795667,0.704308,0.978315,1.0


### 주식 변동률 계산

In [7]:
change_pct = (stock_info.iloc[-1] - stock_info.iloc[0]) / stock_info.iloc[0] * 100

num_days = len(stock_info)
avg_daily_change_pct = change_pct / num_days

max_change_stock = avg_daily_change_pct.idxmax()
max_change_value = avg_daily_change_pct.max()

print("주식별 하루 평균 변동률 [%]")
print(avg_daily_change_pct)
print(f"가장 높은 변동률을 가진 주식: {max_change_stock}, 하루 평균 변동률: {max_change_value}")

주식별 하루 평균 변동률 [%]
AAPL     0.290302
BA       0.064174
T        0.000123
MGM      0.035779
AMZN     0.764742
IBM     -0.013802
TSLA     2.207085
GOOG     0.172290
sp500    0.072871
dtype: float64
가장 높은 변동률을 가진 주식: TSLA, 하루 평균 변동률: 2.207085409092213
