In [49]:
!pip install yfinance --upgrade




#Price-volume data: open, high, low, close (OHLC),
adjusted prices, volume

In [50]:
import yfinance as yf
from datetime import datetime, timedelta

# --- Ngày kết thúc: 11/10/2025 ---
end_date = datetime(2025, 10, 11)

# --- Ngày bắt đầu: lùi 10 năm ---
start_date = end_date - timedelta(days=365 * 10)


In [51]:
tickers = ["AAPL", "MSFT", "GOOG", "AMZN"]

for t in tickers:
    df = yf.download(t, start=start_date, end=end_date, progress=False, auto_adjust=False)
    globals()[f"df_{t}"] = df  # gán trực tiếp vào biến df_AAPL, df_MSFT, ...
    print(f"Downloaded {t}: {len(df)} rows")

# Kiểm tra
for t in tickers:
    print(f"\n{t}:")
    print(globals()[f"df_{t}"].head())


Downloaded AAPL: 2513 rows
Downloaded MSFT: 2513 rows
Downloaded GOOG: 2513 rows
Downloaded AMZN: 2513 rows

AAPL:
Price       Adj Close      Close       High        Low       Open     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                                        
2015-10-14  24.767004  27.552500  27.879999  27.389999  27.822500  177849600
2015-10-15  25.137802  27.965000  28.025000  27.622499  27.732500  150694000
2015-10-16  24.953529  27.760000  28.000000  27.632500  27.945000  156930400
2015-10-19  25.108593  27.932501  27.937500  27.527500  27.700001  119036800
2015-10-20  25.567030  28.442499  28.542500  27.705000  27.834999  195871200

MSFT:
Price       Adj Close      Close       High        Low       Open    Volume
Ticker           MSFT       MSFT       MSFT       MSFT       MSFT      MSFT
Date                                                                       
2015-10-14  40.675846  46.680000  

In [52]:
import pandas as pd

def clean_stock_df(df, symbol):
    """
    Làm phẳng MultiIndex, thêm cột Symbol, 
    làm sạch header trùng ('Date', 'Ticker', ...)
    """
    # Làm phẳng MultiIndex
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Đưa Date ra khỏi index
    df = df.reset_index()

    # Đặt lại tên cột đầu tiên thành "Date"
    df = df.rename(columns={df.columns[0]: "Date"})

    # Thêm cột Symbol
    df["Symbol"] = symbol

    # Loại bỏ các dòng header trùng lặp hoặc lỗi
    df = df[~df["Date"].isin(["Date", "Ticker"])]
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df = df.dropna(subset=["Date"])

    # Sắp xếp lại cột
    cols = ["Symbol", "Date", "Adj Close", "Close", "High", "Low", "Open", "Volume"]
    df = df[cols].reset_index(drop=True)

    return df


# Áp dụng cho 4 mã
df_AAPL_clean = clean_stock_df(df_AAPL, "AAPL")
df_MSFT_clean = clean_stock_df(df_MSFT, "MSFT")
df_GOOG_clean = clean_stock_df(df_GOOG, "GOOG")
df_AMZN_clean = clean_stock_df(df_AMZN, "AMZN")


In [53]:
# Gộp tất cả lại thành 1 DataFrame tổng
df_all = pd.concat([df_AAPL_clean, df_MSFT_clean, df_GOOG_clean, df_AMZN_clean], ignore_index=True)

In [54]:
df_all

Price,Symbol,Date,Adj Close,Close,High,Low,Open,Volume
0,AAPL,2015-10-14,24.767004,27.552500,27.879999,27.389999,27.822500,177849600
1,AAPL,2015-10-15,25.137802,27.965000,28.025000,27.622499,27.732500,150694000
2,AAPL,2015-10-16,24.953529,27.760000,28.000000,27.632500,27.945000,156930400
3,AAPL,2015-10-19,25.108593,27.932501,27.937500,27.527500,27.700001,119036800
4,AAPL,2015-10-20,25.567030,28.442499,28.542500,27.705000,27.834999,195871200
...,...,...,...,...,...,...,...,...
10047,AMZN,2025-10-06,220.899994,220.899994,221.729996,216.029999,221.000000,43690900
10048,AMZN,2025-10-07,221.779999,221.779999,222.889999,220.169998,220.880005,31194700
10049,AMZN,2025-10-08,225.220001,225.220001,226.729996,221.190002,222.919998,46686000
10050,AMZN,2025-10-09,227.740005,227.740005,228.210007,221.750000,225.000000,46412100
