# Part 1: Fetching, Cleaning, & Storing Stock Data in Azure Cloud Data Warehouse

**Step 1: Install and Load Packages**

In [1]:
%pip install pandas numpy yfinance

Collecting pandas
  Using cached pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting numpy
  Using cached numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting yfinance
  Downloading yfinance-1.0-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting requests>=2.31 (from yfinance)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.7-cp310-cp310-macosx_11_0_arm64.whl.metadata (23 kB)
Collecting peew

In [2]:
# Import libraries
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

**Step 2: Load & Clean Stock Data from API**

In [3]:
def fetch_multiple_stocks(ticker_list, start_date="2010-01-01"):
    """
    Fetch and clean stock data for multiple tickers using yfinance.

    Parameters:
    - ticker_list: List of stock ticker symbols (e.g., ["AAPL", "MSFT", "GOOGL"])
    - start_date: Start date in YYYY-MM-DD format (default: "2010-01-01")

    Returns:
    - Combined long-form DataFrame with columns: Ticker, Date, Open, High, Low, Close, Volume.
      Date is a "YYYY-MM-DD" string and the index is a fresh 0..n-1 range.
      If ticker_list is empty, or no ticker returned any rows, an empty
      DataFrame with these same columns is returned.

    Notes:
    - A ticker for which yfinance returns no rows is skipped with a warning
      rather than aborting the whole batch.
    """
    import warnings

    price_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    output_columns = ['Ticker', 'Date'] + price_columns
    all_data = []

    for ticker_name in ticker_list:
        # Fetch stock data from start_date up to the latest available row
        ticker = yf.Ticker(ticker_name)
        uncleaned_stock_data = ticker.history(start=start_date, end=None)

        # An empty result cannot go through the column selection / .dt
        # formatting below, so skip it instead of failing every other ticker.
        if uncleaned_stock_data.empty:
            warnings.warn(
                f"No data returned for ticker {ticker_name!r}; skipping.",
                stacklevel=2,
            )
            continue

        # Keep only the price/volume columns and turn the Date index into a
        # column; reset_index returns a new frame, so the source is untouched.
        cleaned_data = uncleaned_stock_data[price_columns].reset_index()

        # Format date to YYYY-MM-DD
        cleaned_data['Date'] = cleaned_data['Date'].dt.strftime('%Y-%m-%d')

        # Add Ticker column
        cleaned_data['Ticker'] = ticker_name

        # Reorder columns: Ticker, Date, Open, High, Low, Close, Volume
        all_data.append(cleaned_data[output_columns])

    # pd.concat raises ValueError on an empty list, so return the documented
    # schema with zero rows when there is nothing to combine.
    if not all_data:
        return pd.DataFrame(columns=output_columns)

    # Combine all dataframes
    return pd.concat(all_data, ignore_index=True)

# Pull history since 2010 for ten tickers and preview the most recent rows
tickers = [
    "AMZN", "AAPL", "META", "NVDA", "GOOGL",
    "MSFT", "TSLA", "NFLX", "ADBE", "ORCL",
]
stock_data = fetch_multiple_stocks(ticker_list=tickers, start_date="2010-01-01")
stock_data.tail(n=5)


Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume
39534,ORCL,2025-12-29,194.160004,198.509995,192.639999,195.380005,14748100
39535,ORCL,2025-12-30,196.059998,198.380005,195.710007,197.210007,14197400
39536,ORCL,2025-12-31,196.419998,197.199997,194.729996,194.910004,10341500
39537,ORCL,2026-01-02,197.470001,198.589996,194.210007,195.710007,14540700
39538,ORCL,2026-01-05,198.339996,201.690002,192.330002,192.589996,22582500



**Step 3: Store Data in Azure Data Warehouse**