# Part 1: Fetching, Cleaning, & Storing Stock Data in Azure Cloud Data Warehouse

**Step 1: Install and Load Packages**

In [32]:
%pip install pandas numpy yfinance

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [33]:
# Import libraries
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

**Step 2: Load & Clean Stock Data from API**

In [37]:
def fetch_multiple_stocks(ticker_list, start_date="2010-01-01"):
    """
    Fetch and clean stock data for multiple tickers using yfinance.
    
    Parameters:
    - ticker_list: List of stock ticker symbols (e.g., ["AAPL", "MSFT", "GOOGL"])
    - start_date: Start date in YYYY-MM-DD format (default: "2010-01-01")
    
    Returns:
    - Combined long-form DataFrame with columns: Ticker, Date, Open, High, Low, Close, Volume
    """
    all_data = []
    
    for ticker_name in ticker_list:
        # Fetch stock data
        ticker = yf.Ticker(ticker_name)
        uncleaned_stock_data = ticker.history(start=start_date, end=None)
        
        # Select only the columns we need: Open, High, Low, Close, Volume
        cleaned_data = uncleaned_stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
        
        # Reset index to make Date a column
        cleaned_data = cleaned_data.reset_index()
        
        # Format date to YYYY-MM-DD
        cleaned_data['Date'] = cleaned_data['Date'].dt.strftime('%Y-%m-%d')
        
        # Add Ticker column
        cleaned_data['Ticker'] = ticker_name
        
        # Reorder columns: Ticker, Date, Open, High, Low, Close, Volume
        cleaned_data = cleaned_data[['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
        
        # Reset index to remove default integer index
        cleaned_data = cleaned_data.reset_index(drop=True)
        
        all_data.append(cleaned_data)
    
    # Combine all dataframes
    combined_data = pd.concat(all_data, ignore_index=True)
    
    return combined_data

# Example usage with multiple stocks
tickers = ["AMZN", "AAPL", "META", "NVDA", "GOOGL", "MSFT", "TSLA", "NFLX", "ADBE", "ORCL"]
stock_data = fetch_multiple_stocks(tickers, "2010-01-01")
stock_data.head()


Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume
0,AMZN,2010-01-04,6.8125,6.8305,6.657,6.695,151998000
1,AMZN,2010-01-05,6.6715,6.774,6.5905,6.7345,177038000
2,AMZN,2010-01-06,6.73,6.7365,6.5825,6.6125,143576000
3,AMZN,2010-01-07,6.6005,6.616,6.44,6.5,220604000
4,AMZN,2010-01-08,6.528,6.684,6.4515,6.676,196610000



**Step 3: Store Data in Azure Data Warehouse**

In [None]:
# The cleaned_data dataframe is ready to use
# It contains: Date, Open, High, Low, Close, Volume



In [None]:
# Access the cleaned data
cleaned_data


Fetching data for AAPL...


KeyError: "None of [Index(['open', 'high', 'low', 'close'], dtype='object')] are in the [columns]"