In [2]:
# Cell 1: Imports and Setup
import pandas as pd
import os
import sys
from io import StringIO # Keep for read_html if we revisit data extraction

print(f"DEBUG: Running notebook using Python executable: {sys.executable}")
print(f"DEBUG: Using Pandas version: {pd.__version__}")

# --- Configuration ---
# Paths relative to the 'notebooks/' directory
COMMERCIALS_FILENAME = 'wiki_super_bowl_commercials_extracted.csv'
TICKER_MAP_FILENAME = 'advertiser_ticker_mapping.csv'
PROCESSED_DIR = '../data/processed' # Up one level, then into data/processed
RAW_DATA_DIR = '../data/raw'         # Up one level, then into data/raw
FINAL_COLS = ['Product_Type', 'Advertiser_Product_Title', 'Title', 'Plot_Notes', 'Decade', 'Year', 'SuperBowlNum', 'Primary_Advertiser', 'StockTicker', 'ParentCompany'] # Define desired final columns after merge

commercials_path = os.path.join(PROCESSED_DIR, COMMERCIALS_FILENAME)
ticker_map_path = os.path.join(RAW_DATA_DIR, TICKER_MAP_FILENAME)

print("\nSetup complete.")

DEBUG: Running notebook using Python executable: c:\Users\Manny\anaconda3\envs\sb_analysis\python.exe
DEBUG: Using Pandas version: 2.2.3

Setup complete.


In [3]:
# Cell 2: Load Commercials Data
commercials_df = None # Initialize
try:
    commercials_df = pd.read_csv(commercials_path)
    print(f"Successfully loaded commercials data. Shape: {commercials_df.shape}")
    print("Commercials DataFrame Info:")
    commercials_df.info()
    print("\nCommercials DataFrame Head:")
    display(commercials_df.head())
except FileNotFoundError:
    print(f"ERROR: Commercials data file not found at '{commercials_path}'")
except Exception as e:
    print(f"An error occurred loading the commercials CSV: {e}")

Successfully loaded commercials data. Shape: (1345, 7)
Commercials DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1345 entries, 0 to 1344
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Product_Type              1345 non-null   object
 1   Advertiser_Product_Title  1343 non-null   object
 2   Title                     509 non-null    object
 3   Plot_Notes                1291 non-null   object
 4   Decade                    1345 non-null   object
 5   Year                      1345 non-null   int64 
 6   SuperBowlNum              1345 non-null   object
dtypes: int64(1), object(6)
memory usage: 73.7+ KB

Commercials DataFrame Head:


Unnamed: 0,Product_Type,Advertiser_Product_Title,Title,Plot_Notes,Decade,Year,SuperBowlNum
0,Airlines,"TWA ""Old West""",,A contemporary TWA airliner lands in a wild we...,1960s,1969,III
1,Airlines,"TWA ""Tour Our Tours""",,Promotes the airliner's flights to Europe. Fea...,1960s,1969,III
2,Beer,"Schlitz ""Comin' Atcha""",,A man asks for a Schlitz beer at a bar. The ba...,1960s,1969,III
3,Car,"Chrysler ""Scuba Diver""[6]",,A scuba diver in a dreamy sequence with groovy...,1960s,1969,III
4,Car,"Plymouth ""Road Runner""",,Wile E. Coyote chases the Road Runner into a P...,1960s,1969,III


In [4]:
# Cell 3: Load Ticker Map and Prepare Lookups
ticker_map_df = pd.DataFrame() # Initialize
original_case_map = {}         # Initialize
known_brands_sorted = []     # Initialize
try:
    ticker_map_df = pd.read_csv(ticker_map_path)
    print(f"\nTicker mapping loaded successfully from '{ticker_map_path}'. Shape: {ticker_map_df.shape}")

    if not ticker_map_df.empty and 'BrandName' in ticker_map_df.columns and ticker_map_df['BrandName'].notna().any():
        # Ensure BrandName is string type before using .str accessor
        ticker_map_df['BrandName'] = ticker_map_df['BrandName'].astype(str)
        # Create lowercase Series, dropping rows where original BrandName was NaN implicitly via subset selection
        lc_map_temp = ticker_map_df.dropna(subset=['BrandName']).copy() # Work on copy
        lc_map_temp['BrandNameLower'] = lc_map_temp['BrandName'].str.lower()
        # Get index of first occurrence of each unique lowercase brand
        unique_lc_indices = lc_map_temp.drop_duplicates(subset=['BrandNameLower'], keep='first').index
        # Select the corresponding rows from the original DataFrame using these valid indices
        map_subset_df = ticker_map_df.loc[unique_lc_indices].copy()

        # Create dictionary mapping: lowercase brand name -> original BrandName casing
        original_case_map = pd.Series(
            map_subset_df.BrandName.values,
            index=map_subset_df.BrandName.str.lower() # Index is unique lowercase string
        ).to_dict()

        # Create a sorted list of unique lowercase brands for matching function
        known_brands_set = set(original_case_map.keys())
        known_brands_sorted = sorted(list(known_brands_set), key=len, reverse=True)

        print("\nCreated lookup dictionary and sorted brand list.")
        print(f"Total unique lowercase brands found: {len(known_brands_sorted)}")
    else:
        print("Ticker map DataFrame ('ticker_map_df') is empty or missing 'BrandName' column or has no valid BrandNames.")

except FileNotFoundError:
    print(f"ERROR: Ticker mapping file not found at '{ticker_map_path}'")
except Exception as e:
     print(f"Error loading or processing ticker map: {e}")

# Display head of ticker map for verification
if not ticker_map_df.empty:
    print("\nTicker Map DataFrame Head:")
    display(ticker_map_df.head())


Ticker mapping loaded successfully from '../data/raw\advertiser_ticker_mapping.csv'. Shape: (144, 3)

Created lookup dictionary and sorted brand list.
Total unique lowercase brands found: 108

Ticker Map DataFrame Head:


Unnamed: 0,BrandName,StockTicker,ParentCompany
0,Budweiser,BUD,Anheuser-Busch InBev
1,Pepsi,PEP,PepsiCo
2,Doritos,PEP,PepsiCo
3,Honda,HMC,Honda Motor Co.
4,Gillette,PG,Procter & Gamble


In [5]:
# Cell 4: Define Extraction Function

def get_primary_advertiser_final(adv_prod_title, brands_sorted_list, lc_to_orig_map):
    """
    Extracts primary advertiser based on matching known brands (longest first)
    within the adv_prod_title string. Uses a dictionary lookup for original casing.
    Returns the canonical brand name from the map or None.
    """
    if pd.isna(adv_prod_title):
        return None
    text_to_search = str(adv_prod_title).lower()
    match_found_lc = None # The matched brand will be lowercase

    # Check for known brands within the string, starting with longest names
    for brand_lower in brands_sorted_list:
        if brand_lower in text_to_search:
             match_found_lc = brand_lower
             break # Take the first (longest) match found

    # If a known brand substring was found (it will be lowercase)
    if match_found_lc:
         # Lookup original case in the pre-built dictionary using .get() for safety
         original_case_brand = lc_to_orig_map.get(match_found_lc)
         # Ensure the map lookup didn't fail somehow
         if original_case_brand is None:
             print(f"Warning: Lookup failed for already matched lowercase brand '{match_found_lc}'")
         return original_case_brand
    else:
        # No known brand found directly within the string
        return None

print("Function 'get_primary_advertiser_final' defined.")

Function 'get_primary_advertiser_final' defined.


In [6]:
# Cell 5: Apply Extraction Function and Show Summary

# Check if variables exist before proceeding
if 'commercials_df' in locals() and commercials_df is not None and 'known_brands_sorted' in locals() and known_brands_sorted and 'original_case_map' in locals() and original_case_map:
    print("Applying FINAL extraction function to 'Advertiser_Product_Title'...")

    # Apply directly to the Series
    commercials_df['Primary_Advertiser'] = commercials_df['Advertiser_Product_Title'].apply(
        get_primary_advertiser_final, # Pass the function name
        args=(known_brands_sorted, original_case_map) # Pass other args needed by function
    )

    print("FINAL extraction function applied.")

    # --- Summary Statistics ---
    total_rows = len(commercials_df)
    mapped_rows = commercials_df['Primary_Advertiser'].notna().sum()
    unmapped_rows = commercials_df['Primary_Advertiser'].isna().sum()
    percent_mapped = (mapped_rows / total_rows) * 100 if total_rows > 0 else 0

    print("\n--- Mapping Summary ---")
    print(f"Total Commercials: {total_rows}")
    print(f"Mapped to Primary Advertiser: {mapped_rows} ({percent_mapped:.1f}%)")
    print(f"Could NOT be mapped:         {unmapped_rows}")

    if mapped_rows > 0:
         print("\nTop 30 Mapped Primary Advertisers:")
         display(pd.DataFrame(commercials_df['Primary_Advertiser'].value_counts().head(30)))

    if unmapped_rows > 0:
        print("\nTop 50 UNMAPPED Original 'Advertiser_Product_Title' Entries:")
        unmapped_subset = commercials_df[commercials_df['Primary_Advertiser'].isnull()]
        display(pd.DataFrame(unmapped_subset['Advertiser_Product_Title'].value_counts().head(50)))

else:
    print("Prerequisite check failed: Make sure 'commercials_df' is loaded and the ticker mapping ('known_brands_sorted', 'original_case_map') was processed successfully and is not empty.")

Applying FINAL extraction function to 'Advertiser_Product_Title'...
FINAL extraction function applied.

--- Mapping Summary ---
Total Commercials: 1345
Mapped to Primary Advertiser: 630 (46.8%)
Could NOT be mapped:         715

Top 30 Mapped Primary Advertisers:


Unnamed: 0_level_0,count
Primary_Advertiser,Unnamed: 1_level_1
Budweiser,53
Bud Light,47
Pepsi,33
Doritos,23
T-Mobile,23
McDonald's,19
NFL,18
Toyota,17
Coca-Cola,12
Chevrolet,10



Top 50 UNMAPPED Original 'Advertiser_Product_Title' Entries:


Unnamed: 0_level_0,count
Advertiser_Product_Title,Unnamed: 1_level_1
"EF Hutton ""Joggers""",2
2018 Winter Olympics (NBC),2
Nerds Gummy Clusters,2
2025 IndyCar Series (Fox),2
Instacart,1
Nike,1
"New York Life ""Happiness""",1
"STP ""Snow""",1
"Personna 74 ""Tungsten Steel""",1
"Noxzema ""Close""",1


In [7]:
# Cell 6: Perform Merge

# Check if prerequisite DataFrames exist
if 'commercials_df' in locals() and commercials_df is not None and \
   'ticker_map_df' in locals() and ticker_map_df is not None and \
   'Primary_Advertiser' in commercials_df.columns:

    print("\nAttempting to merge commercials data with ticker map...")

    # Select only the columns needed from the ticker map
    map_cols_to_merge = ['BrandName', 'StockTicker', 'ParentCompany']
    if all(col in ticker_map_df.columns for col in map_cols_to_merge):
        ticker_map_subset = ticker_map_df[map_cols_to_merge].copy()

        # Perform a LEFT merge
        merged_df = pd.merge(
            commercials_df,
            ticker_map_subset,
            left_on='Primary_Advertiser', # Column in commercials_df
            right_on='BrandName',         # Column in ticker_map_subset
            how='left'                    # Keep all commercials
        )

        # Optional: Drop the redundant BrandName column from the merge
        merged_df.drop(columns=['BrandName'], inplace=True, errors='ignore')

        print("Merge complete.")
        print(f"Shape of merged DataFrame: {merged_df.shape}")

        # --- Inspect Merge ---
        print("\n--- Merged Data Inspection ---")
        cols_to_show = ['Year', 'Primary_Advertiser', 'StockTicker', 'ParentCompany', 'Advertiser_Product_Title', 'Title']
        # Ensure columns exist before trying to display them
        cols_to_show = [col for col in cols_to_show if col in merged_df.columns]
        display(merged_df[cols_to_show].head())

        mapped_ticker_count = merged_df['StockTicker'].notna().sum()
        print(f"\nRows with a StockTicker assigned after merge: {mapped_ticker_count}")

        print("\nSample of rows where StockTicker WAS found:")
        display(merged_df[merged_df['StockTicker'].notna()][cols_to_show].sample(min(10, mapped_ticker_count)))

        print("\nSample of rows where StockTicker was NOT found (NaN):")
        display(merged_df[merged_df['StockTicker'].isna()][cols_to_show].sample(min(10, len(merged_df) - mapped_ticker_count)))

    else:
        print("ERROR: 'BrandName', 'StockTicker', or 'ParentCompany' column not found in ticker_map_df. Cannot merge.")
        merged_df = None

else:
    print("ERROR: Cannot merge because 'commercials_df', 'ticker_map_df', or 'Primary_Advertiser' column is missing.")
    merged_df = None # Ensure it's None if prerequisites fail


Attempting to merge commercials data with ticker map...
Merge complete.
Shape of merged DataFrame: (1456, 10)

--- Merged Data Inspection ---


Unnamed: 0,Year,Primary_Advertiser,StockTicker,ParentCompany,Advertiser_Product_Title,Title
0,1969,TWA,,Trans World Airlines (Defunct),"TWA ""Old West""",
1,1969,TWA,,Trans World Airlines (Defunct),"TWA ""Tour Our Tours""",
2,1969,Schlitz,,Pabst Brewing Company (Private),"Schlitz ""Comin' Atcha""",
3,1969,Chrysler,STLA,Stellantis,"Chrysler ""Scuba Diver""[6]",
4,1969,Plymouth,STLA,Stellantis,"Plymouth ""Road Runner""",



Rows with a StockTicker assigned after merge: 644

Sample of rows where StockTicker WAS found:


Unnamed: 0,Year,Primary_Advertiser,StockTicker,ParentCompany,Advertiser_Product_Title,Title
8,1969,Winston,BTI,British American Tobacco,"Winston ""Playing Your Song""",
188,1985,Master Lock,FBIN,Fortune Brands Innovations,"Master Lock ""The Doubters""",
607,2007,Budweiser,BUD,Anheuser-Busch InBev,"Budweiser Select ""Just A Game""",
689,2010,Kia Motors,HYMTF,Hyundai Motor Company,"Kia Motors ""How You Like Me Now""",
206,1987,Johnson Controls,JCI,Johnson Controls International PLC,"Johnson Controls ""Superdome""",
657,2009,Hyundai,HYMTF,Hyundai Motor Company,"Hyundai Motor Company ""Epic Lap""",
1242,2023,Hellmann's,UL,Unilever,Hellmann's/Best Foods Mayonnaise,"""Who's in the Fridge?"""
14,1969,Gillette,PG,Procter & Gamble,"Gillette ""Swedish Lady Barber""",
763,2013,Audi,VWAGY,Volkswagen Group,"Audi ""Prom""",
997,2018,Wix.com,WIX,"Wix.com, Ltd.",Wix.com,"""Rhett & Link"""



Sample of rows where StockTicker was NOT found (NaN):


Unnamed: 0,Year,Primary_Advertiser,StockTicker,ParentCompany,Advertiser_Product_Title,Title
568,2005,,,,Robots,
694,2010,,,,The Back-Up Plan,
567,2005,,,,Hitch,
1360,2024,,,,Crowdstrike,"""The Future"""
387,1999,,,,"FedEx ""Stanley Cup""",
467,2002,,,,Men in Black II,
1029,2019,,,,Toy Story 4,—
614,2007,,,,Meet the Robinsons,
214,1987,,,,"Slice ""Baseball""",
272,1990,,,,"New York Life ""Friends""",


In [10]:
# Cell 7: Optional Save

#Check if merge was successful
if 'merged_df' in locals() and merged_df is not None:
    merged_filename = 'commercials_with_tickers.csv'
    merged_output_path = os.path.join(PROCESSED_DIR, merged_filename)
    try:
        merged_df.to_csv(merged_output_path, index=False)
        print(f"\nMerged data successfully saved to: {merged_output_path}")
    except Exception as e:
        print(f"\nERROR: Could not save merged data to CSV: {e}")
else:
     print("\nNo merged data frame to save.")


Merged data successfully saved to: ../data/processed\commercials_with_tickers.csv


In [18]:
# Cell to Prepare Ticker List and Date Range (Revised to Remove Problem Tickers)

import pandas as pd
import os
import sys
import numpy as np # Often needed

# --- Print environment info (good practice) ---
print(f"DEBUG: Running notebook using Python executable: {sys.executable}")
print(f"DEBUG: Using Pandas version: {pd.__version__}")

# --- Initialize tickers list ---
tickers = []

# --- Ensure merged_df exists from previous cell ---
if 'merged_df' not in locals() or merged_df is None:
    print("ERROR: 'merged_df' does not exist or is None. Please run the cell that loads and merges data first.")
elif 'StockTicker' not in merged_df.columns:
    print("ERROR: 'StockTicker' column not found in 'merged_df'. Cannot extract tickers.")
else:
    # Get unique, non-null stock tickers from the merged data
    tickers = merged_df['StockTicker'].dropna().unique().tolist()
    print(f"Found {len(tickers)} unique company tickers initially from mapped data.")

# Add the market index (S&P 500)
MARKET_INDEX = '^GSPC'
# Check if tickers list was successfully created before appending
if isinstance(tickers, list):
    if MARKET_INDEX not in tickers:
        tickers.append(MARKET_INDEX)
        print(f"Added market index '{MARKET_INDEX}'.")
else:
    print("Tickers list was not created properly.")

# --- Manually remove specific tickers causing YFTzMissingError ---
tickers_to_remove = ['BMWYY', 'CAJ', 'SQSP', 'ZAGG']
print(f"\nAttempting to remove problematic tickers: {tickers_to_remove}")

if isinstance(tickers, list) and tickers: # Proceed only if tickers is a non-empty list
    tickers_original_count = len(tickers)
    tickers = [ticker for ticker in tickers if ticker not in tickers_to_remove]
    tickers_new_count = len(tickers)
    removed_count = tickers_original_count - tickers_new_count
    if removed_count > 0:
         print(f"Successfully removed {removed_count} tickers.")
    else:
         print("No tickers from the removal list were found in the current ticker list.")
elif not isinstance(tickers, list):
     print("Tickers list is not valid.")
# If tickers was empty initially, this block is skipped, which is fine

# --- Define Date Range ---
# Find min/max years in your data for guidance
if 'merged_df' in locals() and merged_df is not None and 'Year' in merged_df.columns:
    try:
        # Attempt to convert Year column to numeric, coercing errors
        merged_df['Year'] = pd.to_numeric(merged_df['Year'], errors='coerce')
        min_year = int(merged_df['Year'].min()) # Convert to int after potential coerce/drop
        max_year = int(merged_df['Year'].max())
        print(f"\nData spans years {min_year} to {max_year}.")
        # Suggest buffer for estimation window (e.g., start mid-year before min_year)
        # and event window (e.g., end Feb/March after max_year)
        suggested_start = f"{min_year - 1}-06-01"
        suggested_end = f"{max_year + 1}-03-31"
        print(f"(Suggested date range based on data: {suggested_start} to {suggested_end})")
    except Exception as e:
        print(f"Warning: Could not determine date range from 'Year' column: {e}")
else:
    print("\nWarning: Could not automatically determine date range ('merged_df' or 'Year' column missing).")

# !!! --- USER ACTION: SET YOUR ACTUAL START AND END DATES HERE --- !!!
# Needs to cover estimation window (e.g., ~200 trading days before event)
# and event window (e.g., week surrounding event) for ALL relevant events.
DOWNLOAD_START_DATE = "1968-06-01" # Example: Adjust as needed! (Covers 2009 event estimation)
DOWNLOAD_END_DATE = "2025-04-01"   # Example: Adjust as needed! (Covers 2024 event window)

# --- Final Confirmation Print ---
if isinstance(tickers, list) and tickers: # Check if list exists and is not empty
    print(f"\nReady to download data for {len(tickers)} tickers from {DOWNLOAD_START_DATE} to {DOWNLOAD_END_DATE}.")
else:
    print("\nNo valid tickers remaining to download.")

# --- End of Cell ---

DEBUG: Running notebook using Python executable: c:\Users\Manny\anaconda3\envs\sb_analysis\python.exe
DEBUG: Using Pandas version: 2.2.3
Found 65 unique company tickers initially from mapped data.
Added market index '^GSPC'.

Attempting to remove problematic tickers: ['BMWYY', 'CAJ', 'SQSP', 'ZAGG']
Successfully removed 4 tickers.

Data spans years 1969 to 2025.
(Suggested date range based on data: 1968-06-01 to 2026-03-31)

Ready to download data for 62 tickers from 1968-06-01 to 2025-04-01.


In [20]:
# In a new cell

import yfinance as yf

stock_data = None # Initialize
adj_close_data = None # Initialize

# Proceed only if we have tickers to download
if tickers:
    print(f"\nDownloading daily stock data for {len(tickers)} tickers...")
    try:
        # Download data (Open, High, Low, Close, Adj Close, Volume)
        stock_data = yf.download(
            tickers = tickers,
            start = DOWNLOAD_START_DATE,
            end = DOWNLOAD_END_DATE,
            progress = True # Show download progress
        )

        if stock_data.empty:
            print("ERROR: No data downloaded. Check tickers and date range.")
        elif stock_data.isnull().all().all():
             print("ERROR: All downloaded data is NaN. Check tickers/dates.")
        else:
            print("\nDownload attempt complete (check debug info if needed).") # Simplified message

            # --- MODIFIED: Check for 'Close' instead of 'Adj Close' ---
            final_price_data = None # Initialize
            close_col_exists = False
            if isinstance(stock_data.columns, pd.MultiIndex):
                # Check if 'Close' exists in the top level ('Price') of the MultiIndex
                if 'Close' in stock_data.columns.get_level_values(0):
                    close_col_exists = True
                    # --- Select 'Close' data ---
                    close_data = stock_data['Close']
                else:
                     print("WARNING: Neither 'Adj Close' nor 'Close' found in MultiIndex level 0.")
                     close_data = None # Should not happen based on output, but good practice
            # Handle case if data isn't MultiIndex for some reason (less likely)
            elif 'Close' in stock_data.columns:
                 close_col_exists = True
                 close_data = stock_data['Close']
            else:
                 print("WARNING: Neither 'Adj Close' nor 'Close' found in columns.")
                 close_data = None

            # --- Process the selected 'Close' data ---
            if close_col_exists and close_data is not None and not close_data.empty:
                 print("Selected 'Close' data (auto_adjust=True default). Shape:", close_data.shape) # Updated message
                 print("\nPreview of Close data:")
                 display(close_data.head())
                 print("\nTail of Close data:")
                 display(close_data.tail())
                 # Assign to the variable used by the next cell
                 final_price_data = close_data
            else:
                 print("WARNING: 'Close' column data not found or is invalid/empty.")
                 if stock_data is not None: print("Available columns structure:", stock_data.columns)
                 # Ensure final_price_data is None if not successful
                 final_price_data = None


    except Exception as e:
        print(f"\nAn error occurred during yfinance download: {e}")

else:
    print("\nSkipping stock download because ticker list is empty.")


Downloading daily stock data for 62 tickers...


[*********************100%***********************]  62 of 62 completed



Download attempt complete (check debug info if needed).
Selected 'Close' data (auto_adjust=True default). Shape: (14527, 62)

Preview of Close data:


Ticker,ALL,AMZN,BHC,BKNG,BTI,BUD,CMCSA,CRM,CSGP,DASH,...,UBER,UL,VSCO,VWAGY,VZ,WIX,WW,XRX,YUM,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1968-06-03,,,,,,,,,,,...,,,,,,,,6.509691,,99.989998
1968-06-04,,,,,,,,,,,...,,,,,,,,6.560448,,100.379997
1968-06-05,,,,,,,,,,,...,,,,,,,,6.539077,,99.889999
1968-06-06,,,,,,,,,,,...,,,,,,,,6.787499,,100.650002
1968-06-07,,,,,,,,,,,...,,,,,,,,6.808871,,101.269997



Tail of Close data:


Ticker,ALL,AMZN,BHC,BKNG,BTI,BUD,CMCSA,CRM,CSGP,DASH,...,UBER,UL,VSCO,VWAGY,VZ,WIX,WW,XRX,YUM,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-03-25,208.119995,205.710007,7.0,4777.910156,39.969727,61.700001,36.608021,288.609985,80.230003,199.720001,...,75.610001,58.09,20.57,11.29,43.490002,173.309998,0.54,5.131833,155.800003,5776.649902
2025-03-26,209.279999,201.130005,6.81,4764.160156,40.077728,61.869999,37.034157,280.98999,80.370003,192.869995,...,74.18,58.189999,19.549999,10.94,44.18,168.630005,0.51,5.161101,158.679993,5712.200195
2025-03-27,209.289993,201.360001,6.75,4752.799805,40.440998,62.139999,37.034157,277.809998,80.129997,194.020004,...,74.860001,58.669998,19.5,10.96,44.959999,173.880005,0.54,5.29769,157.649994,5693.310059
2025-03-28,206.449997,192.720001,6.48,4634.240234,40.509998,62.040001,36.290897,269.970001,79.309998,182.610001,...,72.75,59.18,18.32,10.67,44.93,166.360001,0.5,5.005,155.759995,5580.939941
2025-03-31,207.070007,190.259995,6.47,4606.910156,41.369999,61.560001,36.568382,268.359985,79.230003,182.770004,...,72.860001,59.549999,18.58,10.33,45.360001,163.380005,0.52,4.83,157.360001,5611.850098


In [21]:
# In Cell 3 (Save Cell)

# Save the Close price data if it was successfully extracted in the previous cell
# --- MODIFIED: Check for 'final_price_data' ---
if 'final_price_data' in locals() and final_price_data is not None and not final_price_data.empty:
    stock_data_filename = 'stock_prices_close_auto_adjusted.csv' # Changed filename
    stock_data_output_path = os.path.join(PROCESSED_DIR, stock_data_filename)
    try:
        # Use the final_price_data variable
        final_price_data.to_csv(stock_data_output_path)
        print(f"\nClose stock data successfully saved to: {stock_data_output_path}")
    except Exception as e:
        print(f"\nERROR: Could not save stock data to CSV: {e}")
else:
    print("\nNo valid Close price data was processed in the previous cell, nothing to save.")


Close stock data successfully saved to: ../data/processed\stock_prices_close_auto_adjusted.csv
