# Normalizing Multiple Features

In [1]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

# Define stock tickers
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]

# Fetch stock data
stock_data = yf.download(tickers, period="1mo", interval="1d")

# Debug: Print available columns to verify structure
print("Available columns:", stock_data.columns)

# Extract 'Close' prices and 'Volume' using .xs() for MultiIndex columns
closing_prices = stock_data.xs("Close", axis=1, level=0)  # Extract "Close" price for all stocks
trading_volumes = stock_data.xs("Volume", axis=1, level=0)  # Extract "Volume" data

# Drop NaN values
closing_prices = closing_prices.dropna()
trading_volumes = trading_volumes.dropna()

# Fetch market capitalization
market_caps = {}
for ticker in tickers:
    stock = yf.Ticker(ticker)
    market_caps[ticker] = stock.info.get("marketCap", None)  # Fetch market cap

# Convert Market Cap to a DataFrame (Repeat last available value for missing dates)
market_cap_df = pd.DataFrame({ticker: [market_caps[ticker]] * len(closing_prices) for ticker in tickers}, 
                             index=closing_prices.index)

# Combine all data
df = pd.DataFrame()
df["Stock_Price"] = closing_prices.mean(axis=1)  # Average stock price across selected stocks
df["Trading_Volume"] = trading_volumes.mean(axis=1)  # Average trading volume
df["Market_Cap"] = market_cap_df.mean(axis=1)  # Average market capitalization

# Normalize data
scaler = MinMaxScaler()
df_normalized = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

# Save to CSV or print the data
df_normalized.to_csv("normalized_financial_data.csv")  # Save to CSV file
print(df_normalized.head())  # Print first few rows


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  5 of 5 completed


Available columns: MultiIndex([( 'Close',  'AAPL'),
            ( 'Close',  'AMZN'),
            ( 'Close', 'GOOGL'),
            ( 'Close',  'MSFT'),
            ( 'Close',  'TSLA'),
            (  'High',  'AAPL'),
            (  'High',  'AMZN'),
            (  'High', 'GOOGL'),
            (  'High',  'MSFT'),
            (  'High',  'TSLA'),
            (   'Low',  'AAPL'),
            (   'Low',  'AMZN'),
            (   'Low', 'GOOGL'),
            (   'Low',  'MSFT'),
            (   'Low',  'TSLA'),
            (  'Open',  'AAPL'),
            (  'Open',  'AMZN'),
            (  'Open', 'GOOGL'),
            (  'Open',  'MSFT'),
            (  'Open',  'TSLA'),
            ('Volume',  'AAPL'),
            ('Volume',  'AMZN'),
            ('Volume', 'GOOGL'),
            ('Volume',  'MSFT'),
            ('Volume',  'TSLA')],
           names=['Price', 'Ticker'])
            Stock_Price  Trading_Volume  Market_Cap
Date                                               
2025-02-18   

# Decimal Scaling

In [2]:
import numpy as np

def decimal_scaling(series):
    max_abs = series.abs().max()
    scale_factor = 10 ** np.ceil(np.log10(max_abs))
    return series / scale_factor

df['Decimal_Normalized_Price'] = decimal_scaling(df['Stock_Price'])
df['Decimal_Normalized_Volume'] = decimal_scaling(df['Trading_Volume'])
df['Decimal_Normalized_Market_Cap'] = decimal_scaling(df['Market_Cap'])
print(df[['Decimal_Normalized_Price', 'Decimal_Normalized_Volume', 'Decimal_Normalized_Market_Cap']])

            Decimal_Normalized_Price  Decimal_Normalized_Volume  \
Date                                                              
2025-02-18                  0.283522                   0.389538   
2025-02-19                  0.286211                   0.343058   
2025-02-20                  0.284718                   0.304468   
2025-02-21                  0.277519                   0.490608   
2025-02-24                  0.274677                   0.452128   
2025-02-25                  0.267152                   0.625002   
2025-02-26                  0.263554                   0.477446   
2025-02-27                  0.257765                   0.489138   
2025-02-28                  0.262835                   0.610557   
2025-03-03                  0.256602                   0.538924   
2025-03-04                  0.254221                   0.632177   
2025-03-05                  0.259408                   0.468537   
2025-03-06                  0.253704                   0.49018

#  Normalizing OHLC Data for Stock Price Prediction

In [3]:
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

# Download stock data
stock_data = yf.download('GOOG', start='2024-01-01', end='2025-01-01')[['Open', 'High', 'Low', 'Close', 'Volume']]

# Apply Min-Max Scaling to all numerical columns
scaler = MinMaxScaler()
normalized_data = pd.DataFrame(scaler.fit_transform(stock_data), columns=stock_data.columns, index=stock_data.index)

print(normalized_data.head())
print(normalized_data.head())

[*********************100%***********************]  1 of 1 completed

Price           Open      High       Low     Close    Volume
Ticker          GOOG      GOOG      GOOG      GOOG      GOOG
Date                                                        
2024-01-02  0.103153  0.094771  0.094006  0.105561  0.250615
2024-01-03  0.088116  0.101597  0.104485  0.117626  0.229874
2024-01-04  0.106912  0.095058  0.098107  0.082639  0.216249
2024-01-05  0.084387  0.068833  0.080490  0.072837  0.162957
2024-01-08  0.079094  0.095130  0.096132  0.120189  0.204759
Price           Open      High       Low     Close    Volume
Ticker          GOOG      GOOG      GOOG      GOOG      GOOG
Date                                                        
2024-01-02  0.103153  0.094771  0.094006  0.105561  0.250615
2024-01-03  0.088116  0.101597  0.104485  0.117626  0.229874
2024-01-04  0.106912  0.095058  0.098107  0.082639  0.216249
2024-01-05  0.084387  0.068833  0.080490  0.072837  0.162957
2024-01-08  0.079094  0.095130  0.096132  0.120189  0.204759





# Normalizing Rolling Windows for Time-Series Forecasting

In [4]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

# Define stock tickers
tickers = ["AAPL"]  # Single ticker to avoid MultiIndex issues

# Fetch stock data
stock_data = yf.download(tickers, period="6mo", interval="1d")

# Extract 'Close' correctly
if isinstance(stock_data.columns, pd.MultiIndex):  
    close_prices = stock_data.xs("Close", level=0, axis=1)  # Extract Close prices for all tickers
else:
    close_prices = stock_data["Close"]  # Direct access if only one ticker

# Define rolling window size
window_size = 30
scaler = MinMaxScaler()

# Function to normalize each rolling window with condensed output
def rolling_minmax_norm(series, window, max_outputs=20):
    normalized_values = series.copy()
    count = 0  # Count displayed outputs
    
    for i in range(window, len(series)):  
        window_data = series.iloc[i - window:i].values.reshape(-1, 1)  # Extract rolling window
        scaled_window = scaler.fit_transform(window_data).flatten()  # Scale the entire window
        normalized_value = scaled_window[-1]  # Store only the last value
        
        # Print debugging info for a limited number of outputs
        if count < max_outputs:
            print(f"Window {i-window} to {i} -> Assigned: {normalized_value}")
            count += 1
        
        normalized_values.iloc[i] = normalized_value  # Assign normalized value

    return normalized_values

# Apply rolling normalization
stock_data["Rolling_Normalized_Close"] = rolling_minmax_norm(close_prices, window_size, max_outputs=20)

# Drop NaNs (first 30 days will be NaN) and display first 5 rows
print(stock_data[['Close', 'Rolling_Normalized_Close']].dropna().head())


[*********************100%***********************]  1 of 1 completed

Window 0 to 30 -> Assigned: 0.8220390279901704
Window 1 to 31 -> Assigned: 0.5686275523497955
Window 2 to 32 -> Assigned: 0.28532758171448336
Window 3 to 33 -> Assigned: 0.0824878922357275
Window 4 to 34 -> Assigned: 0.021634744630604885
Window 5 to 35 -> Assigned: 0.11899833324372189
Window 6 to 36 -> Assigned: 0.069640840905425
Window 7 to 37 -> Assigned: 0.3914798975958167
Window 8 to 38 -> Assigned: 0.37320451471749827
Window 9 to 39 -> Assigned: 0.18841687104765725
Window 10 to 40 -> Assigned: 0.18841687104765725
Window 11 to 41 -> Assigned: 0.248658943615661
Window 12 to 42 -> Assigned: 0.45849135809617536
Window 13 to 43 -> Assigned: 0.24053609168400314
Window 14 to 44 -> Assigned: 0.43267978829537235
Window 15 to 45 -> Assigned: 0.4506670724902353
Window 16 to 46 -> Assigned: 0.5004798018228254
Window 17 to 47 -> Assigned: 0.46727202015590485
Window 18 to 48 -> Assigned: 0.5606706234464571
Window 19 to 49 -> Assigned: 0.7682240146096486
Price            Close Rolling_Normalized


