In [None]:
import warnings

import numpy as np
import pandas as pd
import plotly.colors as pc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Suppress every Python warning for the rest of the session (keeps cell output clean,
# but also hides deprecation notices from the libraries used below).
warnings.filterwarnings('ignore')

# Read the three input CSVs from the working directory, in order:
# monthly highs, monthly lows, and the merged FRED macro series.
highs, lows, macro = (
    pd.read_csv(csv_name)
    for csv_name in ('Highs.csv', 'Lows.csv', 'merged_data(FRED).csv')
)

In [None]:
# Parse the date columns in place so later date-keyed merges and `.dt` accessors work.
for frame, date_col in ((highs, 'Date'), (lows, 'Date'), (macro, 'date')):
    frame[date_col] = pd.to_datetime(frame[date_col])

# Every column other than 'Date' is treated as one company's price series.
company_cols = [name for name in highs.columns if name != 'Date']

# Midpoint of high and low per company. `assign` returns a new frame, so `highs` is untouched.
# NOTE(review): the addition aligns on row index, so this assumes Highs.csv and Lows.csv
# list the same dates in the same row order — confirm.
avg_prices = highs.assign(**{
    name: (highs[name] + lows[name]) / 2
    for name in company_cols
})


In [None]:
# Reshape wide -> long: one (Date, Company, Avg_Stock_Price) row per observation,
# then discard observations that have no price.
avg_prices_long = (
    avg_prices
    .melt(id_vars=['Date'], var_name='Company', value_name='Avg_Stock_Price')
    .dropna(subset=['Avg_Stock_Price'])
)

In [None]:
# Order chronologically, then derive the two macro features used downstream.
macro = macro.sort_values('date').assign(
    # Percent change of Real GDP versus 12 rows earlier.
    # NOTE(review): this is year-over-year only if there is exactly one row per month — confirm.
    GDP_Growth_Rate=lambda df: df['Real GDP'].pct_change(periods=12) * 100,
    # Core Inflation is used as-is as the inflation rate.
    Inflation_Rate=lambda df: df['Core Inflation'],
)

# Independent copy holding only the columns needed by both visualizations.
macro_clean = macro.loc[:, [
    'date',
    'GDP_Growth_Rate',
    'Inflation_Rate',
    'Real GDP',
    'Labor Force Participation',
    'Unemployment Rate',
    'Nominal GDP',
]].copy()

In [None]:
# Attach the macro indicators to every (Date, Company) price row.
# The inner join keeps only dates present in both tables; the redundant right-hand
# key column is removed, and rows lacking GDP growth or inflation are discarded.
merged_data = (
    avg_prices_long
    .merge(macro_clean, left_on='Date', right_on='date', how='inner')
    .drop(columns='date')
    .dropna(subset=['GDP_Growth_Rate', 'Inflation_Rate'])
)


In [None]:
def _rebase_to_100(prices):
    """Express one company's prices relative to its first price (first = 100).

    Returns the scalar 100 for the whole group when the group is empty or its
    first price is 0, which avoids dividing by zero.
    """
    if len(prices) == 0 or prices.iloc[0] == 0:
        return 100
    return (prices / prices.iloc[0]) * 100


# Copy of merged_data with calendar label columns, ordered by company then date
# so the per-company calculations below see each series chronologically.
all_companies = merged_data.assign(
    Year=lambda df: df['Date'].dt.year,
    Month=lambda df: df['Date'].dt.month,
    Year_Month=lambda df: df['Date'].dt.strftime('%Y-%m'),
).sort_values(['Company', 'Date'])

company_prices = all_companies.groupby('Company')['Avg_Stock_Price']

all_companies = all_companies.assign(
    # Percent change versus the company's previous row; missing values
    # (e.g. each company's first row) become 0.
    Stock_Pct_Change=company_prices.pct_change().mul(100).fillna(0),
    # Price indexed to 100 at the company's first observation.
    Stock_Normalized=company_prices.transform(_rebase_to_100),
)

# The normalized price is the main stock performance metric.
all_companies['Stock_Performance'] = all_companies['Stock_Normalized']

In [None]:
# Sort by date for proper animation
all_companies = all_companies.sort_values('Date')

# Reshape highs and lows to long format (one row per Date/Company), dropping missing prices
highs_long = (
    highs.melt(id_vars=['Date'], var_name='Company', value_name='High_Price')
    .dropna(subset=['High_Price'])
)
lows_long = (
    lows.melt(id_vars=['Date'], var_name='Company', value_name='Low_Price')
    .dropna(subset=['Low_Price'])
)

# Keep only (Date, Company) pairs that have both a high and a low
price_range = highs_long.merge(lows_long, on=['Date', 'Company'], how='inner')

# Attach Year_Month and the average price from all_companies
viz_data = all_companies[['Date', 'Company', 'Year_Month', 'Avg_Stock_Price']].merge(
    price_range,
    on=['Date', 'Company'],
    how='inner'
)
if viz_data.empty:
    # Fail with a readable message instead of an IndexError further down
    raise ValueError(
        'No overlapping (Date, Company) rows between all_companies and the '
        'high/low price tables; nothing to animate.'
    )

# Use average stock price as market cap proxy for bubble size
viz_data['Market_Cap_Proxy'] = viz_data['Avg_Stock_Price']
# High-low spread; this is the value the hover "Range" line reports
viz_data['Price_Range'] = viz_data['High_Price'] - viz_data['Low_Price']

# One fixed color per company, cycling through three qualitative palettes
unique_companies = sorted(viz_data['Company'].unique())
colors = pc.qualitative.Dark24 + pc.qualitative.Light24 + pc.qualitative.Alphabet
company_colors = {company: colors[i % len(colors)] for i, company in enumerate(unique_companies)}

# Get sorted list of time periods ('YYYY-MM' strings sort chronologically)
time_periods = sorted(viz_data['Year_Month'].unique())
print(f"\nCreating {len(time_periods)} animation frames...")

# Determine size scaling: the largest bubble gets ~50px extra diameter, capped at 3px per dollar
max_price = viz_data['Market_Cap_Proxy'].max()
size_scale = min(50 / max_price, 3)  # Scale for visibility

HOVER_TEMPLATE = (
    '<b>%{text}</b><br>'
    'Low: $%{x:.2f}<br>'
    'High: $%{y:.2f}<br>'
    'Range: $%{customdata[1]:.2f}<br>'
    'Market Cap Proxy: $%{customdata[0]:.2f}<br>'
    '<extra></extra>'
)


def make_bubble_trace(period_data):
    """Build the Low-vs-High bubble scatter for one time period.

    Parameters
    ----------
    period_data : pandas.DataFrame
        Rows of ``viz_data`` for a single ``Year_Month``.

    Returns
    -------
    plotly.graph_objects.Scatter
        One marker per company, sized by ``Market_Cap_Proxy`` and colored via
        ``company_colors``. ``customdata`` carries
        ``[Market_Cap_Proxy, Price_Range]`` for the hover template.
    """
    return go.Scatter(
        x=period_data['Low_Price'],
        y=period_data['High_Price'],
        mode='markers',
        marker=dict(
            size=period_data['Market_Cap_Proxy'] * size_scale + 5,
            sizemode='diameter',
            sizemin=4,
            color=[company_colors[c] for c in period_data['Company']],
            line=dict(width=0.5, color='white'),
            opacity=0.7
        ),
        text=period_data['Company'],
        customdata=period_data[['Market_Cap_Proxy', 'Price_Range']].to_numpy(),
        hovertemplate=HOVER_TEMPLATE,
        showlegend=False
    )


# One frame per month; a single groupby pass replaces re-filtering viz_data per period.
# sort=True yields the groups in the same order as time_periods.
frames = [
    go.Frame(
        data=[make_bubble_trace(period_data)],
        name=time_period,
        layout=go.Layout(
            title_text=f'Tech Companies: Monthly Price Range<br><sub>{time_period} - {len(period_data)} companies</sub>'
        )
    )
    for time_period, period_data in viz_data.groupby('Year_Month', sort=True)
]

# Create initial frame (what is shown before Play is pressed)
initial_data = viz_data[viz_data['Year_Month'] == time_periods[0]]
initial_trace = make_bubble_trace(initial_data)

# Create figure
fig = go.Figure(
    data=[initial_trace],
    frames=frames
)

# Add animation controls
fig.update_layout(
    updatemenus=[
        dict(
            type='buttons',
            showactive=False,
            buttons=[
                dict(
                    label='Play',
                    method='animate',
                    args=[None, {
                        'frame': {'duration': 100, 'redraw': True},
                        'fromcurrent': True,
                        'transition': {'duration': 50, 'easing': 'linear'}
                    }]
                ),
                dict(
                    label='Pause',
                    method='animate',
                    args=[[None], {
                        'frame': {'duration': 0, 'redraw': False},
                        'mode': 'immediate',
                        'transition': {'duration': 0}
                    }]
                )
            ],
            x=0.1,
            y=0,
            xanchor='left',
            yanchor='top'
        )
    ],
    sliders=[
        dict(
            active=0,
            steps=[
                dict(
                    args=[[f.name], {
                        'frame': {'duration': 0, 'redraw': True},
                        'mode': 'immediate',
                        'transition': {'duration': 0}
                    }],
                    label=f.name,
                    method='animate'
                ) for f in frames
            ],
            x=0.1,
            y=0,
            len=0.9,
            xanchor='left',
            yanchor='top',
            pad=dict(b=10, t=50),
            currentvalue=dict(
                visible=True,
                prefix='Date: ',
                xanchor='right',
                font=dict(size=16)
            )
        )
    ]
)

# Largest price over the whole animation; used for the axis ranges and the y=x line
max_val = max(viz_data['High_Price'].max(), viz_data['Low_Price'].max())
# Fix both axes up front: the axes are otherwise autoranged from the first month only,
# so later, higher-priced frames would fall outside the visible area.
axis_range = [0, max_val * 1.05]

# Update layout
fig.update_layout(
    width=1400,
    height=800,
    title='Tech Companies: Monthly Price Range (Low vs High)<br><sub>Each company = colored bubble | Size = Market Cap (proxy)</sub>',
    xaxis=dict(
        title='Low Stock Price ($)',
        gridcolor='lightgray',
        zeroline=False,
        range=axis_range
    ),
    yaxis=dict(
        title='High Stock Price ($)',
        gridcolor='lightgray',
        zeroline=False,
        range=axis_range
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(size=12),
    hovermode='closest'
)

# Add diagonal reference line (y=x) to show where low equals high
fig.add_shape(
    type='line',
    x0=0, y0=0, x1=max_val, y1=max_val,
    line=dict(color='gray', width=1, dash='dash'),
    name='Low = High'
)

# Show the plot
fig.show()


Creating 572 animation frames...


---

# Part 2: Tech Sector Performance vs Macroeconomic Indicators

In [None]:
# Per-date aggregation plan: average the stock price across companies and take the
# first value of each macro column within the date group.
agg_dict = {
    'Avg_Stock_Price': 'mean',
    **dict.fromkeys(('GDP_Growth_Rate', 'Inflation_Rate', 'Real GDP'), 'first'),
}

# Optional macro columns are aggregated only when present; report each decision.
for col in ('Labor Force Participation', 'Unemployment Rate', 'Nominal GDP'):
    if col not in all_companies.columns:
        print(f"✗ {col} not found in all_companies")
        continue
    agg_dict[col] = 'first'
    print(f"✓ Including {col}")

# One row per date, with the averaged price renamed for clarity.
sector_performance = (
    all_companies
    .groupby('Date')
    .agg(agg_dict)
    .reset_index()
    .rename(columns={'Avg_Stock_Price': 'Tech_Sector_Avg_Price'})
)

# Index the sector average to 100 at the earliest date (groupby sorts by Date).
first_price = sector_performance['Tech_Sector_Avg_Price'].iloc[0]
sector_performance['Tech_Sector_Performance'] = (
    sector_performance['Tech_Sector_Avg_Price'].div(first_price).mul(100)
)

✓ Including Labor Force Participation
✓ Including Unemployment Rate
✓ Including Nominal GDP


In [None]:
# Function to normalize to 0-100 scale
def normalize_0_100(series):
    """Min-max rescale ``series`` so its minimum maps to 0 and its maximum to 100.

    A series with no variation (max equals min) is mapped to a constant 50.
    Missing values stay missing in either case.
    """
    lowest, highest = series.min(), series.max()
    spread = highest - lowest
    if spread != 0:
        return ((series - lowest) / spread) * 100
    # Flat series: multiplying by 0 keeps the index (and NaNs) while yielding 50 everywhere.
    return series * 0 + 50

# Start from the dates and add the tech sector series rescaled from base-100 to 0-100.
sector_norm = sector_performance[['Date']].assign(
    Tech_Sector_Performance=normalize_0_100(sector_performance['Tech_Sector_Performance'])
)

# (source column in sector_performance, target column in sector_norm)
indicators = [
    ('Real GDP', 'Real_GDP'),
    ('GDP_Growth_Rate', 'GDP_Growth'),
    ('Inflation_Rate', 'Inflation'),
    ('Unemployment Rate', 'Unemployment'),
    ('Labor Force Participation', 'Labor_Force_Participation'),
    ('Nominal GDP', 'Nominal_GDP')
]

# Rescale each indicator that is present; report the ones that are missing.
for orig_col, new_col in indicators:
    if orig_col not in sector_performance.columns:
        print(f"✗ {orig_col} not found in data")
        continue
    sector_norm[new_col] = normalize_0_100(sector_performance[orig_col])
    print(f"✓ Normalized {orig_col}")

# Keep only dates where every normalized series has a value.
sector_norm = sector_norm.dropna()

✓ Normalized Real GDP
✓ Normalized GDP_Growth_Rate
✓ Normalized Inflation_Rate
✓ Normalized Unemployment Rate
✓ Normalized Labor Force Participation
✓ Normalized Nominal GDP


In [None]:
# Indicators to compare against tech performance:
# (column in sector_norm, display name, category)
indicators_to_plot = [
    ('Real_GDP', 'Real GDP', 'Economic Output'),
    ('GDP_Growth', 'GDP Growth Rate', 'Economic Growth'),
    ('Inflation', 'Core Inflation', 'Price Pressure'),
    ('Unemployment', 'Unemployment Rate', 'Labor Market'),
    ('Labor_Force_Participation', 'Labor Force Participation', 'Labor Market'),
    ('Nominal_GDP', 'Nominal GDP', 'Economic Output')
]

# Filter to only available indicators
available_indicators = [
    (col, name, cat)
    for col, name, cat in indicators_to_plot
    if col in sector_norm.columns
]
n_indicators = len(available_indicators)

# Subplot grid: ceiling-divide by n_cols (rather than a hard-coded 2) so the row count
# stays correct if n_cols changes; keep at least one row so make_subplots gets a valid grid.
n_cols = 2
n_rows = max(1, (n_indicators + n_cols - 1) // n_cols)

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    subplot_titles=[name for _, name, _ in available_indicators],
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

# Add traces for each indicator: panels are filled left-to-right, top-to-bottom
for idx, (col, name, _category) in enumerate(available_indicators):
    row = (idx // n_cols) + 1
    col_pos = (idx % n_cols) + 1
    is_first_panel = (idx == 0)  # legend entries are emitted once, from the first panel

    # Tech sector line (identical in every panel). The shared legendgroup makes the
    # single legend entry toggle this line in all panels, not just the first.
    fig.add_trace(
        go.Scatter(
            x=sector_norm['Date'],
            y=sector_norm['Tech_Sector_Performance'],
            name='Tech Sector',
            legendgroup='tech',
            line=dict(color='#1f77b4', width=2),
            showlegend=is_first_panel
        ),
        row=row,
        col=col_pos
    )

    # Economic indicator line. Every indicator shares the orange dashed style, so the
    # legend entry is generic (the panel title identifies the indicator) and the
    # indicator's own name is shown in the hover label instead.
    fig.add_trace(
        go.Scatter(
            x=sector_norm['Date'],
            y=sector_norm[col],
            name='Economic Indicator (see panel title)',
            legendgroup='indicator',
            line=dict(color='#ff7f0e', width=2, dash='dash'),
            hovertemplate='%{x|%b %Y}<br>' + f'{name}: ' + '%{y:.1f}<extra></extra>',
            showlegend=is_first_panel
        ),
        row=row,
        col=col_pos
    )

# Update layout
fig.update_layout(
    height=400 * n_rows,
    width=1400,
    title_text='Tech Sector Performance vs Macroeconomic Indicators<br><sub>All variables normalized to 0-100 scale for comparison</sub>',
    showlegend=True,
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    ),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# Update axes (applies to every subplot)
fig.update_xaxes(title_text='Date', gridcolor='lightgray')
fig.update_yaxes(title_text='Normalized Value (0-100)', gridcolor='lightgray', range=[0, 100])

fig.show()