#### Year-by-year scatterplot of each company's CEO cash-to-equity compensation ratio against its share price appreciation

In [35]:
# Include libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import altair as alt

Data to include:
- Dates
- Year
- symbol
- gics_sector_name
- PX_LAST
- DATE_OF_LAST_EXECUTIVE_CHANGE
- TOT_STK_AWD_GIVEN_TO_CEO_EQUIV
- TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV
- TOT_BONUSES_PAID_TO_CEO_EQUIV
- TOT_N_EQT_INCENT_GVN_TO_CEO_EQ
- COMP_LKD_LAST_CEO_&_EQUIV_CHG_DT
- ALL_OTHER_COMP_AW_TO_CEO_EQUIV

The data for the last seven columns are found in the SPX_Data directory

Each CSV file contains yearly data for the named feature. 

The last price of every week (PX_LAST) is contained in 503PXlast.csv.

### Create dataframe

In [4]:
# Dates, years, symbol, PX_LAST: build the long price dataframe

# Read the price table; the first CSV column becomes a parsed datetime index
px_last_raw_data = pd.read_csv('../../data/503PXlast.csv', index_col=0, parse_dates=True)

# Reshape to long form (one row per date/symbol pair) and rename the
# date column for consistency, as a single chain without in-place edits
px_last_long = (
    px_last_raw_data
    .reset_index()
    .melt(id_vars='DATES', var_name='symbol', value_name='price')
    .rename(columns={'DATES': 'date'})
)

# Put the year column directly after the date column
px_last_long.insert(1, 'year', pd.to_datetime(px_last_long['date']).dt.year)

display(px_last_long.head())
px_last_long.index

Unnamed: 0,date,year,symbol,price
0,2015-01-02,2015,LYB UN Equity,80.07
1,2015-01-09,2015,LYB UN Equity,78.98
2,2015-01-16,2015,LYB UN Equity,79.84
3,2015-01-23,2015,LYB UN Equity,80.8
4,2015-01-30,2015,LYB UN Equity,79.09


RangeIndex(start=0, stop=263069, step=1)

In [5]:
# gics_sector_name: attach each symbol's GICS sector to the price rows

# Read the sector table; its first column (the symbol) is used as the index
gics_sector_data = pd.read_csv('../../data/503_GICSData.csv', index_col=0)

# Left-join the sector name onto the long price frame via the symbol
merged_df = px_last_long.merge(
    gics_sector_data[['gics_sector_name']],
    left_on='symbol',
    right_index=True,
    how='left',
)

# Relocate gics_sector_name so it sits right after the symbol column
merged_df.insert(3, 'gics_sector_name', merged_df.pop('gics_sector_name'))

display(merged_df.head())

Unnamed: 0,date,year,symbol,gics_sector_name,price
0,2015-01-02,2015,LYB UN Equity,Materials,80.07
1,2015-01-09,2015,LYB UN Equity,Materials,78.98
2,2015-01-16,2015,LYB UN Equity,Materials,79.84
3,2015-01-23,2015,LYB UN Equity,Materials,80.8
4,2015-01-30,2015,LYB UN Equity,Materials,79.09


In [6]:
def reformat_dataframe(feature, data_name='value', data_dir='../../data/SPX_Data/'):
    """Load one feature CSV and reshape it to long format.

    The CSV ``<data_dir>/<feature>.csv`` must contain a ``Date`` column; every
    other column is treated as one symbol. The result has one row per
    (date, symbol) pair so that its ``year`` and ``symbol`` columns can be
    matched against those of ``merged_df``.

    Parameters
    ----------
    feature : str
        File name (without the ``.csv`` extension) of the feature to load.
    data_name : str, default 'value'
        Name given to the column holding the melted values.
    data_dir : str or path-like, default '../../data/SPX_Data/'
        Directory containing the feature CSV files.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``year``, ``symbol`` and ``data_name``, in that order.
    """
    # Tolerate a directory given with or without a trailing slash
    csv_path = f"{str(data_dir).rstrip('/')}/{feature}.csv"
    data = pd.read_csv(csv_path, parse_dates=['Date'])

    # Wide (one column per symbol) -> long, with a consistently named date column
    long_data = (
        data
        .melt(id_vars='Date', var_name='symbol', value_name=data_name)
        .rename(columns={'Date': 'date'})
    )

    # Year column placed directly after the date column
    long_data.insert(1, 'year', pd.to_datetime(long_data['date']).dt.year)

    return long_data

In [7]:
# Remaining columns, each loaded into a long dataframe named after its feature:
#- TOT_STK_AWD_GIVEN_TO_CEO_EQUIV
#- TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV
#- TOT_BONUSES_PAID_TO_CEO_EQUIV
#- TOT_N_EQT_INCENT_GVN_TO_CEO_EQ
#- ALL_OTHER_COMP_AW_TO_CEO_EQUIV       (flagged UNUSED in the author's notes)
# DATE_OF_LAST_EXECUTIVE_CHANGE is flagged UNUSED as well and is not loaded here.

(
    TOT_STK_AWD_GIVEN_TO_CEO_EQUIV,
    TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV,
    TOT_BONUSES_PAID_TO_CEO_EQUIV,
    TOT_N_EQT_INCENT_GVN_TO_CEO_EQ,
    ALL_OTHER_COMP_AW_TO_CEO_EQUIV,
) = (
    # The value column of each frame carries the feature's own name
    reformat_dataframe(feature_name, data_name=feature_name)
    for feature_name in (
        'TOT_STK_AWD_GIVEN_TO_CEO_EQUIV',
        'TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV',
        'TOT_BONUSES_PAID_TO_CEO_EQUIV',
        'TOT_N_EQT_INCENT_GVN_TO_CEO_EQ',
        'ALL_OTHER_COMP_AW_TO_CEO_EQUIV',
    )
)


In [8]:
# Finalize the main dataframe (no feature-engineered columns yet):
# left-join every compensation feature onto the price rows by (year, symbol)

main_df = (
    merged_df
    .merge(
        TOT_STK_AWD_GIVEN_TO_CEO_EQUIV[['year', 'symbol', 'TOT_STK_AWD_GIVEN_TO_CEO_EQUIV']],
        on=['year', 'symbol'], how='left',
    )
    .merge(
        TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV[['year', 'symbol', 'TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV']],
        on=['year', 'symbol'], how='left',
    )
    .merge(
        TOT_BONUSES_PAID_TO_CEO_EQUIV[['year', 'symbol', 'TOT_BONUSES_PAID_TO_CEO_EQUIV']],
        on=['year', 'symbol'], how='left',
    )
    .merge(
        TOT_N_EQT_INCENT_GVN_TO_CEO_EQ[['year', 'symbol', 'TOT_N_EQT_INCENT_GVN_TO_CEO_EQ']],
        on=['year', 'symbol'], how='left',
    )
    .merge(
        ALL_OTHER_COMP_AW_TO_CEO_EQUIV[['year', 'symbol', 'ALL_OTHER_COMP_AW_TO_CEO_EQUIV']],
        on=['year', 'symbol'], how='left',
    )
)

display(main_df.head())

Unnamed: 0,date,year,symbol,gics_sector_name,price,TOT_STK_AWD_GIVEN_TO_CEO_EQUIV,TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV,TOT_BONUSES_PAID_TO_CEO_EQUIV,TOT_N_EQT_INCENT_GVN_TO_CEO_EQ,ALL_OTHER_COMP_AW_TO_CEO_EQUIV
0,2015-01-02,2015,LYB UN Equity,Materials,80.07,12356319.0,6518771.0,0.0,4026937.0,20040667.0
1,2015-01-09,2015,LYB UN Equity,Materials,78.98,12356319.0,6518771.0,0.0,4026937.0,20040667.0
2,2015-01-16,2015,LYB UN Equity,Materials,79.84,12356319.0,6518771.0,0.0,4026937.0,20040667.0
3,2015-01-23,2015,LYB UN Equity,Materials,80.8,12356319.0,6518771.0,0.0,4026937.0,20040667.0
4,2015-01-30,2015,LYB UN Equity,Materials,79.09,12356319.0,6518771.0,0.0,4026937.0,20040667.0


In [9]:
# Add the cash:equity compensation ratio columns to main_df.
# Each later column builds on the ones defined before it:
#   equity  = stock awards + option awards
#   cash    = bonuses + non-equity incentives
#   total   = cash + equity
#   percent = cash / total

main_df = main_df.assign(
    equity_Compensation_CEO=lambda d: d['TOT_STK_AWD_GIVEN_TO_CEO_EQUIV'] + d['TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV'],
    cash_incentive_CEO=lambda d: d['TOT_BONUSES_PAID_TO_CEO_EQUIV'] + d['TOT_N_EQT_INCENT_GVN_TO_CEO_EQ'],
    Variable_Pay_CEO=lambda d: d['cash_incentive_CEO'] + d['equity_Compensation_CEO'],
    cash_incentive_CEO_percent=lambda d: d['cash_incentive_CEO'] / d['Variable_Pay_CEO'],
)

In [10]:
# Preview the main dataframe: rich display of the frame itself, followed by
# the per-column non-null counts and dtypes printed by info()

display(main_df)
main_df.info()

Unnamed: 0,date,year,symbol,gics_sector_name,price,TOT_STK_AWD_GIVEN_TO_CEO_EQUIV,TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV,TOT_BONUSES_PAID_TO_CEO_EQUIV,TOT_N_EQT_INCENT_GVN_TO_CEO_EQ,ALL_OTHER_COMP_AW_TO_CEO_EQUIV,equity_Compensation_CEO,cash_incentive_CEO,Variable_Pay_CEO,cash_incentive_CEO_percent
0,2015-01-02,2015,LYB UN Equity,Materials,80.070000,12356319.0,6518771.0,0.0,4026937.0,20040667.0,18875090.0,4026937.0,22902027.0,0.175833
1,2015-01-09,2015,LYB UN Equity,Materials,78.980000,12356319.0,6518771.0,0.0,4026937.0,20040667.0,18875090.0,4026937.0,22902027.0,0.175833
2,2015-01-16,2015,LYB UN Equity,Materials,79.840000,12356319.0,6518771.0,0.0,4026937.0,20040667.0,18875090.0,4026937.0,22902027.0,0.175833
3,2015-01-23,2015,LYB UN Equity,Materials,80.800000,12356319.0,6518771.0,0.0,4026937.0,20040667.0,18875090.0,4026937.0,22902027.0,0.175833
4,2015-01-30,2015,LYB UN Equity,Materials,79.090000,12356319.0,6518771.0,0.0,4026937.0,20040667.0,18875090.0,4026937.0,22902027.0,0.175833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263064,2024-12-06,2024,MOH UN Equity,Health Care,294.990000,,,,,,,,,
263065,2024-12-13,2024,MOH UN Equity,Health Care,303.020000,,,,,,,,,
263066,2024-12-20,2024,MOH UN Equity,Health Care,294.730000,,,,,,,,,
263067,2024-12-27,2024,MOH UN Equity,Health Care,292.180000,,,,,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 263069 entries, 0 to 263068
Data columns (total 14 columns):
 #   Column                          Non-Null Count   Dtype         
---  ------                          --------------   -----         
 0   date                            263069 non-null  datetime64[ns]
 1   year                            263069 non-null  int32         
 2   symbol                          263069 non-null  object        
 3   gics_sector_name                262546 non-null  object        
 4   price                           247334 non-null  float64       
 5   TOT_STK_AWD_GIVEN_TO_CEO_EQUIV  229827 non-null  float64       
 6   TOT_OPT_AWD_GIVEN_TO_CEO_EQUIV  228628 non-null  float64       
 7   TOT_BONUSES_PAID_TO_CEO_EQUIV   229046 non-null  float64       
 8   TOT_N_EQT_INCENT_GVN_TO_CEO_EQ  229148 non-null  float64       
 9   ALL_OTHER_COMP_AW_TO_CEO_EQUIV  229931 non-null  float64       
 10  equity_Compensation_CEO         228524 non-null  float64

In [11]:
# Save the finalized dataframe (including the engineered ratio columns) as a CSV file
# NOTE: NaN values remain in the dataframe
# NOTE(review): to_csv is called with its defaults, so the row index should be
# written as a leading unnamed column - confirm downstream readers expect that

main_df.to_csv('../../data/finalized_df.csv')

### Graph edge matrix


In [34]:
# 13x13 strictly upper-triangular matrix: entry (i, j) is j - i when j > i,
# and 0 on and below the diagonal. Built by broadcasting a row of column
# indices against a column of row indices, then zeroing the lower triangle.

matrix = np.triu(np.arange(13)[np.newaxis, :] - np.arange(13)[:, np.newaxis])

### Produce Altair scatter plot

In [171]:
def scatterplot(symbol, data=None, max_lag=5):
    """Build an interactive Altair chart of CEO cash-incentive share vs. price change.

    For one company, every calendar year with data becomes a point:
    x is the cash-incentive share of variable pay (``cash_incentive_CEO_percent``)
    and y is the year-over-year price ratio (year-end price divided by the
    previous calendar year's year-end price). A dropdown selects a lag in years;
    for the selected lag, each point is joined by a straight segment to the
    point of the same company ``lag`` calendar years later.

    Parameters
    ----------
    symbol : str
        Company symbol to plot, as it appears in the ``symbol`` column.
    data : pandas.DataFrame, optional
        Frame with at least the columns ``symbol``, ``date``, ``year``,
        ``price`` and ``cash_incentive_CEO_percent``. Defaults to the
        notebook-level ``main_df``.
    max_lag : int, default 5
        Largest lag (in years) offered by the dropdown; lags run from 1 to
        ``max_lag`` inclusive.

    Returns
    -------
    altair.LayerChart
        Scatter points, year labels, reference rules and lag connectors.

    Raises
    ------
    ValueError
        If ``max_lag`` is smaller than 1.
    """
    if max_lag < 1:
        raise ValueError("max_lag must be at least 1")

    source = main_df if data is None else data
    x_field = 'cash_incentive_CEO_percent'
    y_field = 'price_change_ratio'

    # Restrict to the columns the chart actually uses *before* dropping NaNs,
    # so missing values in unrelated columns (e.g. the sector name or an unused
    # compensation feature) do not discard otherwise valid observations.
    needed_columns = ['symbol', 'date', 'year', 'price', x_field]
    df = (
        source.loc[source['symbol'] == symbol, needed_columns]
        .dropna()
        .sort_values(by=['symbol', 'date'])
    )

    # Last available observation of each year for the symbol
    df_year_end = df.groupby(['symbol', 'year']).last().reset_index()

    # Year-over-year price ratio. The previous year's price is matched on the
    # calendar year (year - 1) rather than on row position, so a missing year
    # cannot silently turn into a multi-year ratio. Years without a direct
    # predecessor are dropped by the inner join.
    prev_year_close = (
        df_year_end[['symbol', 'year', 'price']]
        .rename(columns={'price': 'prev_year_price'})
        .assign(year=lambda d: d['year'] + 1)
    )
    df_year_end = df_year_end.merge(prev_year_close, on=['symbol', 'year'], how='inner')
    df_year_end[y_field] = df_year_end['price'] / df_year_end['prev_year_price']
    df_year_end = df_year_end.dropna()

    # One row per (year, lag) pair, holding the coordinates of the point that
    # lies exactly `lag` calendar years after the row's own year.
    lags = list(range(1, max_lag + 1))
    later_points = df_year_end[['symbol', 'year', x_field, y_field]].rename(columns={
        'year': 'lagged_year',
        x_field: 'lagged_cash_incentive',
        y_field: 'lagged_price_change_ratio',
    })
    df_long = pd.concat(
        [
            df_year_end
            .merge(
                later_points.assign(year=later_points['lagged_year'] - lag),
                on=['symbol', 'year'],
                how='inner',
            )
            .assign(lag=lag)
            for lag in lags
        ],
        ignore_index=True,
    )

    # Dropdown-bound parameter holding the selected lag
    lag_dropdown = alt.binding_select(options=lags, name="Lag (years): ")
    lag_selection = alt.param(name="lag", bind=lag_dropdown, value=1)

    # Scatter plot (points). The x values are fractions in [0, 1]; the axis
    # and tooltip format them as percentages to match the "(%)" title.
    scatter = alt.Chart(df_year_end).mark_point(size=80).encode(
        x=alt.X(x_field, title="CEO Cash Incentive (%)", axis=alt.Axis(format='%')),
        y=alt.Y(y_field, title="Year-over-Year Price Change (Ratio)", scale=alt.Scale(zero=False)),
        tooltip=['year', y_field, alt.Tooltip(x_field, format='.1%')]
    ).properties(
        title=f"CEO Cash Incentive vs. Price Change Ratio for {symbol}"
    ).interactive()

    # Floating year labels near points
    year_labels = alt.Chart(df_year_end).mark_text(
        align='left',
        dx=7, dy=-5,
        fontSize=12, fontWeight='bold'
    ).encode(
        x=x_field,
        y=y_field,
        text='year'
    )

    # Reference line at y = 1 (price unchanged year over year)
    y_line = alt.Chart(pd.DataFrame({'y': [1]})).mark_rule(color='red', strokeDash=[5, 5]).encode(
        y='y'
    )

    # Vertical lines at x = 0 and x = 1 (bounds of the cash-incentive share)
    vline_0 = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule(color='black', strokeDash=[5, 5]).encode(x='x:Q')
    vline_1 = alt.Chart(pd.DataFrame({'x': [1]})).mark_rule(color='black', strokeDash=[5, 5]).encode(x='x:Q')

    # Connecting segments for the selected lag. A rule mark is required here:
    # the x2/y2 channels are ignored by line marks, so a line mark would not
    # draw point-to-lagged-point segments.
    lines = alt.Chart(df_long).transform_filter(
        "datum.lag == lag"
    ).mark_rule(opacity=0.7).encode(
        x=x_field,
        y=y_field,
        x2='lagged_cash_incentive',
        y2='lagged_price_change_ratio',
        detail='symbol:N'
    )

    # Combine all elements and attach the lag selector to the layered chart
    final_chart = (scatter + year_labels + y_line + vline_0 + vline_1 + lines).add_params(lag_selection)

    return final_chart



In [172]:
# Render the chart for each of the first ten distinct symbols in main_df,
# printing a blank-line separator after every chart
for ticker in main_df['symbol'].unique()[:10]:
    chart = scatterplot(ticker)
    display(chart)
    print('\n')










































