Structure - Conduct - Performance Paradigm for smartphones

In [15]:
import os
import pandas as pd
import statsmodels.formula.api as smf

# 0. Change to your working directory
os.chdir('C:/Users/Lfran/OneDrive/Master thesis/Pricing power/Measurement')

# 1. LOAD BLP OUTPUT (conduct proxies + controls)
blp = pd.read_csv(
    '0_Data/generated_csv/blp_df.csv',
    usecols=[
        'product_ids',        # string code for the model
        'brand_equity_rank',
        'shipments_in_millions',
        'own_elasticity',
        'lerner_index',
        'markup_dollars'
    ]
)
# rename your firm key
blp.rename(columns={'product_ids':'Full Name'}, inplace=True)
blp['Full Name'] = blp['Full Name'].astype(str)

# 2. LOAD HHI STRUCTURE
hhi = pd.read_csv(
    '0_Data/generated_csv/firm_hhi.csv',
    usecols=['Year','Company Name','HHI Contribution']
)
hhi.rename(columns={
    'Company Name':'Full Name',
    'HHI Contribution':'HHI_contrib'
}, inplace=True)
hhi['Full Name'] = hhi['Full Name'].astype(str)

# 3. LOAD PERFORMANCE METRICS
roic     = pd.read_csv('0_Data/generated_csv/sp500_roic_power.csv', usecols=['Full Name','ROIC Power'])
roe      = pd.read_csv('0_Data/generated_csv/sp500_roe_power.csv', usecols=['Full Name','ROE Power'])
roa      = pd.read_csv('0_Data/generated_csv/sp500_roa_power.csv', usecols=['Full Name','ROA Power'])
tobins_q = pd.read_csv('0_Data/generated_csv/Tobins_q.csv',           usecols=['Full Name',"Tobin's Q"])

# rename Tobin’s Q
tobins_q.rename(columns={"Tobin's Q":'Tobin_Q'}, inplace=True)

# ensure key columns are strings
for df in (roic, roe, roa, tobins_q):
    df['Full Name'] = df['Full Name'].astype(str)

# 4. MERGE INTO ONE PANEL
# 4a. Merge blp ↔ hhi on Full Name & Year
panel = pd.merge(
    blp,
    hhi,
    on='Full Name',
    how='inner'
)

# 4b. Merge in each performance metric on Full Name
for perf_df in (roic, roe, roa, tobins_q):
    panel = pd.merge(
        panel,
        perf_df,
        on='Full Name',
        how='inner'
    )

# 5. DROP ROWS MISSING CORE VARIABLES
panel = panel.dropna(subset=['HHI_contrib','brand_equity_rank','Tobin_Q'])

# 6. RUN YOUR SCP REGRESSION
panel['Year'] = panel['Year'].astype(int)

# Inspect how many rows & what columns you have
print("Panel shape:", panel.shape)
print(panel.columns.tolist())
print(panel.head())

# Check for any all‐NA columns
print(panel.isna().sum())


# Assuming you’ve already built `panel` exactly as before, but it contains only one Year
formula = (
    'markup_dollars ~ HHI_contrib + brand_equity_rank '
    '+ Q("ROIC Power") + Q("ROE Power") + Q("ROA Power") + Tobin_Q'
)
model = smf.ols(formula, data=panel).fit(cov_type='HC3')
print(model.summary())



Panel shape: (0, 12)
['Full Name', 'brand_equity_rank', 'shipments_in_millions', 'own_elasticity', 'lerner_index', 'markup_dollars', 'Year', 'HHI_contrib', 'ROIC Power', 'ROE Power', 'ROA Power', 'Tobin_Q']
Empty DataFrame
Columns: [Full Name, brand_equity_rank, shipments_in_millions, own_elasticity, lerner_index, markup_dollars, Year, HHI_contrib, ROIC Power, ROE Power, ROA Power, Tobin_Q]
Index: []
Full Name                0
brand_equity_rank        0
shipments_in_millions    0
own_elasticity           0
lerner_index             0
markup_dollars           0
Year                     0
HHI_contrib              0
ROIC Power               0
ROE Power                0
ROA Power                0
Tobin_Q                  0
dtype: int64


ValueError: zero-size array to reduction operation maximum which has no identity