# Generating TEST dataset

In [43]:
import pandas as pd
import numpy as np

def create_sample_panel(n_firms=100, n_years=7, seed=42):
    """Simulate a balanced firm-year panel from a Cobb-Douglas technology.

    Each firm starts with a productivity level ``omega`` that then evolves as
    an AR(1) process. Every year, log labor and log capital are drawn
    independently, log materials is a noisy function of capital and ``omega``,
    and log output is the Cobb-Douglas combination plus ``omega`` and a small
    measurement error.

    Parameters
    ----------
    n_firms : int
        Number of firms; ``firm_id`` runs from 1 to ``n_firms``.
    n_years : int
        Number of consecutive years per firm, starting at 2010.
    seed : int
        Seed for the private random generator. The default (42) reproduces
        the same draws the function produced when it seeded the global
        NumPy generator.

    Returns
    -------
    pandas.DataFrame
        One row per firm-year with columns ``firm_id``, ``year``, ``log_y``,
        ``log_l``, ``log_k``, ``log_m``, ``revenue`` (``exp(log_y)``) and
        ``material_cost`` (``exp(log_m)``). An empty panel still carries
        these columns.
    """
    # A private legacy generator yields the same stream as np.random.seed(seed)
    # followed by np.random.normal calls, but leaves the process-wide NumPy
    # RNG untouched, so calling this function no longer reseeds other code.
    rng = np.random.RandomState(seed)

    columns = [
        'firm_id', 'year', 'log_y', 'log_l', 'log_k', 'log_m',
        'revenue', 'material_cost',
    ]

    # Technology parameters (Cobb-Douglas coefficients)
    beta_l, beta_k, beta_m = 0.3, 0.2, 0.4

    data = []
    for i in range(n_firms):
        # Initial unobserved productivity (omega); it follows a first-order
        # Markov (AR(1)) process across years, updated at the end of each year.
        omega = rng.normal(0, 0.1)
        for t in range(n_years):
            # Inputs in logs. The order of the draws below is significant:
            # changing it changes the simulated data for a given seed.
            l = rng.normal(2, 0.5)  # Labor
            k = rng.normal(5, 0.8)  # Capital
            # Materials (m) depends on productivity (omega) and capital (k)
            m = 0.6 * k + 1.1 * omega + rng.normal(0, 0.05)

            # Cobb-Douglas: y = b_l*l + b_k*k + b_m*m + omega + epsilon
            epsilon = rng.normal(0, 0.02)  # Measurement error
            y = (beta_l * l) + (beta_k * k) + (beta_m * m) + omega + epsilon

            data.append({
                'firm_id': i + 1,
                'year': 2010 + t,
                'log_y': y,
                'log_l': l,
                'log_k': k,
                'log_m': m,
                'revenue': np.exp(y),
                'material_cost': np.exp(m)
            })
            # Law of motion for productivity: AR(1) with persistence 0.8
            omega = 0.8 * omega + rng.normal(0, 0.05)

    # Passing the column list keeps the schema stable even when no rows exist.
    return pd.DataFrame(data, columns=columns)

# Build the default-sized synthetic panel: 100 firms observed over 7 years
test_df = create_sample_panel(n_firms=100, n_years=7)

In [44]:
# Display the simulated firm-year panel as this cell's output.
test_df

Unnamed: 0,firm_id,year,log_y,log_l,log_k,log_m,revenue,material_cost
0,1,2010,3.104551,1.930868,5.518151,3.441681,22.299206,31.239413
1,1,2011,3.348844,2.789606,5.613948,3.375728,28.469811,29.245575
2,1,2012,2.741472,1.767135,5.193570,3.019657,15.509800,20.484255
3,1,2013,2.670939,1.493584,5.251398,3.073855,14.453534,21.625099
4,1,2014,2.822972,1.887112,5.054023,3.016521,16.826781,20.420116
...,...,...,...,...,...,...,...,...
695,100,2012,2.570074,2.143478,4.334436,2.606141,13.066790,13.546677
696,100,2013,3.563689,3.062386,6.065071,3.706333,35.293156,40.704279
697,100,2014,2.342563,1.353246,4.443444,2.601097,10.407875,13.478512
698,100,2015,3.140898,2.534190,5.483297,3.368117,23.124630,29.023813


# TESTING THE MARKUP CALCULATION

In [46]:
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

def estimate_markups_final(df):
    """Estimate the materials elasticity with prodest's LP routine and derive markups.

    The panel is sorted by firm and year, handed to R through rpy2, and passed
    to ``prodest::prodestLP``. The coefficient on materials is then combined
    with each observation's materials expenditure share to obtain a markup:
    ``markup = beta_m / (material_cost / revenue)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Firm-year panel with columns ``firm_id``, ``year``, ``log_y``,
        ``log_l``, ``log_k``, ``log_m``, ``revenue`` and ``material_cost``.
        The caller's frame is not modified.

    Returns
    -------
    tuple
        ``(panel, beta_m)``, where ``panel`` is a sorted copy of ``df`` with the
        added columns ``alpha_m`` (materials share of revenue) and ``markup``,
        and ``beta_m`` is the estimated materials coefficient.

    Raises
    ------
    KeyError
        If any required column is missing. Raised before R is invoked.

    Notes
    -----
    Side effect: the R global environment is assigned ``r_df``, ``res`` and
    ``betas``.
    """
    required_columns = [
        'firm_id', 'year', 'log_y', 'log_l', 'log_k', 'log_m',
        'revenue', 'material_cost',
    ]
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        # Fail fast with a readable message instead of an opaque R/rpy2 error
        # (or a KeyError only after the costly estimation has finished).
        raise KeyError(
            f"estimate_markups_final: missing required column(s): {missing}"
        )

    # Ensure ID and Year are integers and sorted correctly for the panel.
    # sort_values returns a new frame, so the caller's data stays untouched.
    df = df.sort_values(['firm_id', 'year']).reset_index(drop=True)
    df['firm_id'] = df['firm_id'].astype(int)
    df['year'] = df['year'].astype(int)

    # Convert Python DF to R
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_df = ro.conversion.py2rpy(df)

    ro.globalenv['r_df'] = r_df

    # Structural estimation in R.
    # NOTE(review): log_m is supplied both as a free input (fX) and as the
    # proxy (pX); whether the materials coefficient is well identified in this
    # setup should be confirmed against the estimator's assumptions.
    ro.r('''
        library(prodest)
        
        # We explicitly cast ID and Year to numeric to prevent factor expansion
        # Y: logged output | fX: free inputs | sX: state input | pX: proxy input
        # idvar/timevar are spelled out in full rather than relying on R's
        # partial argument matching.
        res <- prodestLP(
            Y = as.numeric(r_df$log_y), 
            fX = cbind(as.numeric(r_df$log_l), as.numeric(r_df$log_m)), 
            sX = as.numeric(r_df$log_k), 
            pX = as.numeric(r_df$log_m), 
            idvar = as.numeric(r_df$firm_id), 
            timevar = as.numeric(r_df$year)
        )
        
        # Extract estimated coefficients (betas)
        betas <- as.numeric(coef(res))
    ''')

    # Retrieve betas back to Python
    b = ro.r('betas')
    # Order follows the inputs: free inputs (beta_l, beta_m), then state (beta_k)
    beta_m = b[1]

    # Calculate Markup: mu_it = theta_m / alpha_m
    # 1. Output Elasticity (theta) is beta_m for Cobb-Douglas
    # 2. Expenditure Share (alpha) is materials cost over revenue
    df['alpha_m'] = df['material_cost'] / df['revenue']
    df['markup'] = beta_m / df['alpha_m']

    return df, beta_m

# Run the estimator on the synthetic panel and report the headline numbers
estimated_df, material_elasticity = estimate_markups_final(test_df)
print(f"Material Elasticity (Beta_m): {material_elasticity:.4f}")

# Preview the first five firm-year markups
preview_columns = ['firm_id', 'year', 'alpha_m', 'markup']
print(estimated_df.loc[:, preview_columns].head(5))

Material Elasticity (Beta_m): 0.9480
   firm_id  year   alpha_m    markup
0        1  2010  1.400920  0.676728
1        1  2011  1.027249  0.922894
2        1  2012  1.320730  0.717816
3        1  2013  1.496181  0.633641
4        1  2014  1.213549  0.781214
