In [11]:
import pandas as pd
import requests
import tempfile
import certifi

# URL for Penn World Table data (modify according to the specific version you're using)
url = 'https://www.rug.nl/ggdc/docs/pwt90.dta'
response = requests.get(url, verify=certifi.where())
response.raise_for_status()

# Save to a temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix='.dta') as tmp:
    tmp.write(response.content)
    tmp_path = tmp.name

# Load the data using pandas
pwt_data = pd.read_stata(tmp_path)

# Filter data for the period 1960-2000 and specific countries if needed
pwt_filtered = pwt_data[(pwt_data['year'] >= 1960) & (pwt_data['year'] <= 2000)]


In [14]:
# Calculate labor productivity as real GDP (rgdpna) per worker (emp)
pwt_filtered['labor_productivity'] = pwt_filtered['rgdpna'] / pwt_filtered['emp']

# Calculate growth rate of labor productivity
pwt_filtered['lp_growth'] = pwt_filtered.groupby('country')['labor_productivity'].pct_change() * 100

# Calculate growth rates for TFP and capital
pwt_filtered['tfp_growth'] = pwt_filtered.groupby('country')['rtfpna'].pct_change() * 100
pwt_filtered['capital_growth'] = pwt_filtered.groupby('country')['rkna'].pct_change() * 100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pwt_filtered['labor_productivity'] = pwt_filtered['rgdpna'] / pwt_filtered['emp']
  pwt_filtered['lp_growth'] = pwt_filtered.groupby('country')['labor_productivity'].pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pwt_filtered['lp_growth'] = pwt_filtered.groupby('country')['labor_productivity'].pct_change() * 100
  pwt_filtered['tfp_growth'] = pwt_filtered.groupby('country')['rtfpna'].pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexe

In [18]:
# Calculate average growth rates and contributions over the period for each country
average_metrics = pwt_filtered.groupby('country').agg({
    'lp_growth': 'mean',
    'tfp_growth': 'mean',
    'capital_growth': 'mean'
}).reset_index()

# Calculate shares of TFP growth and capital deepening in total labor productivity growth
average_metrics['tfp_share'] = average_metrics['tfp_growth'] / average_metrics['lp_growth']
average_metrics['capital_share'] = average_metrics['capital_growth'] / average_metrics['lp_growth']
average_metrics['sum_shares'] = average_metrics['tfp_share'] + average_metrics['capital_share']

# Display the results
print(average_metrics[['country', 'lp_growth', 'tfp_growth', 'capital_growth', 'tfp_share', 'capital_share', 'sum_shares']])


                                country  lp_growth  tfp_growth  \
0                               Albania   1.812419         NaN   
1                               Algeria   0.726973         NaN   
2                                Angola  -0.917570         NaN   
3                              Anguilla   0.173653         NaN   
4                             Argentina   1.018071   -0.176239   
..                                  ...        ...         ...   
171  Venezuela (Bolivarian Republic of)   0.353610   -0.594301   
172                            Viet Nam   2.694036         NaN   
173                               Yemen   2.952456         NaN   
174                              Zambia  -0.234051         NaN   
175                            Zimbabwe  -1.056649   -1.019850   

     capital_growth  tfp_share  capital_share  sum_shares  
0          4.572148        NaN       2.522678         NaN  
1          4.175488        NaN       5.743660         NaN  
2          2.531968        

In [17]:
# Drop rows where 'lp_growth' is NaN since growth rates cannot be computed without it
pwt_filtered = pwt_filtered.dropna(subset=['lp_growth'])

# Optionally, remove countries with extreme values or where data seems unreliable
# This step depends on your specific criteria for data cleaning
