In [27]:
import pandas as pd 
import numpy as np

# Growth accounting from a Penn World Table 10.01 Stata extract.
#
# The pipeline is split into two pure helpers (no I/O) plus a small driver at
# the bottom, so the calculation can be exercised on any DataFrame that has
# the required columns without downloading anything.

# Download location of the Stata file read by the driver below.
url = 'https://dataverse.nl/api/access/datafile/354098'

# Countries kept in the sample (matched against the 'country' column).
countries = ["France", "Germany", "Canada", "Italy", "Japan", "United Kingdom", "United States"]


def growth_accounting_panel(pwt, country_names, first_year=1995, last_year=2019):
    """Build the per-country, per-year growth-accounting panel.

    Parameters
    ----------
    pwt : pandas.DataFrame
        Must contain the columns 'country', 'year', 'countrycode', 'rgdpna',
        'rkna', 'emp', 'labsh', 'rtfpna', 'pop' and 'avh'. It is not modified.
    country_names : list of str
        Values of the 'country' column to keep.
    first_year, last_year : int, optional
        Inclusive bounds on 'year'.

    Returns
    -------
    pandas.DataFrame
        The selected columns plus the derived columns 'alpha', 'hours',
        'y_n', 'a', 't', 'tfp_term', 'cap_term', 'lab_term', 'g', 'tt' and
        'ct', sorted by year. Rows with any missing value are dropped, which
        includes each country's first year in the window (it has no previous
        year to difference against).
    """
    columns = ['year', 'countrycode', 'rgdpna', 'rkna', 'emp', 'labsh', 'rtfpna', 'pop', 'avh']
    in_sample = (
        pwt['country'].isin(country_names)
        & (pwt['year'] >= first_year)
        & (pwt['year'] <= last_year)
    )
    # A single .loc call plus an explicit copy: the column assignments below
    # then operate on an independent frame and cannot raise
    # SettingWithCopyWarning or write back into `pwt`.
    panel = pwt.loc[in_sample, columns].dropna().copy()

    # alpha is one minus the labour-share column, i.e. the capital share.
    panel['alpha'] = 1 - panel['labsh']
    panel['hours'] = panel['emp'] * panel['avh']
    # Log of rgdpna / emp.
    # NOTE(review): earlier comments called this "per capita", but the
    # denominator is 'emp', not 'pop' - confirm which one is intended.
    panel['y_n'] = np.log(panel['rgdpna'] / panel['emp'])
    # 'a' duplicates 'alpha' and 't' duplicates 'labsh'; both are kept
    # because the summary table aggregates them under these names.
    panel['a'] = 1 - panel['labsh']
    panel['t'] = panel['labsh']
    # log(x ** p) written as p * log(x): same value, no intermediate power.
    panel['tfp_term'] = np.log(panel['rtfpna']) / (1 - panel['alpha'])
    panel['cap_term'] = (panel['alpha'] / (1 - panel['alpha'])) * np.log(panel['rkna'] / panel['rgdpna'])
    # Computed and kept in the panel, but not differenced or summarised.
    panel['lab_term'] = np.log(panel['hours'] / panel['pop'])

    # Sort by year so the within-country differences run forward in time.
    panel = panel.sort_values('year')
    by_country = panel.groupby('countrycode')

    # Log differences times 100, i.e. approximate percentage changes.
    panel['g'] = by_country['y_n'].diff() * 100
    panel['tt'] = by_country['tfp_term'].diff() * 100
    panel['ct'] = by_country['cap_term'].diff() * 100

    # The first observation of every country has NaN differences.
    return panel.dropna()


def summarize_growth(panel):
    """Average the growth-accounting columns by country.

    Parameters
    ----------
    panel : pandas.DataFrame
        Output of ``growth_accounting_panel``.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'countrycode' with the mean of 'g', 'a', 't', 'ct' and
        'tt', followed by the same five means under descriptive labels.
    """
    summary = panel.groupby('countrycode').agg(
        {'g': 'mean', 'a': 'mean', 't': 'mean', 'ct': 'mean', 'tt': 'mean'}
    )

    summary['Growth Rate'] = summary['g']
    summary['TFP Growth'] = summary['tt']
    summary['Capital Deepening'] = summary['ct']
    # NOTE(review): 't' is the mean of 'labsh', so this column reports the
    # labour share under the label 'TFP Share' - confirm the intended label.
    summary['TFP Share'] = summary['t']
    summary['Capital Share'] = summary['a']
    return summary


# Runs when executed as a script or notebook cell; importing this file only
# defines the helpers above and does not touch the network.
if __name__ == "__main__":
    # Load data
    pwt1001 = pd.read_stata(url)

    # Build the panel and the per-country summary
    data = growth_accounting_panel(pwt1001, countries)
    summary = summarize_growth(data)

    # Print Output
    print(summary)



ModuleNotFoundError: No module named 'pandas'