# Анализ деловых циклов по данным World Bank’s WDI

В этом блокноте анализируются данные для определения, насколько применимы к реальным данным факты, обсуждаемые в первой главе Martin Uribe, Open Economy Macroeconomics.

В качестве реальных данных взяты:
1. ВВП на душу населения (в локальной валюте)
1. Расходы домохозяйств и некоммерческих организаций на конечное потребление (% от ВВП)
1. Валовое накопление капитала (% от ВВП)
1. Объём государственных расходов на конечное потребление (% от ВВП)
1. Общий объём импорта товаров и услуг (% от ВВП)
1. Общий объём экспорта товаров и услуг (% от ВВП)

Взятые страны: Центральная Африканская республика (бедная), Греция (развивающаяся), США (богатая)

### Импорт библиотек для анализа

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.filters.hp_filter import hpfilter
from scipy import stats

### Чтение данных

In [4]:
raw_df = pd.read_csv('data.csv')
raw_df

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1960 [YR1960],1961 [YR1961],1962 [YR1962],1963 [YR1963],1964 [YR1964],1965 [YR1965],...,2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],2021 [YR2021],2022 [YR2022],2023 [YR2023]
0,United States,USA,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,..,..,..,..,..,..,...,55153.394018,56849.469792,57976.628204,60047.719073,62875.666138,65227.956591,64401.507435,71307.401728,77860.911291,82304.620427
1,United States,USA,GDP per capita (constant LCU),NY.GDP.PCAP.KN,19373.7345783219,19544.0495179405,20425.4575475238,21010.8855328098,21915.1540734487,23049.3353164902,...,57200.56872,58417.460129,59014.875567,60047.719073,61467.512863,62731.755706,61124.688957,64723.919102,65969.117811,67311.985092
2,United States,USA,Households and NPISHs final consumption expend...,NE.CON.PRVT.ZS,61.0593318290393,60.6914772523262,59.9535682851458,59.8443672268638,59.8910560099553,59.579118683332,...,67.437284,67.21741,67.678319,67.767473,67.457852,67.026718,66.617903,68.045389,68.023662,67.90147
3,United States,USA,Gross capital formation (% of GDP),NE.GDI.TOTL.ZS,22.5817018592685,22.479885011645,23.0803965716589,23.140103957718,23.1234969557422,23.8745255337027,...,20.901194,21.41519,20.887956,21.155769,21.569073,21.669809,21.418819,21.333282,21.949773,21.541033
4,United States,USA,General government final consumption expenditu...,NE.CON.GOVT.ZS,15.5823164085842,15.87869266483,16.1543263149681,16.0961900202767,15.8060103227583,15.6273583261651,...,14.551658,14.233317,14.110004,13.847134,13.844213,13.983559,14.895745,14.253033,13.713795,13.433834
5,United States,USA,Imports of goods and services (% of GDP),NE.IMP.GNFS.ZS,4.21117277102206,4.03241301969625,4.1282910886803,4.09395091411542,4.09899432607246,4.24059713112712,...,16.398355,15.277065,14.564439,14.947857,15.158248,14.469283,13.006122,14.422678,15.289462,13.887852
6,United States,USA,Exports of goods and services (% of GDP),NE.EXP.GNFS.ZS,4.98626012837842,4.9057944362507,4.80644537486808,4.86762696404143,5.10755345519255,4.99607412327851,...,13.508214,12.411149,11.888159,12.177481,12.287111,11.789197,10.073656,10.790978,11.602228,11.011511
7,Central African Republic,CAF,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,..,..,..,..,..,..,...,698.963739,768.870888,826.013301,883.810735,905.817789,984.843184,1066.290054,1128.55238,1217.849771,1257.094357
8,Central African Republic,CAF,GDP per capita (constant LCU),NY.GDP.PCAP.KN,221329.858912348,227976.801300603,215346.852438897,209679.86143579,209792.654096064,207464.478069115,...,135113.434859,140786.551805,144835.555703,148870.838098,151815.550878,154431.18343,153281.460852,152200.108038,153312.638671,152756.29083
9,Central African Republic,CAF,Households and NPISHs final consumption expend...,NE.CON.PRVT.ZS,72.0000080872718,68.874198197447,69.5081982252081,71.9243111186294,67.816108452237,66.937674796748,...,82.221792,82.427548,84.015401,82.955713,93.70013,96.08882,91.20026,92.84039,97.365,97.365


### Подготовка данных
Сначала уберём метаданные, разделим их по странам и поменяем столбы местами с колонками

In [6]:
raw_df = raw_df.iloc[:-5]

usa_df = raw_df[raw_df['Country Name'] == 'United States']
usa_df = usa_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

greece_df = raw_df[raw_df['Country Name'] == 'Greece']
greece_df = greece_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

car_df = raw_df[raw_df['Country Name'] == 'Central African Republic']
car_df = car_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

In [8]:
def transpose_df(df):
    year_columns = [col for col in df.columns if '[YR' in col]
    
    melted_df = df[['Series Name'] + year_columns].copy()
    # Уберём '[YR****]' из имени года
    melted_df.columns = [col.split(' [YR')[0] if '[YR' in col else col for col in melted_df.columns]
    melted_df = melted_df.melt(id_vars=['Series Name'], 
                          var_name='Year', 
                          value_name='Value')
    
    final_df = melted_df.pivot(index='Year', 
                          columns='Series Name', 
                          values='Value')
    final_df.index = pd.to_numeric(final_df.index)
    return final_df

In [9]:
usa_df = transpose_df(usa_df)
greece_df = transpose_df(greece_df)
car_df = transpose_df(car_df)

car_df

Series Name,Exports of goods and services (% of GDP),GDP per capita (constant LCU),"GDP per capita, PPP (current international $)",General government final consumption expenditure (% of GDP),Gross capital formation (% of GDP),Households and NPISHs final consumption expenditure (% of GDP),Imports of goods and services (% of GDP)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1960,23.2727244800003,221329.858912348,..,19.2727184145465,19.6363558254555,72.0000080872718,34.1818122618189
1961,26.4900657865883,227976.801300603,..,19.2053010065348,21.1920526292707,68.874198197447,35.7615888118942
1962,24.5901651330286,215346.852438897,..,20.3278663198068,23.2786931652776,69.5081982252081,37.7049074334869
1963,25.2365924230513,209679.86143579,..,19.5583591278648,21.7665568639355,71.9243111186294,38.4858050224403
1964,28.4482693940755,209792.654096064,..,22.4137898186687,22.1264358377924,67.816108452237,40.8045902843848
...,...,...,...,...,...,...,...
2019,15.75538,154431.18343,984.843184,8.04464,26.0,96.08882,34.31042
2020,15.17248,153281.460852,1066.290054,9.67704,26.0,91.20026,34.34854
2021,13.22746,152200.108038,1128.55238,9.14075,25.36414,92.84039,30.93166
2022,12.3005,153312.638671,1217.849771,8.33528,24.32666,97.365,32.93307


In [None]:
# Convert percentage shares to levels
df['gov_consumption'] = (df['gov_share'] / 100) * df['gdp_per_capita']
df['imports'] = (df['import_share'] / 100) * df['gdp_per_capita']
df['exports'] = (df['export_share'] / 100) * df['gdp_per_capita']

# Calculate trade balance
df['trade_balance'] = df['exports'] - df['imports']
df['trade_balance_share'] = df['trade_balance'] / df['gdp_per_capita']

# Log transformations for real variables
df['ln_gdp'] = np.log(df['gdp_per_capita'])
df['ln_consumption'] = np.log(df['consumption'])
df['ln_investment'] = np.log(df['investment'])
df['ln_gov'] = np.log(df['gov_consumption'])
df['ln_exports'] = np.log(df['exports'])
df['ln_imports'] = np.log(df['imports'])

# Time trend
df['trend'] = np.arange(len(df))
df['trend_sq'] = df['trend'] ** 2

### Detrending Methods
The following cells implement the core detrending methods:
1. Log-linear detrending
2. Log-quadratic detrending
3. HP filter

In [None]:
    def log_linear_detrending(self, series_name):
        """Apply log-linear detrending to a series"""
        df = self.processed_data
        ln_series = f"ln_{series_name}"
        
        if ln_series not in df.columns:
            # For trade balance, use levels instead of logs
            if series_name == 'trade_balance_share':
                X = sm.add_constant(df['trend'])
                model = sm.OLS(df[series_name], X).fit()
                cyclical = df[series_name] - model.fittedvalues
            else:
                # Create log series if it doesn't exist
                df[ln_series] = np.log(df[series_name])
                X = sm.add_constant(df['trend'])
                model = sm.OLS(df[ln_series], X).fit()
                cyclical = df[ln_series] - model.fittedvalues
        else:
            X = sm.add_constant(df['trend'])
            model = sm.OLS(df[ln_series], X).fit()
            cyclical = df[ln_series] - model.fittedvalues
            
        return cyclical, model

In [None]:
    def log_quadratic_detrending(self, series_name):
        """Apply log-quadratic detrending to a series"""
        df = self.processed_data
        ln_series = f"ln_{series_name}"
        
        if ln_series not in df.columns:
            # For trade balance, use levels instead of logs
            if series_name == 'trade_balance_share':
                X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
                model = sm.OLS(df[series_name], X).fit()
                cyclical = df[series_name] - model.fittedvalues
            else:
                # Create log series if it doesn't exist
                df[ln_series] = np.log(df[series_name])
                X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
                model = sm.OLS(df[ln_series], X).fit()
                cyclical = df[ln_series] - model.fittedvalues
        else:
            X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
            model = sm.OLS(df[ln_series], X).fit()
            cyclical = df[ln_series] - model.fittedvalues
            
        return cyclical, model

In [None]:
    def hp_filter_detrending(self, series_name, lamb=100):
        """Apply Hodrick-Prescott filter"""
        df = self.processed_data
        
        if series_name == 'trade_balance_share':
            # For trade balance, use levels
            series = df[series_name]
        else:
            # For other series, use logs
            ln_series = f"ln_{series_name}"
            if ln_series not in df.columns:
                df[ln_series] = np.log(df[series_name])
            series = df[ln_series]
        
        # Apply HP filter
        cyclical, trend = hpfilter(series, lamb=lamb)
        
        return cyclical, trend

### Analysis and Visualization Methods
The following cells implement methods for analyzing and visualizing the detrended series:

In [None]:
    def compute_all_detrending(self):
        """Compute all four detrending methods for all variables"""
        df = self.prepare_variables()
        
        # Variables to detrend
        variables = ['gdp', 'consumption', 'investment', 'gov', 'exports', 'imports', 'trade_balance_share']
        
        results = {}
        
        for var in variables:
            var_results = {}
            
            # Method (a): Log-linear detrending
            cyclical_linear, model_linear = self.log_linear_detrending(var)
            var_results['linear_cyclical'] = cyclical_linear
            var_results['linear_trend'] = model_linear.fittedvalues
            
            # Method (b): Log-quadratic detrending
            cyclical_quad, model_quad = self.log_quadratic_detrending(var)
            var_results['quadratic_cyclical'] = cyclical_quad
            var_results['quadratic_trend'] = model_quad.fittedvalues
            
            # Method (c): HP filter with λ=100
            cyclical_hp100, trend_hp100 = self.hp_filter_detrending(var, lamb=100)
            var_results['hp100_cyclical'] = cyclical_hp100
            var_results['hp100_trend'] = trend_hp100
            
            # Method (d): HP filter with λ=6.25
            cyclical_hp625, trend_hp625 = self.hp_filter_detrending(var, lamb=6.25)
            var_results['hp625_cyclical'] = cyclical_hp625
            var_results['hp625_trend'] = trend_hp625
            
            results[var] = var_results
        
        self.results = results
        return results
        
    def calculate_business_cycle_statistics(self):
        """Calculate business cycle statistics for detrended series"""
        if not self.results:
            self.compute_all_detrending()
        
        stats_summary = {}
        
        for var, methods in self.results.items():
            var_stats = {}
            
            for method in ['linear_cyclical', 'quadratic_cyclical', 'hp100_cyclical', 'hp625_cyclical']:
                cyclical_series = methods[method]
                
                # Basic statistics
                var_stats[method] = {
                    'std_dev': np.std(cyclical_series),
                    'mean': np.mean(cyclical_series),
                    'min': np.min(cyclical_series),
                    'max': np.max(cyclical_series),
                    'persistence': cyclical_series.autocorr(lag=1) if len(cyclical_series) > 1 else np.nan
                }
            
            stats_summary[var] = var_stats
        
        return stats_summary
    
    def plot_detrending_comparison(self, variable='gdp'):
        """Plot comparison of different detrending methods for a variable"""
        if variable not in self.results:
            print(f"Variable {variable} not found in results")
            return
        
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f'Detrending Methods Comparison: {variable.upper()}', fontsize=16)
        
        methods = [
            ('linear_cyclical', 'Linear Detrending'),
            ('quadratic_cyclical', 'Quadratic Detrending'), 
            ('hp100_cyclical', 'HP Filter (λ=100)'),
            ('hp625_cyclical', 'HP Filter (λ=6.25)')
        ]
        
        for idx, (method, title) in enumerate(methods):
            ax = axes[idx//2, idx%2]
            cyclical = self.results[variable][method]
            
            ax.plot(self.processed_data['year'], cyclical, 'b-', linewidth=1.5)
            ax.axhline(y=0, color='r', linestyle='--', alpha=0.7)
            ax.set_title(title)
            ax.set_xlabel('Year')
            ax.set_ylabel('Cyclical Component')
            ax.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
    
    def generate_summary_table(self):
        """Generate summary table of business cycle statistics"""
        stats = self.calculate_business_cycle_statistics()
        
        summary_data = []
        
        for var, var_stats in stats.items():
            for method, method_stats in var_stats.items():
                summary_data.append({
                    'Variable': var,
                    'Method': method.replace('_cyclical', ''),
                    'Std_Dev': method_stats['std_dev'],
                    'Mean': method_stats['mean'],
                    'Persistence': method_stats['persistence']
                })
        
        summary_df = pd.DataFrame(summary_data)
        return summary_df.pivot(index='Variable', columns='Method', values=['Std_Dev', 'Mean', 'Persistence'])