# Анализ деловых циклов по данным World Bank’s WDI

В этом блокноте анализируются данные для определения, насколько применимы к реальным данным факты, обсуждаемые в первой главе Martin Uribe, Open Economy Macroeconomics.

В качестве реальных данных взяты:
1. ВВП на душу населения (в локальной валюте)
1. Расходы домохозяйств и некоммерческих организаций на конечное потребление (% от ВВП)
1. Валовое накопление капитала (% от ВВП)
1. Объём государственных расходов на конечное потребление (% от ВВП)
1. Общий объём импорта товаров и услуг (% от ВВП)
1. Общий объём экспорта товаров и услуг (% от ВВП)

Взятые страны: Центральная Африканская республика (бедная), Греция (развивающаяся), США (богатая)

### Импорт библиотек для анализа

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.filters.hp_filter import hpfilter
from scipy import stats

### Чтение данных

In [17]:
raw_df = pd.read_csv('data.csv')
raw_df

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1960 [YR1960],1961 [YR1961],1962 [YR1962],1963 [YR1963],1964 [YR1964],1965 [YR1965],...,2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],2021 [YR2021],2022 [YR2022],2023 [YR2023]
0,Greece,GRC,GDP per capita (constant LCU),NY.GDP.PCAP.KN,4464.8581176167,5014.47288358607,5002.87136966985,5574.74015655173,6077.22842173996,6700.20886905808,...,16033.0004012885,16102.1387995786,16164.1081810632,16434.6986088567,16808.0869611722,17208.9557306002,15659.9493073813,17223.5459103034,18443.7187274897,18930.6108410212
1,Greece,GRC,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,..,..,..,..,..,..,...,26450.181326726,26614.8483754225,27504.5497984948,28681.6815585005,29792.0592519585,31927.3792151369,29533.087740299,33531.15511226,38969.0221675373,41181.9912164324
2,Greece,GRC,Households and NPISHs final consumption expend...,NE.CON.PRVT.ZS,74.9743600644054,70.8930378982755,71.5567889804874,68.7646539327248,67.950635364173,65.8186851542815,...,69.5329655860989,68.6886594094333,68.0126932819622,68.8100060538919,68.8330418045266,68.3649191955026,69.1479743901376,67.0716816185717,68.0652905785852,66.8996651467981
3,Greece,GRC,Gross capital formation (% of GDP),NE.GDI.TOTL.ZS,15.2152426095875,20.5512940761309,21.6209766692666,25.0513864574215,29.3079210690835,32.1589823023432,...,12.136918450953,12.0084097844243,12.4680861540635,11.8883016329317,12.9530537485632,12.0996603246177,13.5410023084822,17.48296657419,19.9840681581658,16.7312327593368
4,Greece,GRC,General government final consumption expenditu...,NE.CON.GOVT.ZS,12.4644627835671,11.8509885497247,12.3297810989905,11.7579248468176,12.1239020582414,12.1047837901808,...,20.7227080595732,20.7679837462522,20.6180603140124,20.4384126534599,19.6597104516787,19.9791204523634,22.7274940590434,21.4974385558012,19.9856875626536,19.2868439733478
5,Greece,GRC,Imports of goods and services (% of GDP),NE.IMP.GNFS.ZS,13.9746948449116,13.5441527446301,14.1555396958321,14.6484619232212,16.0350443321509,16.3236690804828,...,33.9185692513027,33.085423586627,32.4782765724963,36.1032862068475,40.6071874522328,41.0298090980773,38.8433569445986,47.7522509396405,58.6593231211582,48.4480311941784
6,Greece,GRC,Exports of goods and services (% of GDP),NE.EXP.GNFS.ZS,9.86566267941313,9.44717364857963,8.31788483336611,9.03639646117294,7.61728936615559,7.62209645477615,...,32.514999005264,32.1863775524943,31.1793860165412,34.7793204673102,38.630976154138,39.5602917930958,31.450429504041,40.2604118004654,49.019884781247,43.7082588320458
7,Greece,GRC,Current account balance (% of GDP),BN.CAB.XOKA.GD.ZS,..,..,..,..,..,..,...,-1.59663409250033,-0.82819219779973,-1.68881636338714,-1.79956830770948,-2.92946837177349,-1.50121840629263,-6.48664200392794,-6.34795283469094,-10.3357101038964,-6.16330418209365
8,United States,USA,GDP per capita (constant LCU),NY.GDP.PCAP.KN,19373.7345783219,19544.0495179405,20425.4575475238,21010.8855328098,21915.1540734487,23049.3353164902,...,57200.5687195003,58417.4601292274,59014.8755674907,60047.7190728307,61467.5128626242,62731.7557063691,61124.6889567852,64723.9191018988,65969.117811109,67311.9850921048
9,United States,USA,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,..,..,..,..,..,..,...,55153.3940182967,56849.4697923159,57976.628204291,60047.7190728307,62875.6661382728,65227.9565911035,64401.5074354209,71307.4017277218,77860.9112908848,82304.6204272866


### Подготовка данных
Здесь:
1. уберём метаданные
1. разделим данные по странам
1. поменяем столбы местами с колонками
1. укоротим название колонок

In [24]:
raw_df = raw_df.iloc[:-5]

usa_df = raw_df[raw_df['Country Name'] == 'United States']
usa_df = usa_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

greece_df = raw_df[raw_df['Country Name'] == 'Greece']
greece_df = greece_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

car_df = raw_df[raw_df['Country Name'] == 'Central African Republic']
car_df = car_df.drop(columns=['Country Name', 'Country Code', 'Series Code'])

In [None]:
def transpose_rename_df(df):
    year_columns = [col for col in df.columns if '[YR' in col]
    
    melted_df = df[['Series Name'] + year_columns].copy()
    # Уберём '[YR****]' из имени года
    melted_df.columns = [col.split(' [YR')[0] if '[YR' in col else col for col in melted_df.columns]
    melted_df = melted_df.melt(id_vars=['Series Name'], 
                          var_name='Year', 
                          value_name='Value')
    
    final_df = melted_df.pivot(index='Year', 
                          columns='Series Name', 
                          values='Value')
    final_df.index = pd.to_numeric(final_df.index)

    column_mapping = {
        'GDP per capita (constant LCU)': 'gdp_per_capita',
        'GDP per capita, PPP (current international $)': 'gdp_ppp',
        'Households and NPISHs final consumption expenditure (% of GDP)': 'house_consumption',
        'Gross capital formation (% of GDP)': 'investment',
        'General government final consumption expenditure (% of GDP)': 'gov_spending',
        'Imports of goods and services (% of GDP)': 'import',
        'Exports of goods and services (% of GDP)': 'export',
        'Current account balance (% of GDP)': "current_account"
    }
    final_df = final_df.rename(columns=column_mapping)
    
    final_df = final_df.replace('..', np.nan)
    final_df = final_df.astype(float)
    
    return final_df

In [26]:
usa_df = transpose_rename_df(usa_df)
greece_df = transpose_rename_df(greece_df)
car_df = transpose_rename_df(car_df)

usa_df

ValueError: could not convert string to float: '..'

### Вычисление статистик
Вычислим абсолютные значения всех данных (перевёд проценты в валюту), а также вычислим тороговый баланс и сальдо торговых операций?

In [23]:
def prepare_statistics(df, gdp_name = 'gdp_per_capita'):
    df['house_consumption'] = (df['house_consumption'] / 100) * df[gdp_name]
    df['gov_spending'] = (df['gov_spending'] / 100) * df[gdp_name]
    df['investment'] = (df['investment'] / 100) * df[gdp_name]
    df['import'] = (df['import'] / 100) * df[gdp_name]
    df['export'] = (df['export'] / 100) * df[gdp_name]

    df['trade_balance'] = df['export'] - df['import'] # не забыть разделить на тренд ВВП

    # Log transformations for real variables
    df['ln_gdp'] = np.log(df[gdp_name])
    df['ln_house_consumption'] = np.log(df['house_consumption'])
    df['ln_gov_spending'] = np.log(df['gov_spending'])
    df['ln_investment'] = np.log(df['investment'])
    df['ln_export'] = np.log(df['export'])
    df['ln_import'] = np.log(df['import'])

    # Time trend
    df['trend'] = np.arange(len(df))
    df['trend_sq'] = df['trend'] ** 2

prepare_statistics(usa_df)
prepare_statistics(greece_df)
prepare_statistics(car_df)

usa_df

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [None]:
    def log_linear_detrending(self, series_name):
        """Apply log-linear detrending to a series"""
        df = self.processed_data
        ln_series = f"ln_{series_name}"
        
        if ln_series not in df.columns:
            # For trade balance, use levels instead of logs
            if series_name == 'trade_balance_share':
                X = sm.add_constant(df['trend'])
                model = sm.OLS(df[series_name], X).fit()
                cyclical = df[series_name] - model.fittedvalues
            else:
                # Create log series if it doesn't exist
                df[ln_series] = np.log(df[series_name])
                X = sm.add_constant(df['trend'])
                model = sm.OLS(df[ln_series], X).fit()
                cyclical = df[ln_series] - model.fittedvalues
        else:
            X = sm.add_constant(df['trend'])
            model = sm.OLS(df[ln_series], X).fit()
            cyclical = df[ln_series] - model.fittedvalues
            
        return cyclical, model

In [None]:
    def log_quadratic_detrending(self, series_name):
        """Apply log-quadratic detrending to a series"""
        df = self.processed_data
        ln_series = f"ln_{series_name}"
        
        if ln_series not in df.columns:
            # For trade balance, use levels instead of logs
            if series_name == 'trade_balance_share':
                X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
                model = sm.OLS(df[series_name], X).fit()
                cyclical = df[series_name] - model.fittedvalues
            else:
                # Create log series if it doesn't exist
                df[ln_series] = np.log(df[series_name])
                X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
                model = sm.OLS(df[ln_series], X).fit()
                cyclical = df[ln_series] - model.fittedvalues
        else:
            X = sm.add_constant(np.column_stack([df['trend'], df['trend_sq']]))
            model = sm.OLS(df[ln_series], X).fit()
            cyclical = df[ln_series] - model.fittedvalues
            
        return cyclical, model

In [None]:
    def hp_filter_detrending(self, series_name, lamb=100):
        """Apply Hodrick-Prescott filter"""
        df = self.processed_data
        
        if series_name == 'trade_balance_share':
            # For trade balance, use levels
            series = df[series_name]
        else:
            # For other series, use logs
            ln_series = f"ln_{series_name}"
            if ln_series not in df.columns:
                df[ln_series] = np.log(df[series_name])
            series = df[ln_series]
        
        # Apply HP filter
        cyclical, trend = hpfilter(series, lamb=lamb)
        
        return cyclical, trend

In [None]:
    def compute_all_detrending(self):
        """Compute all four detrending methods for all variables"""
        df = self.prepare_variables()
        
        # Variables to detrend
        variables = ['gdp', 'consumption', 'investment', 'gov', 'exports', 'imports', 'trade_balance_share']
        
        results = {}
        
        for var in variables:
            var_results = {}
            
            # Method (a): Log-linear detrending
            cyclical_linear, model_linear = self.log_linear_detrending(var)
            var_results['linear_cyclical'] = cyclical_linear
            var_results['linear_trend'] = model_linear.fittedvalues
            
            # Method (b): Log-quadratic detrending
            cyclical_quad, model_quad = self.log_quadratic_detrending(var)
            var_results['quadratic_cyclical'] = cyclical_quad
            var_results['quadratic_trend'] = model_quad.fittedvalues
            
            # Method (c): HP filter with λ=100
            cyclical_hp100, trend_hp100 = self.hp_filter_detrending(var, lamb=100)
            var_results['hp100_cyclical'] = cyclical_hp100
            var_results['hp100_trend'] = trend_hp100
            
            # Method (d): HP filter with λ=6.25
            cyclical_hp625, trend_hp625 = self.hp_filter_detrending(var, lamb=6.25)
            var_results['hp625_cyclical'] = cyclical_hp625
            var_results['hp625_trend'] = trend_hp625
            
            results[var] = var_results
        
        self.results = results
        return results
        
    def calculate_business_cycle_statistics(self):
        """Calculate business cycle statistics for detrended series"""
        if not self.results:
            self.compute_all_detrending()
        
        stats_summary = {}
        
        for var, methods in self.results.items():
            var_stats = {}
            
            for method in ['linear_cyclical', 'quadratic_cyclical', 'hp100_cyclical', 'hp625_cyclical']:
                cyclical_series = methods[method]
                
                # Basic statistics
                var_stats[method] = {
                    'std_dev': np.std(cyclical_series),
                    'mean': np.mean(cyclical_series),
                    'min': np.min(cyclical_series),
                    'max': np.max(cyclical_series),
                    'persistence': cyclical_series.autocorr(lag=1) if len(cyclical_series) > 1 else np.nan
                }
            
            stats_summary[var] = var_stats
        
        return stats_summary
    
    def plot_detrending_comparison(self, variable='gdp'):
        """Plot comparison of different detrending methods for a variable"""
        if variable not in self.results:
            print(f"Variable {variable} not found in results")
            return
        
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f'Detrending Methods Comparison: {variable.upper()}', fontsize=16)
        
        methods = [
            ('linear_cyclical', 'Linear Detrending'),
            ('quadratic_cyclical', 'Quadratic Detrending'), 
            ('hp100_cyclical', 'HP Filter (λ=100)'),
            ('hp625_cyclical', 'HP Filter (λ=6.25)')
        ]
        
        for idx, (method, title) in enumerate(methods):
            ax = axes[idx//2, idx%2]
            cyclical = self.results[variable][method]
            
            ax.plot(self.processed_data['year'], cyclical, 'b-', linewidth=1.5)
            ax.axhline(y=0, color='r', linestyle='--', alpha=0.7)
            ax.set_title(title)
            ax.set_xlabel('Year')
            ax.set_ylabel('Cyclical Component')
            ax.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
    
    def generate_summary_table(self):
        """Generate summary table of business cycle statistics"""
        stats = self.calculate_business_cycle_statistics()
        
        summary_data = []
        
        for var, var_stats in stats.items():
            for method, method_stats in var_stats.items():
                summary_data.append({
                    'Variable': var,
                    'Method': method.replace('_cyclical', ''),
                    'Std_Dev': method_stats['std_dev'],
                    'Mean': method_stats['mean'],
                    'Persistence': method_stats['persistence']
                })
        
        summary_df = pd.DataFrame(summary_data)
        return summary_df.pivot(index='Variable', columns='Method', values=['Std_Dev', 'Mean', 'Persistence'])