### Import libraries

In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt
import seaborn

### Read data

In [None]:
df_military = pd.read_csv("data/military_expenditure.csv", skiprows=4)
df_imports = pd.read_csv("data/imports.csv", skiprows=4)
df_gdp = pd.read_csv("data/gdp.csv", skiprows=4)
df_gdp.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Unnamed: 69
0,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,2983635000.0,3092429000.0,3276184000.0,3395799000.0,2481857000.0,2929447000.0,3279344000.0,3648573000.0,,
1,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,24210630000.0,24963980000.0,27078800000.0,31775750000.0,30285790000.0,33813170000.0,...,828942800000.0,972998900000.0,1012306000000.0,1009721000000.0,933391800000.0,1085745000000.0,1191423000000.0,1245472000000.0,,
2,Afghanistan,AFG,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,18116570000.0,18753460000.0,18053220000.0,18799440000.0,19955930000.0,14260000000.0,14497240000.0,17233050000.0,,
3,Africa Western and Central,AFW,GDP (current US$),NY.GDP.MKTP.CD,11904950000.0,12707880000.0,13630760000.0,14469090000.0,15803760000.0,16921090000.0,...,694361000000.0,687849200000.0,770495000000.0,826483800000.0,789801700000.0,849312400000.0,883973900000.0,799106000000.0,,
4,Angola,AGO,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,52761620000.0,73690150000.0,79450690000.0,70897960000.0,48501560000.0,66505130000.0,104399700000.0,84824650000.0,,


In [None]:
import pandas as pd
import numpy as np

# Read the datasets into DataFrames
military_df = pd.read_csv("data/military_expenditure.csv", skiprows=4)
gdp_df = pd.read_csv("data/gdp.csv", skiprows=4)
imports_df = pd.read_csv("data/imports.csv", skiprows=4)

# Define the years to consider
years = [str(year) for year in range(1990, 2024)]

# Filter only relevant columns
military_data = military_df[['Country Name'] + years]
imports_data = imports_df[['Country Name'] + years]

# Merge datasets on 'Country Name'
merged_df = pd.merge(military_data, imports_data, on='Country Name', suffixes=('_military', '_imports'))

# Function to compute normalized cross-correlation
def normalized_cross_correlation(x, y):
    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)
    return np.correlate(x, y, mode='full') / len(x)

# Compute cross-correlation for each country
cross_corr_results = []

for index, row in merged_df.iterrows():
    country = row['Country Name']
    military = row[[f'{year}_military' for year in years]].astype(float)
    imports = row[[f'{year}_imports' for year in years]].astype(float)

    if military.isnull().sum() > 0 or imports.isnull().sum() > 0:
        continue  # Skip countries with missing data

    cross_corr = normalized_cross_correlation(military.values, imports.values)
    max_corr = np.max(cross_corr)
    lag = np.argmax(cross_corr) - (len(military) - 1)

    cross_corr_results.append((country, max_corr, lag))

# Convert results to DataFrame
cross_corr_df = pd.DataFrame(cross_corr_results, columns=['Country', 'Max Cross-Correlation', 'Lag at Max'])

# Display top countries by cross-correlation
print(cross_corr_df.sort_values(by='Max Cross-Correlation', ascending=False).head(20))

                                         Country  Max Cross-Correlation  \
64                     Late-demographic dividend               0.994957   
46                              IDA & IBRD total               0.990974   
45                                     IBRD only               0.990508   
118                                        World               0.988087   
63                           Low & middle income               0.988077   
70                                 Middle income               0.987891   
18                                         China               0.985049   
115                          Upper middle income               0.984435   
98                                     Singapore               0.984053   
105   East Asia & Pacific (IDA & IBRD countries)               0.983389   
27   East Asia & Pacific (excluding high income)               0.983255   
42                                   High income               0.979097   
29                       

  x = (x - np.mean(x)) / np.std(x)
  x = (x - np.mean(x)) / np.std(x)


 ### Interpretation
Ein Wert nahe 1 zeigt eine hohe Korrelation zwischen den Militärausgaben und dem Importanteil am GDP an. Was konkret bedeutet, dass wenn
einer dieser Werte steigt oder fällt der andere Wert nachfolgt.

### Granger Kausalität

In [13]:
# import pandas as pd
# import numpy as np
# from statsmodels.tsa.stattools import grangercausalitytests

# # Read the datasets into DataFrames
# military_df = pd.read_csv("Military/military.csv", skiprows=4)
# gdp_df = pd.read_csv("GDP/GDP.csv", skiprows=4)
# imports_df = pd.read_csv("Imports/imports.csv", skiprows=4)

# # Define the years to consider
# years = [str(year) for year in range(1990, 2024)]

# # Filter only relevant columns
# military_data = military_df[['Country Name'] + years]
# imports_data = imports_df[['Country Name'] + years]

# # Merge datasets on 'Country Name'
# merged_df = pd.merge(military_data, imports_data, on='Country Name', suffixes=('_military', '_imports'))

# # Compute Granger causality for each country
# granger_results = []

# for index, row in merged_df.iterrows():
#     country = row['Country Name']
#     military = row[[f'{year}_military' for year in years]].astype(float)
#     imports = row[[f'{year}_imports' for year in years]].astype(float)

#     if military.isnull().sum() > 0 or imports.isnull().sum() > 0:
#         continue  # Skip countries with missing data

#     try:
#         data = pd.DataFrame({"military": military.values, "imports": imports.values})
#         # Granger test: does military spending help predict imports?
#         test_result = grangercausalitytests(data, maxlag=5)
#         p_values = [round(test_result[i+1][0]['ssr_ftest'][1], 4) for i in range(5)]
#         granger_results.append((country, p_values))
#     except Exception as e:
#         continue  # Skip if test fails

# # Convert results to DataFrame
# granger_df = pd.DataFrame(granger_results, columns=['Country', 'P-Values by Lag'])

# # Display top countries where military spending Granger-causes imports (lowest p-values)
# print(granger_df.tail(20))

### Granger Kausalität für ein einzelnen Staat

In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests

country_code = "DEU"

# --- Hilfsfunktion zum Einlesen und Umformen ---
def load_world_bank_csv(path: str) -> pd.DataFrame:
    df = pd.read_csv(path, skiprows=4)
    df = df.drop(columns=[col for col in df.columns if col.startswith("Unnamed")], errors="ignore")
    return df

def reshape_indicator(df: pd.DataFrame, value_name: str) -> pd.DataFrame:
    df_usa = df[df["Country Code"] == country_code]
    df_long = df_usa.melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name=value_name
    )
    df_long["Year"] = pd.to_numeric(df_long["Year"], errors="coerce")
    df_long = df_long.dropna(subset=["Year", value_name])
    df_long["Year"] = df_long["Year"].astype(int)
    return df_long[["Year", value_name]]

# --- Dateien einlesen ---
military_df = load_world_bank_csv("data/military_expenditure.csv")
export_df = load_world_bank_csv("data/export.csv")
import_df = load_world_bank_csv("data/imports.csv")
gdp_df = load_world_bank_csv("gdp/gdp.csv")

# --- USA-spezifische Zeitreihen extrahieren ---
military_usa = reshape_indicator(military_df, "Military")
export_usa = reshape_indicator(export_df, "Export_pct_GDP")
import_usa = reshape_indicator(import_df, "Import_pct_GDP")
gdp_usa = reshape_indicator(gdp_df, "GDP")

# --- Daten zusammenführen ---
merged_df = military_usa.merge(gdp_usa, on="Year", how="inner") \
                        .merge(export_usa, on="Year", how="inner") \
                        .merge(import_usa, on="Year", how="inner")

# --- Granger-Kausalität: Military → andere Variablen ---
target_vars = ["GDP", "Export_pct_GDP", "Import_pct_GDP"]
results = {}

for target in target_vars:
    test_data = merged_df[["Military", target]].dropna()
    # Achtung: Reihenfolge im Test ist [Ziel, Ursache]
    test_result = grangercausalitytests(test_data[[target, "Military"]], maxlag=5)
    p_values = {lag: round(test_result[lag][0]['ssr_ftest'][1], 4) for lag in test_result}
    results[target] = p_values

# --- Ergebnisse als DataFrame ---
granger_df = pd.DataFrame(results).T
granger_df.columns = [f"Lag {i}" for i in granger_df.columns]
print(granger_df)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.0480  , p=0.8274  , df_denom=50, df_num=1
ssr based chi2 test:   chi2=0.0509  , p=0.8215  , df=1
likelihood ratio test: chi2=0.0509  , p=0.8215  , df=1
parameter F test:         F=0.0480  , p=0.8274  , df_denom=50, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=0.3506  , p=0.7061  , df_denom=47, df_num=2
ssr based chi2 test:   chi2=0.7758  , p=0.6785  , df=2
likelihood ratio test: chi2=0.7700  , p=0.6804  , df=2
parameter F test:         F=0.3506  , p=0.7061  , df_denom=47, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=0.3262  , p=0.8064  , df_denom=44, df_num=3
ssr based chi2 test:   chi2=1.1344  , p=0.7688  , df=3
likelihood ratio test: chi2=1.1219  , p=0.7718  , df=3
parameter F test:         F=0.3262  , p=0.8064  , df_denom=44, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=0.8781  , p=0.4854  , df_d

### Granger Kausalität für alle Staaten

In [40]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests

# Read the datasets into DataFrames
military_df = pd.read_csv("Military/military.csv", skiprows=4)
gdp_df = pd.read_csv("GDP/GDP.csv", skiprows=4)
imports_df = pd.read_csv("Imports/imports.csv", skiprows=4)

# Define the years to consider
years = [str(year) for year in range(1990, 2024)]

# Filter only relevant columns
military_data = military_df[['Country Name'] + years]
imports_data = imports_df[['Country Name'] + years]

# Merge datasets on 'Country Name'
merged_df = pd.merge(military_data, imports_data, on='Country Name', suffixes=('_military', '_imports'))

# Compute Granger causality for each country
granger_results = []

for index, row in merged_df.iterrows():
    country = row['Country Name']
    military = row[[f'{year}_military' for year in years]].astype(float)
    imports = row[[f'{year}_imports' for year in years]].astype(float)

    if military.isnull().sum() > 0 or imports.isnull().sum() > 0:
        continue  # Skip countries with missing data

    try:
        data = pd.DataFrame({"military": military.values, "imports": imports.values})
        # Granger test: does military spending help predict imports?
        test_result = grangercausalitytests(data, maxlag=3)
        for lag in range(1, 4):
            p_value = round(test_result[lag][0]['ssr_ftest'][1], 4)
            granger_results.append({
                'Country': country,
                'Lag': lag,
                'P-Value': p_value
            })
    except Exception:
        continue  # Skip if test fails

# Convert results to DataFrame
granger_df = pd.DataFrame(granger_results)

# Display top results
# print(granger_df.sort_values(by='P-Value').head(100))
print(granger_df[granger_df["Country"] == "Germany"])



Granger Causality
number of lags (no zero) 1
ssr based F test:         F=9.8063  , p=0.0039  , df_denom=30, df_num=1
ssr based chi2 test:   chi2=10.7869 , p=0.0010  , df=1
likelihood ratio test: chi2=9.3333  , p=0.0023  , df=1
parameter F test:         F=9.8063  , p=0.0039  , df_denom=30, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=4.7122  , p=0.0176  , df_denom=27, df_num=2
ssr based chi2 test:   chi2=11.1697 , p=0.0038  , df=2
likelihood ratio test: chi2=9.5809  , p=0.0083  , df=2
parameter F test:         F=4.7122  , p=0.0176  , df_denom=27, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.2524  , p=0.1081  , df_denom=24, df_num=3
ssr based chi2 test:   chi2=8.7280  , p=0.0331  , df=3
likelihood ratio test: chi2=7.6902  , p=0.0529  , df=3
parameter F test:         F=2.2524  , p=0.1081  , df_denom=24, df_num=3

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=0.0090  , p=0.9252  , df_d