### Import libraries

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt
import seaborn

### Read data

In [None]:
# Load the three indicator tables; the first 4 lines of every file are skipped
# (presumably a metadata preamble above the header row - confirm against the files).
df_military, df_imports, df_gdp = (
    pd.read_csv(csv_path, skiprows=4)
    for csv_path in (
        "data/military_expenditure.csv",
        "data/imports.csv",
        "data/gdp.csv",
    )
)
# Last expression of the cell: shows the first GDP rows in the notebook.
df_gdp.head()

In [None]:
import pandas as pd
import numpy as np

# Load the raw indicator tables (the first 4 lines of each file are skipped)
military_df = pd.read_csv("data/military_expenditure.csv", skiprows=4)
gdp_df = pd.read_csv("data/gdp.csv", skiprows=4)
imports_df = pd.read_csv("data/imports.csv", skiprows=4)

# Year labels 1990..2023, as strings because they are used as column names
years = list(map(str, range(1990, 2024)))

# Keep the country name plus the selected year columns only
selected_columns = ['Country Name', *years]
military_data = military_df[selected_columns]
imports_data = imports_df[selected_columns]

# Inner join on the country name; the overlapping year columns become
# '<year>_military' and '<year>_imports'
merged_df = military_data.merge(
    imports_data,
    on='Country Name',
    suffixes=('_military', '_imports'),
)

# Normalized cross-correlation of two series
def normalized_cross_correlation(x, y):
    """Return the full cross-correlation of the standardized inputs.

    Both series are shifted to zero mean and scaled by their standard
    deviation (np.std, i.e. the population standard deviation) before
    np.correlate(..., mode='full') is applied. The result is divided by
    len(x), so for equal-length inputs the zero-lag entry (index
    len(x) - 1) equals the Pearson correlation coefficient.

    Args:
        x: First numeric sequence.
        y: Second numeric sequence.

    Returns:
        1-D numpy array with one entry per lag.
    """
    def _standardize(series):
        return (series - np.mean(series)) / np.std(series)

    standardized_x = _standardize(x)
    standardized_y = _standardize(y)
    return np.correlate(standardized_x, standardized_y, mode='full') / len(x)

# Cross-correlate military spending with imports, one country (row) at a time
military_columns = [f'{year}_military' for year in years]
imports_columns = [f'{year}_imports' for year in years]
# Position of lag 0 in the 'full' correlation output of two equal-length series
zero_lag_index = len(years) - 1

cross_corr_results = []
for _, record in merged_df.iterrows():
    military = record[military_columns].astype(float)
    imports = record[imports_columns].astype(float)

    # Only countries with a complete series for both indicators are evaluated
    if military.isnull().any() or imports.isnull().any():
        continue

    correlation = normalized_cross_correlation(military.to_numpy(), imports.to_numpy())
    peak_value = correlation.max()
    peak_lag = correlation.argmax() - zero_lag_index
    cross_corr_results.append((record['Country Name'], peak_value, peak_lag))

# One row per country: peak correlation and the lag at which it occurs
result_columns = ['Country', 'Max Cross-Correlation', 'Lag at Max']
cross_corr_df = pd.DataFrame(cross_corr_results, columns=result_columns)

# Show the 20 countries with the highest peak correlation
ranked = cross_corr_df.sort_values(by='Max Cross-Correlation', ascending=False)
print(ranked.head(20))

### Interpretation
Ein Wert nahe 1 zeigt eine hohe Korrelation zwischen den Militärausgaben und dem Importanteil am GDP an. Konkret bedeutet das: Wenn
einer dieser Werte steigt oder fällt, folgt der andere Wert nach.

### Granger Kausalität

In [None]:
# import pandas as pd
# import numpy as np
# from statsmodels.tsa.stattools import grangercausalitytests

# # Read the datasets into DataFrames
# military_df = pd.read_csv("Military/military.csv", skiprows=4)
# gdp_df = pd.read_csv("GDP/GDP.csv", skiprows=4)
# imports_df = pd.read_csv("Imports/imports.csv", skiprows=4)

# # Define the years to consider
# years = [str(year) for year in range(1990, 2024)]

# # Filter only relevant columns
# military_data = military_df[['Country Name'] + years]
# imports_data = imports_df[['Country Name'] + years]

# # Merge datasets on 'Country Name'
# merged_df = pd.merge(military_data, imports_data, on='Country Name', suffixes=('_military', '_imports'))

# # Compute Granger causality for each country
# granger_results = []

# for index, row in merged_df.iterrows():
#     country = row['Country Name']
#     military = row[[f'{year}_military' for year in years]].astype(float)
#     imports = row[[f'{year}_imports' for year in years]].astype(float)

#     if military.isnull().sum() > 0 or imports.isnull().sum() > 0:
#         continue  # Skip countries with missing data

#     try:
#         data = pd.DataFrame({"military": military.values, "imports": imports.values})
#         # Granger test: does military spending help predict imports?
#         test_result = grangercausalitytests(data, maxlag=5)
#         p_values = [round(test_result[i+1][0]['ssr_ftest'][1], 4) for i in range(5)]
#         granger_results.append((country, p_values))
#     except Exception as e:
#         continue  # Skip if test fails

# # Convert results to DataFrame
# granger_df = pd.DataFrame(granger_results, columns=['Country', 'P-Values by Lag'])

# # Display top countries where military spending Granger-causes imports (lowest p-values)
# print(granger_df.tail(20))

### Granger Kausalität für einen einzelnen Staat

In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests

# Value matched against the "Country Code" column by reshape_indicator below
country_code = "DEU"

# --- Helper functions for loading and reshaping ---
def load_world_bank_csv(path: str) -> pd.DataFrame:
    """Read an indicator CSV and discard pandas' auto-named columns.

    The first 4 lines of the file are skipped before the header row is
    parsed. Columns whose name starts with "Unnamed" (the name pandas gives
    to header cells that are empty) are removed.

    Args:
        path: Location of the CSV file.

    Returns:
        The parsed table without "Unnamed..." columns.
    """
    table = pd.read_csv(path, skiprows=4)
    unnamed_columns = [name for name in table.columns if name.startswith("Unnamed")]
    return table.drop(columns=unnamed_columns, errors="ignore")

def reshape_indicator(df: pd.DataFrame, value_name: str, code: "str | None" = None) -> pd.DataFrame:
    """Extract one country's series from a wide indicator table in long form.

    Args:
        df: Wide table with the columns "Country Name", "Country Code",
            "Indicator Name" and "Indicator Code" plus one column per year.
        value_name: Name given to the value column of the result.
        code: Value to match in the "Country Code" column. When omitted,
            the module-level ``country_code`` is used, which is the
            behaviour of the original two-argument form.

    Returns:
        DataFrame with an integer "Year" column and a ``value_name`` column.
        Rows whose year label is not numeric or whose value is missing are
        dropped.
    """
    if code is None:
        # Backward-compatible fallback to the notebook-level setting.
        code = country_code

    country_rows = df[df["Country Code"] == code]
    long_form = country_rows.melt(
        id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
        var_name="Year",
        value_name=value_name,
    )
    # Non-numeric column labels become NaN here and are removed by the dropna below.
    long_form = long_form.assign(Year=pd.to_numeric(long_form["Year"], errors="coerce"))
    long_form = long_form.dropna(subset=["Year", value_name])
    long_form = long_form.assign(Year=long_form["Year"].astype(int))
    return long_form[["Year", value_name]]

# --- Load the indicator files ---
# NOTE(review): the first cells read "data/military_expenditure.csv"; the
# spelling used here differs - confirm which file name actually exists.
military_df = load_world_bank_csv("data/military_expanditure.csv")
export_df = load_world_bank_csv("data/exports.csv")
import_df = load_world_bank_csv("data/imports.csv")
gdp_df = load_world_bank_csv("data/gdp.csv")

# --- Extract the time series of the selected country ---
# (the *_usa names are historical; the rows selected are those of country_code)
military_usa = reshape_indicator(military_df, "Military")
export_usa = reshape_indicator(export_df, "Export_pct_GDP")
import_usa = reshape_indicator(import_df, "Import_pct_GDP")
gdp_usa = reshape_indicator(gdp_df, "GDP")

# --- Combine the series: keep only years present in all four ---
merged_df = military_usa
for other_series in (gdp_usa, export_usa, import_usa):
    merged_df = merged_df.merge(other_series, on="Year", how="inner")

# --- Granger causality: Military -> each of the other variables ---
target_vars = ["GDP", "Export_pct_GDP", "Import_pct_GDP"]
results = {}

for target in target_vars:
    test_data = merged_df[["Military", target]].dropna()
    # Column order handed to the test is [target, cause]
    test_result = grangercausalitytests(test_data[[target, "Military"]], maxlag=5)
    # Keep the p-value of the SSR-based F-test for every lag
    p_values = {
        lag: round(outcome[0]['ssr_ftest'][1], 4)
        for lag, outcome in test_result.items()
    }
    results[target] = p_values

# --- Results as a DataFrame: one row per target, one column per lag ---
granger_df = pd.DataFrame(results).T
granger_df = granger_df.rename(columns=lambda lag: f"Lag {lag}")
print(granger_df)

In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests

# country_code = "DEU"
# Highest lag handed to grangercausalitytests inside calc
maxlag = 3

# --- Helper functions for loading and reshaping ---
def load_world_bank_csv(path: str) -> pd.DataFrame:
    """Read an indicator CSV, drop auto-named columns and very sparse rows.

    The first 4 lines of the file are skipped before the header row is
    parsed. Columns whose name starts with "Unnamed" are removed. After
    that, a row is kept only if it has at least (number of columns - 40)
    non-missing values.

    Args:
        path: Location of the CSV file.

    Returns:
        The filtered table.
    """
    table = pd.read_csv(path, skiprows=4)
    unnamed_columns = [name for name in table.columns if name.startswith("Unnamed")]
    table = table.drop(columns=unnamed_columns, errors="ignore")
    # Minimum count of non-null cells a row needs in order to be kept
    min_non_null = table.shape[1] - 40
    return table.dropna(thresh=min_non_null)

def reshape_indicator(df: pd.DataFrame, value_name: str, country_code: str) -> pd.DataFrame:
    """Extract one country's series from a wide indicator table in long form.

    Args:
        df: Wide table with the columns "Country Name", "Country Code",
            "Indicator Name" and "Indicator Code" plus one column per year.
        value_name: Name given to the value column of the result.
        country_code: Value to match in the "Country Code" column.

    Returns:
        DataFrame with an integer "Year" column and a ``value_name`` column.
        Rows whose year label is not numeric or whose value is missing are
        dropped.
    """
    id_columns = ["Country Name", "Country Code", "Indicator Name", "Indicator Code"]
    country_rows = df.loc[df["Country Code"] == country_code]
    long_form = country_rows.melt(id_vars=id_columns, var_name="Year", value_name=value_name)
    # Non-numeric column labels become NaN here and are removed by the dropna below
    long_form = long_form.assign(Year=pd.to_numeric(long_form["Year"], errors="coerce"))
    long_form = long_form.dropna(subset=["Year", value_name])
    long_form = long_form.assign(Year=long_form["Year"].astype(int))
    return long_form[["Year", value_name]]

# --- Load the indicator files ---
# NOTE(review): the first cells read "data/military_expenditure.csv"; the
# spelling used here differs - confirm which file name actually exists.
military_df, export_df, import_df, gdp_df = (
    load_world_bank_csv(csv_path)
    for csv_path in (
        "data/military_expanditure.csv",
        "data/exports.csv",
        "data/imports.csv",
        "data/gdp.csv",
    )
)

# --- Per-country Granger tests ---
def calc(country_code):
    """Run the Military -> {GDP, exports, imports} Granger tests for one country.

    Reads the module-level tables military_df, gdp_df, export_df and
    import_df as well as the module-level maxlag.

    Args:
        country_code: Value to match in the "Country Code" column.

    Returns:
        Dict mapping each target name ("GDP", "Export_pct_GDP",
        "Import_pct_GDP") to a dict {lag: p-value}, where the p-value is
        that of the SSR-based F-test rounded to 4 decimals.
    """
    # Order matters: it fixes the column order of the merged table
    indicator_sources = (
        (military_df, "Military"),
        (gdp_df, "GDP"),
        (export_df, "Export_pct_GDP"),
        (import_df, "Import_pct_GDP"),
    )

    # Inner-join all series on the year so only common years remain
    merged_df = None
    for table, label in indicator_sources:
        series = reshape_indicator(table, label, country_code)
        if merged_df is None:
            merged_df = series
        else:
            merged_df = merged_df.merge(series, on="Year", how="inner")

    results = {}
    for target in ("GDP", "Export_pct_GDP", "Import_pct_GDP"):
        test_data = merged_df[["Military", target]].dropna()
        # Column order handed to the test is [target, cause]
        test_result = grangercausalitytests(test_data[[target, "Military"]], maxlag=maxlag)
        results[target] = {
            lag: round(outcome[0]['ssr_ftest'][1], 4)
            for lag, outcome in test_result.items()
        }

    return results

# --- Collect the results into a DataFrame ---
# calc returns {target: {lag: p-value}} for one country. The previous
# Series.map(calc) + DataFrame(...).T produced a single row of dict objects
# with columns mislabelled "Lag <row index>", and one failing country aborted
# the whole run. Here every country is evaluated on its own and the result
# is flattened into one row per (country, target).
results = []
for code in military_df["Country Code"].dropna().unique():
    try:
        per_target = calc(code)
    except Exception as exc:
        # Presumably raised by the test when a country has too few common
        # years across the indicators; report it and move on.
        print(f"Skipping {code}: {exc}")
        continue

    for target, p_values in per_target.items():
        row = {"Country Code": code, "Target": target}
        row.update({f"Lag {lag}": p_value for lag, p_value in p_values.items()})
        results.append(row)

granger_df = pd.DataFrame(results)
print(granger_df)

### Granger Kausalität für alle Staaten

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests

# Read the datasets into DataFrames
# NOTE(review): the first cells read "data/military_expenditure.csv"; the
# spelling used here differs - confirm which file name actually exists.
military_df = pd.read_csv("data/military_expanditure.csv", skiprows=4)
# Lower-case file name, matching every other cell that reads the GDP file;
# the former "data/GDP.csv" only resolves on case-insensitive filesystems
# if the file is stored as "gdp.csv".
gdp_df = pd.read_csv("data/gdp.csv", skiprows=4)
imports_df = pd.read_csv("data/imports.csv", skiprows=4)

# Define the years to consider (used as column labels, hence strings)
years = [str(year) for year in range(2000, 2024)]

# Filter only relevant columns
military_data = military_df[['Country Name'] + years]
imports_data = imports_df[['Country Name'] + years]
gdp_data = gdp_df[['Country Name'] + years]

# Merge all datasets on 'Country Name'. The first merge suffixes the clashing
# year columns; the GDP year columns no longer clash after that, so they are
# renamed explicitly to '<year>_gdp'.
merged_df = military_data.merge(imports_data, on='Country Name', suffixes=('_military', '_imports'))
merged_df = merged_df.merge(gdp_data, on='Country Name')
merged_df = merged_df.rename(columns={year: f"{year}_gdp" for year in years})

# Compute Granger causality for each country
def _granger_p_values(target, cause, maxlag=3):
    """Return {lag: p-value} of the SSR F-test for 'cause helps predict target'.

    The two series are handed to grangercausalitytests with the target in
    the first column and the presumed cause in the second one. P-values are
    rounded to 4 decimals.
    """
    data = pd.DataFrame({"target": target, "cause": cause})
    test_result = grangercausalitytests(data, maxlag=maxlag)
    return {
        lag: round(outcome[0]['ssr_ftest'][1], 4)
        for lag, outcome in test_result.items()
    }


# Column names are the same for every row, so build them once
military_columns = [f'{year}_military' for year in years]
imports_columns = [f'{year}_imports' for year in years]
gdp_columns = [f'{year}_gdp' for year in years]

granger_results = []

for _, row in merged_df.iterrows():
    country = row['Country Name']
    military = row[military_columns].astype(float)
    imports = row[imports_columns].astype(float)
    gdp = row[gdp_columns].astype(float)

    if military.isnull().any() or imports.isnull().any() or gdp.isnull().any():
        continue  # Skip countries with missing data

    # Does military spending help predict imports / GDP? Each KPI is tested
    # exactly once and independently, so a failing import test no longer
    # suppresses the GDP test and GDP rows are no longer duplicated.
    for kpi, target in (('Import', imports), ('GDP', gdp)):
        try:
            p_values = _granger_p_values(target.values, military.values, maxlag=3)
        except Exception as exc:
            # Report instead of silently dropping the country/KPI pair
            print(f"Granger test failed for {country} ({kpi}): {exc}")
            continue

        granger_results.extend(
            {'Country': country, 'KPI': kpi, 'Lag': lag, 'P-Value': p_value}
            for lag, p_value in p_values.items()
        )

# Convert results to DataFrame; explicit columns keep the sort below working
# even when no country produced a result
granger_df = pd.DataFrame(granger_results, columns=['Country', 'KPI', 'Lag', 'P-Value'])

# Display top results (lowest p-values first)
print(granger_df.sort_values(by='P-Value').head(10))
