In [None]:
!pip install deep-translator

# Bibliotheken importieren
import pandas as pd
import re
import string
from deep_translator import GoogleTranslator
from google.colab import files
from tqdm import tqdm

# Mount Google Drive and read the raw project dataset
from google.colab import drive
drive.mount('/content/drive')
raw_csv_path = "/content/drive/MyDrive/Civilian Peacebuilding Dataset.csv"
df = pd.read_csv(raw_csv_path)

# Build one text per project from the title and both descriptions;
# missing fields are treated as empty strings before joining.
text_columns = ['ProjectTitle', 'ShortDescription', 'LongDescription']
text_parts = df[text_columns].fillna('')
df['FullText'] = text_parts.agg(' '.join, axis=1)

# Translate into English via deep-translator's GoogleTranslator.
# GoogleTranslator rejects inputs of 5000 characters or more, so long texts
# are translated piece by piece instead of falling back to the original.
MAX_TRANSLATE_CHARS = 4500


def _split_text(text, limit=MAX_TRANSLATE_CHARS):
    """Split `text` on whitespace into pieces of at most `limit` characters.

    A text that already fits is returned unchanged as a one-element list.
    A single token longer than `limit` is cut hard at `limit` characters.
    """
    if len(text) <= limit:
        return [text]

    chunks = []
    current = ''
    for word in text.split():
        # A token that can never fit on its own is sliced into limit-sized parts.
        while len(word) > limit:
            if current:
                chunks.append(current)
                current = ''
            chunks.append(word[:limit])
            word = word[limit:]
        candidate = f"{current} {word}" if current else word
        if len(candidate) > limit:
            chunks.append(current)
            current = word
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


translated = []
failed_rows = []  # (row position, exception type) for texts kept untranslated
translator = GoogleTranslator(source='auto', target='en')

for row_pos, text in enumerate(tqdm(df['FullText'].tolist())):
    try:
        pieces = _split_text(text)
        if len(pieces) == 1:
            translated.append(translator.translate(pieces[0]))
        else:
            parts = [translator.translate(piece) for piece in pieces]
            translated.append(' '.join(part for part in parts if part))
    except Exception as e:
        # Best effort: keep the original text, but record the row so that
        # untranslated input to the later steps does not go unnoticed.
        failed_rows.append((row_pos, type(e).__name__))
        translated.append(text)

df['TranslatedText'] = translated

if failed_rows:
    print(f"{len(failed_rows)} of {len(df)} texts could not be translated and were kept as-is.")
    print("First failures (row position, error):", failed_rows[:5])

# Save
df.to_csv("/content/drive/MyDrive/Civilian Peacebuilding Translated.csv", index=False)

In [None]:
!pip install -q transformers tqdm pandas

# Bibliotheken importieren
from transformers import pipeline
from datasets import Dataset
import pandas as pd
from tqdm.auto import tqdm
import torch
import numpy as np

# Load the zero-shot classifier and place it on the GPU when the runtime has
# one (device=0 is the first CUDA device, -1 is the CPU).
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1,
)

# Load the translated CSV from Google Drive
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_csv("/content/drive/MyDrive/Civilian Peacebuilding Translated.csv")
# Keep only rows that have a translated text and renumber them from 0
df = df[df['TranslatedText'].notna()].reset_index(drop=True)

# Multi-prompt hypotheses for each SDG target.
# Each key is the full SDG target text; it is later used as the name of that
# target's score column in df. Each value is a list of hypothesis sentences
# whose classifier scores are averaged into one score for the target.
sdg_prompts = {
    "Significantly reduce all forms of violence and related death rates everywhere (SDG 16.1)": [
        "This project contributes to reducing violence.",
        "This project prevents armed conflict or physical harm.",
        "This project aims to reduce violent-related deaths."
    ],
    "End abuse, exploitation, trafficking and all forms of violence against and torture of children (SDG 16.2)": [
        "This project protects children from abuse or exploitation.",
        "This project fights human trafficking or sexual violence.",
        "This project prevents neglect and mistreatment of children."
    ],
    "Promote the rule of law at the national and international levels and ensure equal access to justice for all (SDG 16.3)": [
        "This project promotes the rule of law and justice.",
        "This project improves access to fair legal systems.",
        "This project strengthens judicial institutions."
    ],
    "By 2030, significantly reduce illicit financial and arms flows, strengthen the recovery and return of stolen assets and combat all forms of organized crime (SDG 16.4)": [
        "This project fights organized crime or money laundering.",
        "This project addresses illicit arms trafficking.",
        "This project prevents illegal financial flows."
    ],
    "Develop effective, accountable and transparent institutions at all levels (SDG 16.6)": [
        "This project supports institutional reforms.",
        "This project strengthens public administration and governance structures.",
        "This project promotes transparency and accountability in governance."
    ],
    "Broaden and strengthen the participation of developing countries in the institutions of global governance (SDG 16.8)": [
        "This project enhances the participation of developing countries in global governance.",
        "This project builds capacity for engagement in international institutions.",
        "This project supports inclusive representation in multilateral decision-making."
    ],
    "Eliminate all forms of violence against all women and girls in the public and private spheres, including trafficking and sexual and other types of exploitation (SDG 5.2)": [
        "This project helps eliminate violence against women and girls.",
        "This project addresses gender-based violence.",
        "This project supports female survivors of abuse."
    ]
}

# Classification pass with mean aggregation over each SDG's hypotheses.
# The prompts are complete hypothesis sentences, so they are passed with
# hypothesis_template="{}". The pipeline default ("This example is {}.")
# would otherwise wrap every prompt inside a second sentence.
results_dict = {k: [] for k in sdg_prompts}
top_sdgs = []
top_scores = []

for text in tqdm(df["TranslatedText"], total=len(df), desc="🔍 Classifying"):
    sdg_scores = {}
    for sdg, prompts in sdg_prompts.items():
        # multi_label=True scores every hypothesis independently (entailment
        # vs. contradiction), the same scoring a one-label call receives, but
        # with a single pipeline call per SDG instead of one per hypothesis.
        res = classifier(text, prompts, multi_label=True, hypothesis_template="{}")
        sdg_scores[sdg] = np.mean(res["scores"])  # <- mean aggregation
        results_dict[sdg].append(sdg_scores[sdg])

    # SDG with the highest mean score for this text
    top_sdg = max(sdg_scores, key=sdg_scores.get)
    top_sdgs.append(top_sdg)
    top_scores.append(sdg_scores[top_sdg])

# Attach one score column per SDG plus the top label and its score
for k in results_dict:
    df[k] = results_dict[k]
df["Top_SDG"] = top_sdgs
df["Top_Score"] = top_scores

# Relevant SDGs (score above the threshold, 0.4 by default)
def assign_sdg(row, threshold=0.4, labels=None):
    """Return the SDG labels whose score in `row` is strictly above `threshold`.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexed by SDG label.
        threshold: Minimum score, exclusive. Defaults to 0.4.
        labels: Labels to check, in output order. Defaults to the keys of the
            module-level `sdg_prompts`.

    Returns:
        List of labels with `row[label] > threshold`.
    """
    if labels is None:
        labels = sdg_prompts
    return [sdg for sdg in labels if row[sdg] > threshold]
# One list of above-threshold SDG labels per project
df["Assigned_SDGs"] = df.apply(assign_sdg, axis=1)

# Write the scored dataset back to Google Drive
mapped_csv_path = "/content/drive/MyDrive/Civilian Peacebuilding SDG-Mapped.csv"
df.to_csv(mapped_csv_path, index=False)

In [None]:
!pip install -q transformers tqdm pandas

# Bibliotheken importieren
from transformers import pipeline
from datasets import Dataset
import pandas as pd
from tqdm.auto import tqdm
import torch
import numpy as np

# Load the zero-shot classifier and place it on the GPU when the runtime has
# one (device=0 is the first CUDA device, -1 is the CPU).
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1,
)

# Load the translated CSV from Google Drive
from google.colab import drive
drive.mount('/content/drive')
df = pd.read_csv("/content/drive/MyDrive/Civilian Peacebuilding Translated.csv")
# Keep only rows that have a translated text and renumber them from 0
df = df[df['TranslatedText'].notna()].reset_index(drop=True)

# Single-prompt hypotheses for each SDG target.
# Each key is the full SDG target text; it is later used as the name of that
# target's score column in df. Each value is a one-element list holding the
# target text rephrased as a "This project aims to ..." hypothesis.
sdg_prompts = {
    "Significantly reduce all forms of violence and related death rates everywhere (SDG 16.1)": [
        "This project aims to significantly reduce all forms of violence and related death rates everywhere."
    ],
    "End abuse, exploitation, trafficking and all forms of violence against and torture of children (SDG 16.2)": [
        "This project aims to end abuse, exploitation, trafficking and all forms of violence against and torture of children."
    ],
    "Promote the rule of law at the national and international levels and ensure equal access to justice for all (SDG 16.3)": [
        "This project aims to promote the rule of law at the national and international levels and ensure equal access to justice for all."
    ],
    "By 2030, significantly reduce illicit financial and arms flows, strengthen the recovery and return of stolen assets and combat all forms of organized crime (SDG 16.4)": [
        "This project aims to significantly reduce illicit financial and arms flows, strengthen the recovery and return of stolen assets and combat all forms of organized crime."
    ],
    "Eliminate all forms of violence against all women and girls in the public and private spheres, including trafficking and sexual and other types of exploitation (SDG 5.2)": [
        "This project aims to eliminate all forms of violence against all women and girls in the public and private spheres, including trafficking and sexual and other types of exploitation."
    ],
    "Develop effective, accountable and transparent institutions at all levels (SDG 16.6)": [
        "This project aims to develop effective, accountable and transparent institutions at all levels."
    ],
    "Broaden and strengthen the participation of developing countries in the institutions of global governance (SDG 16.8)": [
        "This project aims to broaden and strengthen the participation of developing countries in the institutions of global governance."
    ]
}

# Classification pass with mean aggregation over each SDG's hypotheses.
# The prompts are complete hypothesis sentences, so they are passed with
# hypothesis_template="{}". The pipeline default ("This example is {}.")
# would otherwise wrap every prompt inside a second sentence.
results_dict = {k: [] for k in sdg_prompts}
top_sdgs = []
top_scores = []

for text in tqdm(df["TranslatedText"], total=len(df), desc="🔍 Classifying"):
    sdg_scores = {}
    for sdg, prompts in sdg_prompts.items():
        # multi_label=True scores every hypothesis independently (entailment
        # vs. contradiction), the same scoring a one-label call receives, but
        # with a single pipeline call per SDG instead of one per hypothesis.
        res = classifier(text, prompts, multi_label=True, hypothesis_template="{}")
        sdg_scores[sdg] = np.mean(res["scores"])  # <- mean aggregation
        results_dict[sdg].append(sdg_scores[sdg])

    # SDG with the highest mean score for this text
    top_sdg = max(sdg_scores, key=sdg_scores.get)
    top_sdgs.append(top_sdg)
    top_scores.append(sdg_scores[top_sdg])

# Attach one score column per SDG plus the top label and its score
for k in results_dict:
    df[k] = results_dict[k]
df["Top_SDG"] = top_sdgs
df["Top_Score"] = top_scores

# Relevant SDGs (score above the threshold, 0.4 by default)
def assign_sdg(row, threshold=0.4, labels=None):
    """Return the SDG labels whose score in `row` is strictly above `threshold`.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexed by SDG label.
        threshold: Minimum score, exclusive. Defaults to 0.4.
        labels: Labels to check, in output order. Defaults to the keys of the
            module-level `sdg_prompts`.

    Returns:
        List of labels with `row[label] > threshold`.
    """
    if labels is None:
        labels = sdg_prompts
    return [sdg for sdg in labels if row[sdg] > threshold]

# One list of above-threshold SDG labels per project
df["Assigned_SDGs"] = df.apply(assign_sdg, axis=1)

# Write the scored dataset back to Google Drive
mapped_csv_path = "/content/drive/MyDrive/Civilian Peacebuilding SDG-Mapped single prompt.csv"
df.to_csv(mapped_csv_path, index=False)

In [None]:
# Install falls nicht geschehen
!pip install pandas geopandas matplotlib unidecode

# Imports
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from unidecode import unidecode
from matplotlib.colors import Normalize
from google.colab import drive

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load the SDG-mapped dataset
mapped_csv_path = "/content/drive/MyDrive/Peace & Conflict SDG Mapped.csv"
df = pd.read_csv(mapped_csv_path, low_memory=False)

# Normalise recipient names: trimmed, lower-cased, transliterated by unidecode
df["RecipientName_clean"] = [
    unidecode(str(name).strip().lower()) for name in df["RecipientName"]
]

# Parse amounts written with thousands separators (commas removed) into floats
amount_text = df["USD_Disbursement"].astype(str)
df["USD_Disbursement"] = amount_text.str.replace(",", "").astype(float)

# Regional / unspecified recipients that are excluded from the maps
non_countries = [
    "africa, regional", "america, regional", "asia, regional", "europe, regional",
    "oceania, regional", "south america, regional", "south asia, regional",
    "western africa, regional", "eastern africa, regional", "southern africa, regional",
    "central asia, regional", "central america, regional", "middle east, regional",
    "middle africa, regional", "caribbean, regional", "caribbean & central america, regional",
    "far east asia, regional", "north of sahara, regional", "south of sahara, regional",
    "south & central asia, regional", "melanesia, regional", "bilateral, unspecified",
    "states ex-yugoslavia unspecified", "tokelau"
]
is_non_country = df["RecipientName_clean"].isin(non_countries)
df = df[~is_non_country].copy()

# Mapping of recipient names that differ from the map's country names.
# Keys are values of RecipientName_clean (lower-cased and passed through
# unidecode); values are the names they are replaced with before merging on the
# world map's lower-cased "name" column.
# Entries that map a name to itself leave that name unchanged.
# NOTE(review): the target spellings have not been checked against the GeoJSON
# file here; confirm that every value actually occurs in world["name"].
manual_map = {
    "china (people's republic of)": "china",
    "democratic people's republic of korea": "north korea",
    "democratic republic of the congo": "democratic republic of the congo",
    "cote d'ivoire": "ivory coast",
    "lao people's democratic republic": "laos",
    "syrian arab republic": "syria",
    "viet nam": "vietnam",
    "turkiye": "turkey",
    "micronesia": "micronesia (federated states of)",
    "west bank and gaza strip": "palestine",
    "congo": "republic of the congo",
    "north macedonia": "north macedonia",
    "cabo verde": "cape verde",
    "eswatini": "swaziland",
    "timor-leste": "east timor",
    "sao tome and principe": "são tomé and príncipe",
    "trinidad and tobago": "trinidad and tobago",
    "myanmar": "myanmar",
    "kosovo": "kosovo",
    "republic of moldova": "moldova",
    "bahamas, the": "bahamas",
    "gambia, the": "gambia",
    "venezuela (bolivarian republic of)": "venezuela",
    "iran (islamic republic of)": "iran",
    "russian federation": "russia",
    "bolivia (plurinational state of)": "bolivia",
    "tanzania, united republic of": "tanzania",
    "korea, republic of": "south korea",
    # NOTE(review): keys are compared with unidecode output, so this accented
    # key presumably never matches; the "turkiye" entry above covers the name.
    "türkiye": "turkey",
    "serbia": "republic of serbia"
}

# Load the world map; names are trimmed and lower-cased for the name merge
world = gpd.read_file("https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson")
world["name"] = world["name"].str.strip().str.lower()
map_names = set(world["name"])

# One choropleth map per SDG label
unique_sdg_labels = df["AssignedLabel"].dropna().unique()

for label in unique_sdg_labels:
    df_label = df[df["AssignedLabel"] == label].copy()

    # Apply the name mapping and sum disbursements per recipient
    df_label["MatchName"] = df_label["RecipientName_clean"].replace(manual_map)
    label_data = df_label.groupby("MatchName", as_index=False)["USD_Disbursement"].sum()

    # The left merge below drops recipients that have no polygon of the same
    # name, so list them to make the funding missing from the map visible.
    unmatched = sorted(set(label_data["MatchName"]) - map_names)
    if unmatched:
        print(f"[{label}] recipients not found on the map (funding not drawn): {unmatched}")

    # Merge with the world map (countries without funding get NaN)
    merged = world.merge(label_data, how="left", left_on="name", right_on="MatchName")

    # Nothing to colour: skip this label instead of letting an all-NaN column
    # fail in the plot call and abort the remaining labels.
    if not merged["USD_Disbursement"].notna().any():
        print(f"[{label}] no recipient matched the map; skipping this label.")
        continue

    # Plot with a colour scale from 0 to the largest per-country sum
    fig, ax = plt.subplots(figsize=(28, 14))
    norm = Normalize(vmin=0, vmax=merged["USD_Disbursement"].max())

    merged.plot(
        column="USD_Disbursement",
        cmap="Reds",
        linewidth=0.5,
        edgecolor="white",
        ax=ax,
        legend=True,
        norm=norm,
        missing_kwds={
            "color": "white",        # countries without funding
            "edgecolor": "lightgray" # subtle outline
        },
        legend_kwds={
            "shrink": 0.6,
            "orientation": "vertical"
        }
    )

    # The colorbar is the last axes added to the figure; set its font sizes there
    colorbar = fig.axes[-1]
    colorbar.tick_params(labelsize=30)
    colorbar.set_ylabel("Disbursements (in USD millions)", fontsize=30)

    # Restrict the visible extent of the map and hide the axes frame
    ax.set_ylim(-60, 85)
    ax.set_xlim(-150, 150)
    ax.axis("off")

    # Save, then close this figure explicitly so figures do not pile up in the loop
    fig.tight_layout()
    output_path = f"/content/drive/MyDrive/Map_{label}.pdf"
    fig.savefig(output_path, bbox_inches="tight")
    plt.close(fig)

In [None]:
# Install falls nicht geschehen
!pip install pandas geopandas matplotlib unidecode

# Imports
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from unidecode import unidecode
from matplotlib.colors import Normalize
from google.colab import drive

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load the SDG-mapped dataset
mapped_csv_path = "/content/drive/MyDrive/Peace & Conflict SDG Mapped.csv"
df = pd.read_csv(mapped_csv_path, low_memory=False)

# Normalise recipient names: trimmed, lower-cased, transliterated by unidecode
df["RecipientName_clean"] = [
    unidecode(str(name).strip().lower()) for name in df["RecipientName"]
]

# Parse amounts written with thousands separators (commas removed) into floats
amount_text = df["USD_Disbursement"].astype(str)
df["USD_Disbursement"] = amount_text.str.replace(",", "").astype(float)

# Regional / unspecified recipients that are excluded from the map
non_countries = [
    "africa, regional", "america, regional", "asia, regional", "europe, regional",
    "oceania, regional", "south america, regional", "south asia, regional",
    "western africa, regional", "eastern africa, regional", "southern africa, regional",
    "central asia, regional", "central america, regional", "middle east, regional",
    "middle africa, regional", "caribbean, regional", "caribbean & central america, regional",
    "far east asia, regional", "north of sahara, regional", "south of sahara, regional",
    "south & central asia, regional", "melanesia, regional", "bilateral, unspecified",
    "states ex-yugoslavia unspecified", "tokelau"
]
is_non_country = df["RecipientName_clean"].isin(non_countries)
df = df[~is_non_country].copy()

# Mapping of recipient names that differ from the map's country names.
# Keys are values of RecipientName_clean (lower-cased and passed through
# unidecode); values are the names they are replaced with before merging on the
# world map's lower-cased "name" column.
# Entries that map a name to itself leave that name unchanged.
# NOTE(review): the target spellings have not been checked against the GeoJSON
# file here; confirm that every value actually occurs in world["name"].
manual_map = {
    "china (people's republic of)": "china",
    "democratic people's republic of korea": "north korea",
    "democratic republic of the congo": "democratic republic of the congo",
    "cote d'ivoire": "ivory coast",
    "lao people's democratic republic": "laos",
    "syrian arab republic": "syria",
    "viet nam": "vietnam",
    "turkiye": "turkey",
    "micronesia": "micronesia (federated states of)",
    "west bank and gaza strip": "palestine",
    "congo": "republic of the congo",
    "north macedonia": "north macedonia",
    "cabo verde": "cape verde",
    "eswatini": "swaziland",
    "timor-leste": "east timor",
    "sao tome and principe": "são tomé and príncipe",
    "trinidad and tobago": "trinidad and tobago",
    "myanmar": "myanmar",
    "kosovo": "kosovo",
    "republic of moldova": "moldova",
    "bahamas, the": "bahamas",
    "gambia, the": "gambia",
    "venezuela (bolivarian republic of)": "venezuela",
    "iran (islamic republic of)": "iran",
    "russian federation": "russia",
    "bolivia (plurinational state of)": "bolivia",
    "tanzania, united republic of": "tanzania",
    "korea, republic of": "south korea",
    # NOTE(review): keys are compared with unidecode output, so this accented
    # key presumably never matches; the "turkiye" entry above covers the name.
    "türkiye": "turkey",
    "serbia": "republic of serbia"
}

# Load the world map; names are trimmed and lower-cased for the name merge
world = gpd.read_file("https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson")
world["name"] = world["name"].str.strip().str.lower()

# Apply the name mapping and sum disbursements per recipient
df["MatchName"] = df["RecipientName_clean"].replace(manual_map)
total_data = df.groupby("MatchName", as_index=False)["USD_Disbursement"].sum()

# The left merge below drops recipients that have no polygon of the same name,
# so list them to make the funding missing from the map visible.
unmatched = sorted(set(total_data["MatchName"]) - set(world["name"]))
if unmatched:
    print(f"Recipients not found on the map (funding not drawn): {unmatched}")

# Merge with the world map (countries without funding get NaN)
merged = world.merge(total_data, how="left", left_on="name", right_on="MatchName")

# Plot with a colour scale from 0 to the largest per-country sum
fig, ax = plt.subplots(figsize=(28, 14))
norm = Normalize(vmin=0, vmax=merged["USD_Disbursement"].max())

merged.plot(
    column="USD_Disbursement",
    cmap="Reds",
    linewidth=0.5,
    edgecolor="white",
    ax=ax,
    legend=True,
    norm=norm,
    missing_kwds={
        "color": "white",        # countries without funding
        "edgecolor": "lightgray" # subtle outline
    },
    legend_kwds={
        "shrink": 0.6,
        "orientation": "horizontal"
    }
)

# The colorbar is the last axes added to the figure; set its font sizes there
colorbar = fig.axes[-1]
colorbar.tick_params(labelsize=30)
colorbar.set_xlabel("Disbursements (in USD millions)", fontsize=30)

# Restrict the visible extent of the map and hide the axes frame
ax.set_ylim(-60, 85)
ax.set_xlim(-150, 150)
ax.axis("off")

# Save and close this figure explicitly
fig.tight_layout()
fig.savefig("/content/drive/MyDrive/Map_total.pdf", bbox_inches="tight")
plt.close(fig)

In [None]:
# Install falls nicht geschehen
!pip install pandas matplotlib

# Imports
import pandas as pd
import matplotlib.pyplot as plt

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load the SDG-mapped dataset
mapped_csv_path = "/content/drive/MyDrive/Peace & Conflict SDG Mapped.csv"
df = pd.read_csv(mapped_csv_path, low_memory=False)

# Parse amounts written with thousands separators (commas removed) into floats
amount_text = df["USD_Disbursement"].astype(str)
df["USD_Disbursement"] = amount_text.str.replace(",", "").astype(float)

# Keep only rows with an assigned label and treat the label as text
df = df.dropna(subset=["AssignedLabel"]).copy()
df["AssignedLabel"] = df["AssignedLabel"].astype(str)

# Total disbursement per SDG label, arranged in a fixed display order
sdg_order = ["16.1", "16.3", "16.8", "16.6", "16.2", "5.2", "16.4"]
bar_data = df.groupby("AssignedLabel")["USD_Disbursement"].sum().reindex(sdg_order)

# Colours: one hex code per bar, in the same order as sdg_order
color_palette = [
    "#332288",  # dark blue
    "#117733",  # green
    "#88CCEE",  # light blue
    "#DDCC77",  # sand yellow
    "#555555",  # dark grey
    "#AA4499",  # violet
    "#44AA99",  # teal
]

# Draw the bars
fig, ax = plt.subplots(figsize=(12, 6))
tick_labels = [f"SDG {k}" for k in bar_data.index]
bars = ax.bar(tick_labels, bar_data.values, color=color_palette)

# Axis label, tick sizes and a light horizontal grid drawn behind the bars
ax.set_ylabel("Disbursements (in USD millions)", fontsize=18)
ax.tick_params(axis='both', labelsize=14)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.grid(axis='y', linestyle='--', alpha=0.5)
ax.set_axisbelow(True)

# Export to PDF and display
fig.tight_layout()
fig.savefig("/content/drive/MyDrive/SDG_disbursements.pdf")
plt.show()

In [None]:
# Installieren (falls nötig)
!pip install pandas matplotlib seaborn

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot style and default colour palette
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("colorblind")

# Load the yearly disbursements and normalise the column names
# (trimmed, lower-cased, spaces replaced by underscores)
csv_path = "/content/drive/MyDrive/jahresausgaben_nach_laendern_und_purpose.csv"
df = pd.read_csv(csv_path)
df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]

# Parse amounts written with thousands separators (commas removed) into floats
amount_text = df["usd_disbursement_in_millions"].astype(str)
df["usd_disbursement_in_millions"] = amount_text.str.replace(",", "").astype(float)

# Drop the rows of the "Conflict, Peace & Security" purpose
df = df.loc[df["purpose_name"].ne("Conflict, Peace & Security")]

# Sum disbursements per year and purpose
group_keys = ["year", "purpose_name"]
df_grouped = df.groupby(group_keys, as_index=False)["usd_disbursement_in_millions"].sum()

# Plot setup: every year in the data range, purposes ordered by the first
# year in which they appear, one palette colour per purpose
fig, ax = plt.subplots(figsize=(14, 8))
first_year = df_grouped["year"].min()
last_year = df_grouped["year"].max()
years_full = list(range(first_year, last_year + 1))
purpose_order = df_grouped.groupby("purpose_name")["year"].min().sort_values().index
colors = sns.color_palette("colorblind", n_colors=len(purpose_order))

# One line per purpose; years without a row are filled with 0
for purpose, line_color in zip(purpose_order, colors):
    yearly = (
        df_grouped[df_grouped["purpose_name"] == purpose]
        .set_index("year")
        .reindex(years_full, fill_value=0)
        .reset_index()
    )
    ax.plot(
        yearly["year"],
        yearly["usd_disbursement_in_millions"],
        label=purpose,
        linewidth=2.2,
        color=line_color
    )

# Axis labels and tick sizes
ax.set_xlabel("Year", fontsize=18)
ax.set_ylabel("Disbursements (in USD millions)", fontsize=18)
ax.tick_params(axis='both', labelsize=16)

# Framed legend in the upper-left corner
legend_style = dict(
    title="Purpose",
    title_fontsize=18,
    fontsize=16,
    loc="upper left",
    frameon=True,
    framealpha=0.95,
    facecolor="white",
    edgecolor="gray"
)
legend = ax.legend(**legend_style)

# Export to PDF and display
fig.tight_layout()
fig.savefig("/content/drive/MyDrive/USD_disbursements_(in_millions)_purpose_names_over_time.pdf", bbox_inches="tight")
plt.show()

In [None]:
# Installation (nicht löschen)
!pip install pandas matplotlib seaborn

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot style and default colour palette
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("colorblind")

# Read the yearly disbursements
csv_path = "/content/drive/MyDrive/jahresausgaben_nach_laendern_und_purpose.csv"
df = pd.read_csv(csv_path)

# Normalise column names: trimmed, lower-cased, spaces replaced by underscores
df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]

# Parse amounts written with thousands separators (commas removed) into floats
amount_text = df["usd_disbursement_in_millions"].astype(str)
df["usd_disbursement_in_millions"] = amount_text.str.replace(",", "").astype(float)

# Countries to plot
countries = [
    "Afghanistan", "Iraq", "Colombia", "Ukraine", "Democratic Republic of the Congo", "Syrian Arab Republic", "Sudan", "Somalia"
]

is_selected = df["recipient_name_(en)"].isin(countries)
df_filtered = df[is_selected]

# Sum disbursements per year and recipient
group_keys = ["year", "recipient_name_(en)"]
df_grouped = df_filtered.groupby(group_keys, as_index=False)["usd_disbursement_in_millions"].sum()

# Plot setup: years from 1992 to the last year in the data, countries ordered
# by their total disbursement (largest first), one palette colour per country
fig, ax = plt.subplots(figsize=(14, 8))
last_year = df_grouped["year"].max()
years_full = list(range(1992, last_year + 1))
totals_by_country = df_grouped.groupby("recipient_name_(en)")["usd_disbursement_in_millions"].sum()
country_order = totals_by_country.sort_values(ascending=False).index
colors = sns.color_palette("colorblind", n_colors=len(country_order))

# One line per country; years without a row are filled with 0
for country, line_color in zip(country_order, colors):
    yearly = (
        df_grouped[df_grouped["recipient_name_(en)"] == country]
        .set_index("year")
        .reindex(years_full, fill_value=0)
        .reset_index()
    )
    ax.plot(
        yearly["year"],
        yearly["usd_disbursement_in_millions"],
        label=country,
        linewidth=2.2,
        color=line_color
    )

# Axis labels and tick sizes
ax.set_xlabel("Year", fontsize=18)
ax.set_ylabel("USD Disbursements in Millions", fontsize=18)
ax.tick_params(axis='both', labelsize=16)

# Framed legend in the upper-left corner
legend_style = dict(
    title="Country",
    title_fontsize=18,
    fontsize=16,
    loc="upper left",
    frameon=True,
    facecolor="white",
    edgecolor="gray"
)
ax.legend(**legend_style)

# Export to PDF and display
fig.tight_layout()
fig.savefig("/content/drive/MyDrive/USD_disbursements_(in millions)_top_countries_over_time.pdf", bbox_inches="tight")
plt.show()

In [None]:
# Installieren, falls nötig
!pip install pandas matplotlib seaborn

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Plot style and default colour palette
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("colorblind")

# File paths
aid_path = "/content/drive/MyDrive/Peace & Conflict SDG Mapped 2023.csv"
conflict_path = "/content/drive/MyDrive/2024 - Results.csv"

# Load the aid data and sum disbursements per recipient
df_aid = pd.read_csv(aid_path)
# Parse amounts written with thousands separators (commas removed) into floats
df_aid["USD_Disbursement"] = (
    df_aid["USD_Disbursement"]
    .astype(str)
    .str.replace(",", "")
    .astype(float)
)
df_aid_grouped = (
    df_aid.groupby("RecipientName", as_index=False)["USD_Disbursement"]
    .sum()
    .rename(columns={"RecipientName": "Country", "USD_Disbursement": "Aid_2023"})
)

# Load the conflict data; spaces in column names become underscores
df_conflict = pd.read_csv(conflict_path)
conflict_cols = ["Country", "Total Score", "Deadliness Value", "Diffusion Value",
                 "Danger Value", "Fragmentation Value"]
df_conflict = df_conflict[conflict_cols].copy()
df_conflict = df_conflict.rename(columns=lambda x: x.replace(" ", "_"))

# The inner merge keeps only countries spelled identically in both files.
# Report what is lost on each side so that name mismatches are visible
# instead of silently shrinking the sample behind the correlations.
aid_names = set(df_aid_grouped["Country"].dropna().astype(str))
conflict_names = set(df_conflict["Country"].dropna().astype(str))
aid_only = sorted(aid_names - conflict_names)
conflict_only = sorted(conflict_names - aid_names)
if aid_only:
    print(f"{len(aid_only)} aid recipients without conflict data (dropped): {aid_only}")
if conflict_only:
    print(f"{len(conflict_only)} conflict-index countries without aid data (dropped): {conflict_only}")

# Merge
df_merged = pd.merge(df_aid_grouped, df_conflict, on="Country", how="inner")

# One scatter plot with a regression line per conflict indicator
indicators = ["Total_Score", "Deadliness_Value", "Diffusion_Value", "Danger_Value", "Fragmentation_Value"]

for indicator in indicators:
    fig, ax = plt.subplots(figsize=(6, 4))

    # Scatter points with a fitted regression line
    sns.regplot(
        data=df_merged,
        x="Aid_2023",
        y=indicator,
        scatter_kws={"alpha": 0.4},
        line_kws={"color": "red"},
        ax=ax,
    )

    # Correlation between aid and the indicator, written in the top-left corner
    corr_value = df_merged["Aid_2023"].corr(df_merged[indicator])
    ax.text(0.05, 0.95, f"Correlation: {corr_value:.2f}", transform=ax.transAxes,
            fontsize=12, verticalalignment="top", weight="bold")

    ax.set_xlabel("Disbursements (in USD millions) 2023")
    ax.set_ylabel(indicator.replace("_", " ").capitalize() + " 2024")
    fig.tight_layout()

    # Save one PDF per indicator
    filename = f"/content/drive/MyDrive/Scatter_aid2023_vs_{indicator.lower()}_2024.pdf"
    fig.savefig(filename)

    plt.show()

In [None]:
# Installieren, falls nötig
!pip install pandas matplotlib seaborn statsmodels

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import pearsonr

# Plot style and default colour palette
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("colorblind")

# File paths
aid_path = "/content/drive/MyDrive/Peace & Conflict SDG Mapped 2023.csv"
conflict_path = "/content/drive/MyDrive/2024 - Results.csv"

# Load the aid data
df_aid = pd.read_csv(aid_path)
# Parse amounts written with thousands separators (commas removed) into floats
df_aid["USD_Disbursement"] = (
    df_aid["USD_Disbursement"]
    .astype(str)
    .str.replace(",", "")
    .astype(float)
)

# Sum disbursements per recipient and SDG label
df_sdg_grouped = (
    df_aid.groupby(["RecipientName", "AssignedLabel"], as_index=False)["USD_Disbursement"]
    .sum()
    .rename(columns={"RecipientName": "Country", "USD_Disbursement": "Aid_2023", "AssignedLabel": "SDG"})
)

# Load the conflict data; spaces in column names become underscores
df_conflict = pd.read_csv(conflict_path)
df_conflict = df_conflict[["Country", "Total Score"]].copy()
df_conflict = df_conflict.rename(columns=lambda x: x.replace(" ", "_"))

# The inner merge keeps only countries spelled identically in both files.
# Report what is lost on each side so that name mismatches are visible
# instead of silently shrinking the sample behind the regressions.
aid_names = set(df_sdg_grouped["Country"].dropna().astype(str))
conflict_names = set(df_conflict["Country"].dropna().astype(str))
aid_only = sorted(aid_names - conflict_names)
conflict_only = sorted(conflict_names - aid_names)
if aid_only:
    print(f"{len(aid_only)} aid recipients without conflict data (dropped): {aid_only}")
if conflict_only:
    print(f"{len(conflict_only)} conflict-index countries without aid data (dropped): {conflict_only}")

# Merge
df_merged = pd.merge(df_sdg_grouped, df_conflict, on="Country", how="inner")

# One scatter plot with an OLS fit per SDG label
for target in sorted(df_merged["SDG"].dropna().unique()):
    sub = df_merged.loc[df_merged["SDG"] == target].copy()

    # Skip labels with fewer than 5 countries or with no aid at all
    too_few_points = len(sub) < 5
    if too_few_points or sub["Aid_2023"].sum() == 0:
        continue

    # Fit total score on aid (with intercept) and get the fitted values with
    # their confidence bounds at alpha=0.05
    X = sub["Aid_2023"]
    y = sub["Total_Score"]
    X_const = sm.add_constant(X)
    model = sm.OLS(y, X_const).fit()
    pred_summary = model.get_prediction(X_const).summary_frame(alpha=0.05)

    # Scatter of the raw points
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.scatter(X, y, alpha=0.6)

    # Order everything by aid so the line and band are drawn left to right
    sort_idx = X.argsort()
    x_sorted = X.iloc[sort_idx]
    mean, ci_low, ci_up = (
        pred_summary[column].iloc[sort_idx]
        for column in ("mean", "mean_ci_lower", "mean_ci_upper")
    )

    ax.plot(x_sorted, mean, color="red")
    ax.fill_between(x_sorted, ci_low, ci_up, color="red", alpha=0.2)

    # Pearson correlation, its p-value and the adjusted R^2 in the top-left corner
    corr, p = pearsonr(X, y)
    r2 = model.rsquared_adj
    annotation = f"Correlation: {corr:.2f}\n$p$-value: {p:.3f}\nAdj. $R^2$: {r2:.2f}"
    ax.text(
        0.05, 0.95,
        annotation,
        transform=ax.transAxes,
        fontsize=11, verticalalignment="top", weight="bold"
    )

    # Axis titles and layout
    ax.set_xlabel("Disbursements (in USD millions) 2023")
    ax.set_ylabel("Total conflict score 2024")
    fig.tight_layout()

    # Save one PDF per label ("." in the label replaced by "_")
    safe_target = str(target).replace(".", "_")
    out_path = f"/content/drive/MyDrive/Scatter_aid2023_vs_total_score_2024_SDG_{safe_target}.pdf"
    fig.savefig(out_path)
    plt.show()