In [33]:
from gdelt import gdelt
import pandas as pd
pd.set_option('display.max_columns', None)
from datetime import datetime, timedelta

In [86]:
# Initialize GDELT object
g = gdelt(version=2)

# Set the year you want to query
year = 2016

# Create a list to store weekly DataFrames
weekly_data = []

# Start from January 1st of the given year
start_date = datetime(year, 1, 1)

# Loop through the entire year, week by week
while start_date.year == year:
    # Define the end of the week
    end_date = start_date + timedelta(days=6)

    # Create date strings in 'YYYY-MM-DD' format
    current_date = start_date

    # Temporary list to hold daily results for the week
    week_results = []

    # Loop through each day of the current week
    while current_date <= end_date and current_date.year == year:
        date_str = current_date.strftime("%Y-%m-%d")
        try:
            # Query GDELT for the specific day
            results = g.Search(date_str, table="events")

            # Convert to DataFrame and append to the week_results list
            df = pd.DataFrame(results)
            if not df.empty:
                week_results.append(df)

        except Exception as e:
            print(f"Error on {date_str}: {e}")

        # Move to the next day
        current_date += timedelta(days=1)

    # Combine daily DataFrames into a single weekly DataFrame
    if week_results:
        week_df = pd.concat(week_results, ignore_index=True)

        # Apply Filtering and Grouping:
        # 1️⃣ Drop rows with NaN in essential columns
        week_df = week_df.dropna(subset=['Actor1CountryCode', 'Actor2CountryCode', 'GoldsteinScale'])

        # 2️⃣ Group by Actor1CountryCode and Actor2CountryCode and sum GoldsteinScale
        df_grouped = week_df.groupby(['Actor1CountryCode', 'Actor2CountryCode']).agg(
            Total_Goldstein=('GoldsteinScale', 'sum'),
            Num_Events=('GoldsteinScale', 'count')
        ).reset_index()

        # 3️⃣ Remove self-loops where Actor1CountryCode == Actor2CountryCode
        df_grouped = df_grouped[df_grouped['Actor1CountryCode'] != df_grouped['Actor2CountryCode']]

        # 4️⃣ Add a column to identify the week
        df_grouped['Week_Start'] = start_date.strftime("%Y-%m-%d")

        # Append the processed weekly DataFrame to the list
        weekly_data.append(df_grouped)

    # Move to the next week
    start_date = end_date + timedelta(days=1)

# Combine all weekly DataFrames into one final DataFrame
final_df = pd.concat(weekly_data, ignore_index=True)

# Display the first few rows
print(final_df.head())

# Optional: Save to CSV
final_df.to_csv(f"GDELT_Weekly_{year}.csv", index=False)



Error on 2016-01-24: This GDELT query returned no data. Check query parameters and retry




Error on 2016-04-08: This GDELT query returned no data. Check query parameters and retry




Error on 2016-11-16: This GDELT query returned no data. Check query parameters and retry
  Actor1CountryCode Actor2CountryCode  Total_Goldstein  Num_Events  Week_Start
0               AFG               PAK             -5.0           3  2016-01-01
1               AFG               USA            -41.1          13  2016-01-01
2               AFR               AUS              0.0           1  2016-01-01
3               AFR               EUR              3.0           1  2016-01-01
4               AFR               KEN              2.8           1  2016-01-01


In [88]:
final_df

Unnamed: 0,Actor1CountryCode,Actor2CountryCode,Total_Goldstein,Num_Events,Week_Start
0,AFG,PAK,-5.0,3,2016-01-01
1,AFG,USA,-41.1,13,2016-01-01
2,AFR,AUS,0.0,1,2016-01-01
3,AFR,EUR,3.0,1,2016-01-01
4,AFR,KEN,2.8,1,2016-01-01
...,...,...,...,...,...
30529,WST,LTU,-1.6,4,2016-12-30
30530,WST,LVA,-1.6,4,2016-12-30
30531,WST,UKR,-0.8,2,2016-12-30
30532,WST,USA,-4.0,1,2016-12-30
