In [3]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px



from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow.keras import models

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import matthews_corrcoef

from tensorflow.keras.layers import Dense, Embedding, LSTM, Conv1D, MaxPooling1D, Flatten,Input


# DATA UNDERSTANDING

In [4]:
df = pd.read_csv('us_foreign_aid.csv')
df.head(10)

FileNotFoundError: [Errno 2] No such file or directory: 'us_foreign_aid.csv'

In [None]:
df.tail(5)

In [None]:
df.columns

In [None]:
df.shape

In [None]:
df.info()

In [None]:
df['activity_end_date'].sort_values(ascending=False)



# DATA CLEANING

In [None]:
df.duplicated().sum()

In [None]:
df.drop_duplicates(inplace=True)


In [None]:
df.shape

In [None]:
df.isnull().sum().sum()

In [None]:
df.isnull().mean()*100

In [None]:
df.dropna(inplace = True)

In [None]:
df.shape

In [None]:
#df.rename(columns={'region_name': 'region'}, inplace=True)

In [None]:
df.head(5)

## EDA VISUALIZATION

In [None]:
# Convert 'fiscal_year' to datetime if necessary
df['fiscal_year'] = pd.to_datetime(df['fiscal_year'], format='%Y')

# Aggregate funding by fiscal year
funding_over_time = df.groupby('fiscal_year')['current_amount'].sum().reset_index()

# Plot funding over time
plt.figure(figsize=(10, 6))
sns.lineplot(data=funding_over_time, x='fiscal_year', y='current_amount', marker='o')
plt.title('USAID Funding Over Time')
plt.xlabel('fiscal_year')
plt.ylabel('Total Funding (USD)')
plt.grid(True)
plt.show()

In [None]:
# Aggregate funding by region
funding_by_region = df.groupby('region_name')['current_amount'].sum().reset_index()

# Create a choropleth map
fig = px.choropleth(funding_by_region,
                    locations='region_name',
                    locationmode='country names',
                    color='current_amount',
                    hover_name='region_name',
                    title='USAID Funding by Region',
                    color_continuous_scale='Blues')
fig.show()

In [None]:
# Aggregate funding by sector
funding_by_sector = df.groupby('dac_sector_name')['current_amount'].sum().reset_index()

# Sort by funding amount
funding_by_sector = funding_by_sector.sort_values(by='current_amount', ascending=False)

# Plot funding by sector
plt.figure(figsize=(12, 6))
sns.barplot(data=funding_by_sector, x='current_amount', y='dac_sector_name', palette='viridis')
plt.title('USAID Funding by Sector')
plt.xlabel('Total Funding (USD)')
plt.ylabel('Sector')
plt.show()

In [None]:
# Aggregate funding by country
funding_by_country = df.groupby('country_name')['current_amount'].sum().reset_index()

# Sort and select top 10 countries
top_countries = funding_by_country.sort_values(by='current_amount', ascending=False).head(10)

# Plot top recipient countries
plt.figure(figsize=(10, 6))
sns.barplot(data=top_countries, x='current_amount', y='country_name', palette='magma')
plt.title('Top 10 Recipient Countries of USAID Funding')
plt.xlabel('Total Funding (USD)')
plt.ylabel('Country')
plt.show()



In [None]:
# Aggregate funding by income group
funding_by_income = df.groupby('income_group_name')['current_amount'].sum().reset_index()

# Plot funding by income group
plt.figure(figsize=(8, 8))
plt.pie(funding_by_income['current_amount'], labels=funding_by_income['income_group_name'], autopct='%1.1f%%', startangle=140)
plt.title('USAID Funding by Income Group')
plt.show()

In [None]:
# funding by region data 
funding_by_region = pd.DataFrame({
    'region_name': ['Africa', 'Asia', 'Europe', 'South America', 'North America'],
    'current_amount': [100000000, 80000000, 60000000, 40000000, 20000000]  # Example funding amounts
})

# sentiment data by region 
sentiment_data = {
    'region_name': ['Africa', 'Asia', 'Europe', 'South America', 'North America'],
    'average_sentiment': [0.5, -0.2, 0.3, -0.1, 0.4]  # Example sentiment scores
}
sentiment_df = pd.DataFrame(sentiment_data)

# Merge sentiment data with funding data
merged_df = pd.merge(funding_by_region, sentiment_df, on='region_name')

# Plot correlation
plt.figure(figsize=(8, 6))
sns.scatterplot(data=merged_df, x='current_amount', y='average_sentiment', hue='region_name', s=100)
plt.title('Correlation Between USAID Funding and Public Sentiment by Region')
plt.xlabel('Total Funding (USD)')
plt.ylabel('Average Sentiment Score')
plt.grid(True)
plt.show()

In [None]:
# Example funding by sector data 
funding_by_sector = pd.DataFrame({
    'dac_sector_name': ['Health', 'Education', 'Agriculture', 'Infrastructure'],
    'current_amount': [50000000, 30000000, 20000000, 10000000]  # Example funding amounts
})

# Example sentiment data by sector 
sector_sentiment_data = {
    'dac_sector_name': ['Health', 'Education', 'Agriculture', 'Infrastructure'],
    'average_sentiment': [0.6, 0.4, -0.1, 0.2]  # Example sentiment scores
}
sector_sentiment_df = pd.DataFrame(sector_sentiment_data)

# Merge sentiment data with funding data
sector_merged_df = pd.merge(funding_by_sector, sector_sentiment_df, on='dac_sector_name')

# Plot sector funding vs. sentiment
plt.figure(figsize=(8, 4))
sns.barplot(data=sector_merged_df, x='dac_sector_name', y='current_amount', hue='average_sentiment', palette='coolwarm')
plt.title('USAID Sector Funding ')
plt.xlabel('Sector')
plt.ylabel('Total Funding (USD)')
plt.xticks(rotation=45)
plt.legend(title='Average Sentiment')
plt.show()