In [None]:
# Read each cleaned per-country dataset and tag its rows with the country name
# in one step; `assign` appends (or overwrites) the 'Country' column.
benin = pd.read_csv("../data/processed/benin_clean.csv").assign(Country='Benin')
sierra = pd.read_csv("../data/processed/sierra_leone_clean.csv").assign(Country='Sierra Leone')
togo = pd.read_csv("../data/processed/togo_clean.csv").assign(Country='Togo')

# Stack the three frames into one long table with a fresh 0..n-1 index so the
# per-file row labels do not collide.
country_frames = (benin, sierra, togo)
combined = pd.concat(country_frames, ignore_index=True)

# 1. Comparative boxplots
# Draws one GHI box per country from `combined`, saves the figure to disk,
# then displays it.
from pathlib import Path

boxplot_path = Path('../figures/cross_country_ghi_boxplot.png')
# savefig does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail with FileNotFoundError.
boxplot_path.parent.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(12, 6))
sns.boxplot(data=combined, x='Country', y='GHI')
plt.title('GHI Distribution by Country')
plt.ylabel('GHI (W/m²)')
plt.tight_layout()  # applied before saving so the saved image uses the adjusted layout
plt.savefig(boxplot_path)
plt.show()

# 2. Summary table
# Mean and standard deviation of each selected column, one row per country.
summary_metrics = ['GHI', 'DNI', 'DHI', 'Tamb']
per_country = combined.groupby('Country')
summary_table = per_country[summary_metrics].agg(['mean', 'std'])
print(summary_table)

# 3. ANOVA test (optional)
# One-way ANOVA on GHI across the three countries: tests whether the group
# means differ. Results are left in `f_stat` and `p_value` and printed.
from scipy.stats import f_oneway

# Drop missing readings per country first. With f_oneway's default NaN
# handling, a single NaN in any sample turns both statistics into NaN.
ghi_samples = [frame['GHI'].dropna() for frame in (benin, sierra, togo)]
f_stat, p_value = f_oneway(*ghi_samples)
print(f"\nANOVA Results for GHI:\nF-statistic: {f_stat:.2f}, p-value: {p_value:.4f}")