# In [None]:
# compare_countries.ipynb

# =============================
# 1. Import Libraries
# =============================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


# Apply seaborn's "whitegrid" theme to the plots drawn below.
# `sns.set` is only a legacy alias of `sns.set_theme` (the preferred
# interface) and may be removed in a future seaborn release.
sns.set_theme(style="whitegrid")
# =============================
# 2. Load Datasets
# =============================
# Read one CSV per country and tag every row with a 'Country' label so the
# origin of each row is still known after the frames are stacked together.
benin = pd.read_csv("../data/benin-malanville.csv").assign(Country='Benin')
sierra_leone = pd.read_csv("../data/sierraleone-bumbuna.csv").assign(
    Country='Sierra Leone'
)
togo = pd.read_csv("../data/togo-dapaong_qc.csv").assign(Country='Togo')

# Stack the three frames into one, renumbering the row index from 0.
country_frames = [benin, sierra_leone, togo]
data = pd.concat(country_frames, ignore_index=True)

# Quick sanity check of the combined frame (first rows only).
display(data.head())
# =============================
# 3. Metric Comparison: Boxplots
# =============================
# Draw one figure per metric, with one box per country.
metrics = ['GHI', 'DNI', 'DHI']

for metric in metrics:
    plt.figure(figsize=(8, 6))
    # Passing `palette` without `hue` is deprecated in seaborn >= 0.13 and
    # emits a FutureWarning. Mapping `hue` to the same variable as `x` keeps
    # the per-country colours; `dodge=False` keeps each box centred on its
    # category instead of being shifted to make room for other hue levels.
    ax = sns.boxplot(
        x='Country',
        y=metric,
        hue='Country',
        data=data,
        palette="Set2",
        dodge=False,
    )
    # Older seaborn versions add a legend for `hue`; it only repeats the
    # x-axis labels, so remove it when present.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    plt.title(f'{metric} Comparison Across Countries', fontsize=14)
    plt.ylabel(f'{metric} (W/m²)')
    plt.xlabel('Country')
    plt.show()
# =============================
# 4. Summary Table (mean, median, std)
# =============================
# Per-country mean, median and standard deviation of each irradiance column.
# The result has one row per country and a (column, statistic) column index.
solar_columns = ['GHI', 'DNI', 'DHI']
summary_funcs = ['mean', 'median', 'std']

by_country = data.groupby('Country')
summary = by_country[solar_columns].agg(summary_funcs)
display(summary)
# =============================
# 5. Statistical Testing (ANOVA for GHI)
# =============================
# One-way ANOVA on GHI across the three countries.
# `stats.f_oneway` propagates NaN by default, so a single missing GHI reading
# in any group would turn both the F-statistic and the p-value into NaN.
# Drop missing readings from each group before running the test.
ghi_benin = benin['GHI'].dropna()
ghi_sierra = sierra_leone['GHI'].dropna()
ghi_togo = togo['GHI'].dropna()

f_stat, p_value = stats.f_oneway(ghi_benin, ghi_sierra, ghi_togo)
print(f"ANOVA F-statistic: {f_stat:.2f}, p-value: {p_value:.4f}")

# =============================
# 6. Key Observations (Markdown)
# =============================
# In a Markdown cell, you can write something like:
"""
### Key Observations
- **Benin** shows the highest median GHI but also has greater variability compared to Sierra Leone and Togo.
- **Sierra Leone** has moderate GHI values but DNI and DHI are slightly lower on average.
- **Togo** has the lowest variability, suggesting more stable solar conditions, though its GHI is slightly lower than Benin's.
"""

# =============================
# 7. Bonus: Bar Chart Ranking Countries by Average GHI
# =============================
# Mean GHI per country, sorted from highest to lowest so that the bar order
# is the ranking.
avg_ghi = data.groupby('Country')['GHI'].mean().sort_values(ascending=False)

plt.figure(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn >= 0.13 and emits
# a FutureWarning. Mapping `hue` to the same labels as `x` keeps one colour
# per bar; `dodge=False` keeps each bar centred on its category.
ax = sns.barplot(
    x=avg_ghi.index,
    y=avg_ghi.values,
    hue=avg_ghi.index,
    palette="Set2",
    dodge=False,
)
# Older seaborn versions add a legend for `hue`; it only repeats the x-axis
# labels, so remove it when present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.ylabel('Average GHI (W/m²)')
plt.title('Countries Ranked by Average GHI', fontsize=14)
plt.show()