In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os

# Make sure the folder that receives every saved table/figure exists up front.
os.makedirs('outputs', exist_ok=True)

# Render figures at a higher resolution, then apply the seaborn grid style.
plt.rcParams['figure.dpi'] = 120
sns.set(style='whitegrid')


In [None]:
# Country label -> path of its cleaned CSV (relative to this notebook).
files = {
    'Benin': '../data/benin_clean.csv',
    'SierraLeone': '../data/sierra_leone_clean.csv',
    'Togo': '../data/togo_clean.csv'
}

# Read each CSV into a DataFrame keyed by country, reporting its shape
# as soon as it is loaded.
dfs = {}
for name, csv_path in files.items():
    frame = pd.read_csv(csv_path)
    dfs[name] = frame
    print(name, 'loaded, shape=', frame.shape)


In [None]:
# Print count / mean / median (50%) / std of the three irradiance columns
# for every loaded country, separated by a header and a blank line.
irradiance_cols = ['GHI', 'DNI', 'DHI']
stat_rows = ['count', 'mean', '50%', 'std']
for country_name, frame in dfs.items():
    print(f"--- {country_name} ---")
    described = frame[irradiance_cols].describe()
    print(described.loc[stat_rows])
    print()

In [None]:
# Stack the irradiance columns of every country into one long frame,
# tagging each row with the country it came from.
labelled_parts = []
for name, frame in dfs.items():
    part = frame[['GHI', 'DNI', 'DHI']].copy()
    part['country'] = name
    labelled_parts.append(part)

combined = pd.concat(labelled_parts, ignore_index=True)
combined.head()


In [None]:
# Mean / median / std of each irradiance metric per country, flattened to
# single-level column names such as "GHI_mean", and saved as a CSV.
metrics = ['GHI', 'DNI', 'DHI']

per_country = combined.groupby('country')[metrics].agg(['mean', 'median', 'std'])
# The aggregation yields (metric, statistic) column pairs; join each pair.
per_country.columns = [
    '_'.join((col_name, stat_name)).strip()
    for col_name, stat_name in per_country.columns
]

summary = per_country.reset_index()
summary.to_csv('outputs/summary_table.csv', index=False)
summary

In [None]:
# One boxplot per irradiance metric comparing the countries; each figure is
# written to outputs/ and then displayed.
for metric in metrics:
    fig, ax = plt.subplots(figsize=(7, 5))
    sns.boxplot(data=combined, x='country', y=metric, ax=ax)
    ax.set_title(f'{metric} distribution by country')
    ax.set_ylabel(metric)
    ax.set_xlabel('')  # the tick labels already name the countries
    fig.tight_layout()

    fname = f'outputs/boxplot_{metric}.png'
    fig.savefig(fname)
    print('Saved', fname)
    plt.show()

In [None]:
# Horizontal bar chart of countries ordered from highest to lowest mean GHI.
ghi_means = summary[['country', 'GHI_mean']]
ghi_rank = ghi_means.sort_values('GHI_mean', ascending=False)

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=ghi_rank, x='GHI_mean', y='country', ax=ax)
ax.set_xlabel('Average GHI')
ax.set_title('Countries ranked by average GHI')
fig.tight_layout()
fig.savefig('outputs/ghi_ranking.png')
plt.show()

In [None]:
# Compare GHI across countries: equal-variance check (Levene), per-country
# normality check (Shapiro-Wilk on a subsample), then one-way ANOVA and
# Kruskal-Wallis.

# Resolve the country labels once so labels and groups cannot drift apart.
countries = combined['country'].unique()
# One NaN-free GHI series per country, in the same order as `countries`.
groups = [combined.loc[combined['country'] == c, 'GHI'].dropna() for c in countries]

# Bind every reported p-value up front so the cell that writes
# outputs/stats_summary.txt never hits a NameError when a test is skipped
# or fails; None marks "not computed".
levene_p = anova_p = kruskal_p = None

# The multi-sample tests need at least two groups. This check has to come
# before Levene too, because scipy's levene rejects fewer than two samples.
enough_groups = len(groups) >= 2

if enough_groups:
    levene_stat, levene_p = stats.levene(*groups)
    print('Levene p-value (equal variances test):', levene_p)
else:
    print('Fewer than two country groups; skipping Levene, ANOVA and Kruskal-Wallis.')

for c, ghi in zip(countries, groups):
    # Fixed-seed subsample of at most 500 rows keeps the check reproducible.
    sample = ghi.sample(n=min(500, len(ghi)), random_state=0)
    if len(sample) < 3:
        # Shapiro-Wilk is documented as requiring at least three observations.
        print(f'{c} Shapiro skipped: needs at least 3 observations, got {len(sample)}')
        continue
    sh_stat, sh_p = stats.shapiro(sample)
    print(f'{c} Shapiro p-value (sampled):', sh_p)

if enough_groups:
    # ANOVA stays best-effort: a failure is reported and recorded as None so
    # the non-parametric test below still runs.
    try:
        anova_stat, anova_p = stats.f_oneway(*groups)
        print('ANOVA p-value:', anova_p)
    except Exception as e:
        print('ANOVA failed:', e)
        anova_p = None
    kruskal_stat, kruskal_p = stats.kruskal(*groups)
    print('Kruskal-Wallis p-value:', kruskal_p)


In [None]:
# Persist the three test p-values, one per line, as a plain-text report.
with open('outputs/stats_summary.txt', 'w') as report:
    for template, p_value in (
        ('Levene p-value: {}\n', levene_p),
        ('ANOVA p-value: {}\n', anova_p),
        ('Kruskal p-value: {}\n', kruskal_p),
    ):
        report.write(template.format(p_value))
print('Saved text summary to outputs/stats_summary.txt')
