# Togo EDA
Quick profiling, cleaning, and export of cleaned CSV (local only).

In [None]:
import sys, os, pandas as pd, numpy as np
if 'src' not in sys.path: sys.path.append('src')
from ingest import load_all
import preprocess
# Load everything under 'data', then keep only the rows whose country is 'togo'.
df_all = load_all('data')
df = df_all.loc[df_all['country'] == 'togo'].reset_index(drop=True)
# Run the project's quick preprocessing step on the Togo subset.
df = preprocess.quick_preprocess(df)
df.head()

In [None]:
# Summary statistics for every column, whatever its dtype.
display(df.describe(include='all'))
# The 20 columns with the largest fraction of missing values, worst first.
(
    df.isna()
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Columns that are screened for outliers when zf is applied below.
cols = ['GHI','DNI','DHI','ModA','ModB','WS','WSgust']
def zf(d, cs, z=3.0):
    """Return a copy of ``d`` without rows that are z-score outliers.

    A row is kept only if, for every usable column in ``cs``, its value lies
    within ``z`` population standard deviations (``ddof=0``) of that column's
    mean. Every column's mean and standard deviation are computed on the
    unfiltered input, so the result does not depend on the order of ``cs``.

    Args:
        d: Input DataFrame; it is not modified.
        cs: Iterable of column names to screen. Names that are not columns
            of ``d`` are ignored.
        z: Maximum allowed absolute deviation, in standard deviations.

    Returns:
        A new DataFrame holding the surviving rows, re-indexed from 0.

    Notes:
        Columns whose standard deviation is zero or NaN are skipped. Rows
        with NaN in a screened column are dropped, because the comparison
        against the threshold evaluates to False for NaN.
    """
    keep = None
    for c in cs:
        if c not in d:
            continue
        mu, sd = d[c].mean(), d[c].std(ddof=0)
        if pd.notnull(sd) and sd > 0:
            # Positional boolean mask: avoids index alignment between columns.
            within = ((d[c] - mu).abs() <= z * sd).to_numpy()
            keep = within if keep is None else keep & within
    if keep is None:
        # Nothing was screened; still hand back an independent, re-indexed copy.
        return d.copy().reset_index(drop=True)
    return d[keep].reset_index(drop=True)
# Filter the Togo frame on the selected columns using zf's default z=3.0.
df_clean = zf(df, cols)
# Row counts before and after filtering.
len(df), len(df_clean)

In [None]:
import plotly.express as px

metric = 'GHI'
# Line chart of the first 5000 rows after sorting by Timestamp.
fig1 = px.line(
    df_clean.sort_values('Timestamp').head(5000),
    x='Timestamp',
    y=metric,
)
# Histogram of the same metric over the whole cleaned frame.
fig2 = px.histogram(df_clean, x=metric, nbins=40)
fig1.show()
fig2.show()

In [None]:
# Mean ModA/ModB per value of the 'Cleaning' column, shown only if that column exists.
if 'Cleaning' in df_clean:
    display(df_clean.groupby('Cleaning')[['ModA', 'ModB']].mean())
import seaborn as sns, matplotlib.pyplot as plt
# Restrict the correlation matrix to whichever of these columns are present.
sel = [
    c
    for c in ['GHI','DNI','DHI','TModA','TModB','Tamb','RH','WS','WD']
    if c in df_clean
]
plt.figure(figsize=(8, 6))
sns.heatmap(df_clean[sel].corr(), annot=False, cmap='viridis')
plt.show()

In [None]:
# Write the cleaned frame next to the raw data, without the index column.
out_path = os.path.join('data', 'togo_clean.csv')
df_clean.to_csv(out_path, index=False)
# Echo the destination path as the cell output.
out_path

## Notes & Observations (Togo)
- Second-highest average GHI (≈223.38). DHI is the highest, by a small margin (≈112.63).
- Strong overall potential; ranks close to Benin.
- Time series and distributions show typical daily solar signatures.
- Cleaning appears beneficial; monitor sensor/module performance (ModA/ModB).
- Proceed with site selection and comparison with Benin for ROI estimation.

<!-- Results-focused notebook: observations are stated without workflow notes. -->