# Tableau Processing
This notebook preprocesses the data into a format suitable for loading into Tableau.

In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import EDA_utils

In [2]:
# Load the raw CSV, then pass it through EDA_utils.remove_aggregates
# (presumably drops aggregate rows — confirm in EDA_utils).
raw_data = pd.read_csv("wb_data.csv")
data = EDA_utils.remove_aggregates(raw_data)

Generate the features that were explored in the EDA_2 notebook.

In [3]:
# Derived features: the greenhouse-gas column divided by the population column
# and by the GDP column. Both new columns are written back onto `data`.
ghg_total = data["EN.ATM.GHGT.KT.CE"]
data["GHGT_Capita"] = ghg_total / data["SP.POP.TOTL"]
data["GHGT_GDP"] = ghg_total / data["NY.GDP.MKTP.CD"]

Get the latest available value, and the year it was measured, for each column.

Initially, a single DataFrame and CSV file were generated, with all the missing values in separate columns. However, the following problems arose:
- The presence of null values means the columns cannot be saved as integers
- Multiple measurement years needing unique column names

Hence the approach was changed to save each indicator as a separate CSV file.

In [4]:
# NOTE(review): presumably a leftover from the initial single-DataFrame approach
# described in the markdown above (an empty frame indexed by country); it is
# disabled and nothing in the visible cells reads `data_copy`.
# data_copy = pd.DataFrame(index=data.country.unique())

In [5]:
# NOTE: no copy is made here — `indicators_dict` is the very same object as
# EDA_utils.indicators_dict, so the two derived-feature descriptions added
# below are also visible through the module attribute.
indicators_dict = EDA_utils.indicators_dict
indicators_dict.update({
    "GHGT_Capita": "Greenhouse gases in CO2 per capita",
    "GHGT_GDP": "Greenhouse gases in CO2 per GDP",
})

In [8]:
# Export one CSV per indicator so that each file carries its own
# "Measurement Year" column alongside the indicator value.
output_dir = "wb_tableau"
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target folder exists; otherwise a fresh checkout fails on the first write.
os.makedirs(output_dir, exist_ok=True)

for col, desc in indicators_dict.items():
    # rename_axis returns a new frame rather than mutating the index in place,
    # so an Index object that may be shared with other frames is left untouched.
    data_segment = (
        EDA_utils.get_latest_data(data, col, drop_year=False)
        .rename_axis("Country")
        .rename(columns={col: desc, "year": "Measurement Year"})
    )
    # Quick sanity checks per indicator: preview, dtypes and null counts.
    print(data_segment.head())
    print(data_segment.dtypes)
    print(data_segment.isna().sum())
    data_segment.to_csv(os.path.join(output_dir, f"{col}.csv"))

             Measurement Year  Total greenhouse gases in CO2
Country                                                     
Afghanistan              2018                        98920.0
Albania                  2018                        10080.0
Algeria                  2018                       218910.0
Andorra                  2018                          590.0
Angola                   2018                        79730.0
Measurement Year                   int64
Total greenhouse gases in CO2    float64
dtype: object
Measurement Year                 0
Total greenhouse gases in CO2    0
dtype: int64
                Measurement Year  Renewable energy in %
Country                                                
Afghanistan                 2015              86.050111
Albania                     2015             100.000000
Algeria                     2015               0.322684
American Samoa              2015               0.885478
Andorra                     2015              86.116700
Me