# World Bank Data

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
def _pivot_country(rows: pd.DataFrame, min_coverage: float) -> pd.DataFrame:
    """Reshape one country's indicator rows into a one-row-per-year table."""
    country_name = rows["Country Name"].iloc[0]
    country_code = rows["Country Code"].iloc[0]

    # After the transpose the former year columns become the index and each
    # indicator name becomes a column.
    table = (
        rows.drop(columns=["Country Code", "Country Name", "Indicator Code"])
        .set_index("Indicator Name")
        .T
    )
    table["Year"] = table.index
    table["Country Name"] = country_name
    table["Country Code"] = country_code
    table = table.reset_index(drop=True)
    table.columns.name = None

    # "Year", "Country Name" and "Country Code" are always populated, so a
    # fourth non-null value means the year has at least one real data point.
    table = table.dropna(thresh=4)
    # Keep a column only if it has at least int(n_rows * min_coverage)
    # non-null values among the surviving rows.
    table = table.dropna(thresh=int(table.shape[0] * min_coverage), axis=1)
    return table


def transform(data: pd.DataFrame, *, min_coverage: float = 0.2) -> pd.DataFrame:
    """
    Transposes csv data so that years are rows and
    indicators are columns.

    The input is expected to hold one row per (country, indicator) with the
    columns "Country Name", "Country Code", "Indicator Name" and
    "Indicator Code"; every remaining column is treated as a year.

    For each country (in order of first appearance) the result contains one
    row per year with one column per indicator, plus "Year", "Country Name"
    and "Country Code". Years without any indicator value are dropped, and
    so are indicator columns with fewer than ``int(n_years * min_coverage)``
    non-null values for that country.

    Args:
        data: Wide-format indicator table as described above.
        min_coverage: Minimum fraction of a country's remaining years that an
            indicator must cover to be kept. The default of 0.2 drops
            indicators that are missing for more than roughly 80% of years.

    Returns:
        The per-country tables stacked vertically. Each country's rows are
        numbered from 0, so index labels repeat across countries; call
        ``reset_index(drop=True)`` if a unique index is required. An empty
        frame with only the three metadata columns is returned when ``data``
        contains no usable rows.

    Rows whose "Country Name" is missing cannot be attributed to a country
    and are skipped.
    """
    # A single groupby pass replaces a full boolean-mask scan per country;
    # sort=False keeps countries in their original order of appearance, and
    # groupby ignores rows whose key is NaN.
    tables = [
        _pivot_country(rows, min_coverage)
        for _, rows in data.groupby("Country Name", sort=False)
    ]

    # pd.concat raises on an empty list, so handle "nothing to do" explicitly.
    if not tables:
        return pd.DataFrame(columns=["Year", "Country Name", "Country Code"])

    return pd.concat(tables)