# SoGA Dataset Tool
* Zhean Ganituen
* Created On: January 29, 2024
* Last Updated: January 30, 2024

> Dependencies: Python with pandas (required); Jupyter Notebook (optional)

In [2]:
# Tool for Viewing the data set using Pandas

import pandas as pd
import re

# Names of every row that the script entry point exports, starting with the
# "Global" entry. Each name is passed to `array()` -> `country()`, which looks
# it up with `df.loc[...]`, so every entry has to match a row label of
# out.csv exactly (multi-word names are written with underscores).
# The list order is the order in which rows are appended to out.c.
countryList = [
    "Global",
    "China",
    "North_Korea",
    "Taiwan",
    "Cambodia",
    "Indonesia",
    "Laos",
    "Malaysia",
    "Maldives",
    "Myanmar",
    "Philippines",
    "Sri_Lanka",
    "Thailand",
    "Timor_Leste",
    "Vietnam",
    "Fiji",
    "Kiribati",
    "Marshall_Islands",
    "Micronesia",
    "Papua_New_Guinea",
    "Samoa",
    "Solomon_Islands",
    "Tonga",
    "Vanuatu",
    "Armenia",
    "Azerbaijan",
    "Georgia",
    "Kazakhstan",
    "Kyrgyzstan",
    "Mongolia",
    "Tajikistan",
    "Turkmenistan",
    "Uzbekistan",
    "Albania",
    "Bosnia_and_Herzegovina",
    "Bulgaria",
    "Croatia",
    "Czechia",
    "Hungary",
    "North_Macedonia",
    "Montenegro",
    "Poland",
    "Romania",
    "Serbia",
    "Slovakia",
    "Slovenia",
    "Belarus",
    "Estonia",
    "Latvia",
    "Lithuania",
    "Moldova",
    "Russia",
    "Ukraine",
    "Brunei",
    "Japan",
    "South_Korea",
    "Singapore",
    "Australia",
    "New_Zealand",
    "Andorra",
    "Austria",
    "Belgium",
    "Cyprus",
    "Denmark",
    "Finland",
    "France",
    "Germany",
    "Greece",
    "Iceland",
    "Ireland",
    "Israel",
    "Italy",
    "Luxembourg",
    "Malta",
    "Netherlands",
    "Norway",
    "Portugal",
    "Spain",
    "Sweden",
    "Switzerland",
    "UK",
    "Argentina",
    "Chile",
    "Uruguay",
    "Canada",
    "USA",
    "Antigua_and_Barbuda",
    "Bahamas",
    "Barbados",
    "Belize",
    "Cuba",
    "Dominica",
    "Dominican_Republic",
    "Grenada",
    "Guyana",
    "Haiti",
    "Jamaica",
    "Saint_Lucia",
    "Saint_Vincent_and_the_Grenadines",
    "Suriname",
    "Trinidad_and_Tobago",
    "Bolivia",
    "Ecuador",
    "Peru",
    "Colombia",
    "Costa_Rica",
    "El_Salvador",
    "Guatemala",
    "Honduras",
    "Mexico",
    "Nicaragua",
    "Panama",
    "Venezuela",
    "Brazil",
    "Paraguay",
    "Algeria",
    "Bahrain",
    "Egypt",
    "Iran",
    "Iraq",
    "Jordan",
    "Kuwait",
    "Lebanon",
    "Libya",
    "Morocco",
    "Palestine",
    "Oman",
    "Qatar",
    "Saudi_Arabia",
    "Syria",
    "Tunisia",
    "Turkey",
    "UAE",
    "Yemen",
    "Afghanistan",
    "Bangladesh",
    "Bhutan",
    "India",
    "Nepal",
    "Pakistan",
    "Angola",
    "Central_African_Republic",
    "Congo",
    "Congo_Kinshasa",
    "Equatorial_Guinea",
    "Gabon",
    "Burundi",
    "Comoros",
    "Djibouti",
    "Eritrea",
    "Ethiopia",
    "Kenya",
    "Madagascar",
    "Malawi",
    "Mauritius",
    "Mozambique",
    "Rwanda",
    "Seychelles",
    "Somalia",
    "Tanzania",
    "Uganda",
    "Zambia",
    "Botswana",
    "Lesotho",
    "Namibia",
    "South_Africa",
    "Eswatini",
    "Zimbabwe",
    "Benin",
    "Burkina_Faso",
    "Cameroon",
    "Cabo_Verde",
    "Chad",
    "Gambia",
    "Ghana",
    "Guinea",
    "Guinea_Bissau",
    "Liberia",
    "Mali",
    "Mauritania",
    "Niger",
    "Nigeria",
    "Sao_Tome_and_Principe",
    "Senegal",
    "Sierra_Leone",
    "Togo",
    "American_Samoa",
    "Bermuda",
    "Cook_Islands",
    "Greenland",
    "Guam",
    "Monaco",
    "Nauru",
    "Niue",
    "Northern_Mariana_Islands",
    "Palau",
    "Puerto_Rico",
    "Saint_Kitts_and_Nevis",
    "San_Marino",
    "Tokelau",
    "Tuvalu",
    "South_Sudan",
    "Sudan",
]

# Category number -> name of the matching column in the data frame.
# `viewCat()` uses this mapping to pick a column of `df`; numbering starts
# at 1 and follows the order of the names below.
# NOTE(review): "Deitary" looks like a misspelling of "Dietary"; it is kept
# as-is because the name is used verbatim as a column key -- confirm against
# the header of the data file before changing it.
categories = dict(
    enumerate(
        [
            "Baseline_Life_Expectancy",  # 1
            "Air_Pollution",  # 2
            "Ambient_PM",  # 3
            "Ozone",  # 4
            "Household_AP",  # 5
            "Environmental_Occupational_Hazard",  # 6
            "Occupational_Hazard",  # 7
            "Unsafe_Hand_Washing",  # 8
            "Metabolic_Syndrome",  # 9
            "Deitary",  # 10
            "High_Fasting_Plasma_Glucose/Sugar",  # 11
            "Tobacco",  # 12
            "Smoking",  # 13
            "Secondhand_Smoke",  # 14
            "Unsafe_Sex",  # 15
        ],
        start=1,
    )
)

# Read the converted data set into the module-level data frame `df`.
# `index_col=0` turns the first CSV column into the row index, which is what
# the label lookup in `country()` (`df.loc[...]`) relies on.
try:
    df = pd.read_csv("out.csv", index_col=0)
except FileNotFoundError:
    # out.csv does not exist yet. Keep `df` bound (to None) so the name is
    # always defined at module level; the viewing helpers cannot work until
    # `convert()` has produced out.csv and this code has been run again.
    df = None
    print("SoGA Tool Error: use `convert()` command")

def convert(srcPath="SoGA_AnnotatedData.txt", dstPath="out.csv"):
    """Convert the whitespace-delimited text file to a workable .csv file.

    Every line of the input is stripped of leading/trailing whitespace and
    each run of spaces and/or tabs inside it is replaced by a single comma.
    Because any space or tab separates fields, a field that itself contains
    whitespace would be split into several CSV columns.

    Parameters:
        srcPath (str): path of the text file to read. Defaults to the
            AnimoSpace file name "SoGA_AnnotatedData.txt".
        dstPath (str): path of the .csv file to write (overwritten if it
            exists). Defaults to "out.csv".

    Returns:
        None. Prints a confirmation message once the output is written.

    Raises:
        FileNotFoundError: if `srcPath` does not exist.
    """
    with open(srcPath, "r") as data, open(dstPath, "w") as output:
        for line in data:
            # One comma per run of blanks/tabs; strip() first so that no
            # empty field appears at the start or end of the row.
            output.write(re.sub(r"[ \t]+", ",", line.strip()) + "\n")

    # Report only after both files have been closed (and thus flushed).
    print("SoGA Tool: Conversion done.")


def viewCat(catNum):
    """Return the data of all the countries for one category.

    Parameters:
        catNum (int): category number; must be a key of the `categories`
            dictionary.

    Returns:
        The column of the data frame `df` whose name is
        `categories[catNum]`.

    Raises:
        KeyError: if `catNum` is not a key of `categories`, or if the mapped
            name is not a column of `df`.
    """
    columnName = categories[catNum]
    return df[columnName]


def country(countryName):
    """Return all the data for a specific country.

    Parameters:
        countryName (str): name of the country; it is used as a row label
            of the data frame `df`, so it must match the label exactly.

    Returns:
        The result of `df.loc[countryName]`, i.e. the row stored under that
        label.

    Raises:
        KeyError: if `countryName` is not a row label of `df`.
    """
    return df.loc[countryName]


def array(countryName, numValues=14):
    """Return a country's leading values as one comma-separated line.

    The values are taken by position from the row returned by
    `country(countryName)`, converted with `str()`, and each one is followed
    by a comma -- so the result ends with a trailing comma, ready to be used
    as one row of a C array initializer.

    Parameters:
        countryName (str): name of country.
        numValues (int): how many leading values of the row to emit.
            Defaults to 14, the count this tool has always exported.
            NOTE(review): `categories` lists 15 names -- confirm that 14 is
            the intended number of columns.

    Returns:
        str: the values joined as "v0,v1,...,vN,".

    Raises:
        IndexError: if the row holds fewer than `numValues` values.
    """
    row = country(countryName)
    # `.iloc` makes the positional access explicit. Indexing a label-indexed
    # Series with a bare integer (`row[i]`) is deprecated in pandas and emits
    # a FutureWarning.
    return "".join(str(row.iloc[i]) + "," for i in range(numValues))


if __name__ == "__main__":
    # Export every entry of countryList as one line of out.c, in list order.
    # The file is opened once for the whole run rather than once per country.
    # Append mode is kept, so running the script again adds to an existing
    # out.c instead of replacing it.
    with open("out.c", "a") as file:
        for name in countryList:
            file.write(array(name) + "\n")

    print("SoGA Tool: Done.")

  output += str(name[i]) + ","


SoGA Tool: Done.
