In [162]:
import pandas as pd
import numpy as np
from pandas import DataFrame
from pandas import Series
from copy import deepcopy
from datetime import datetime
import json

# Sentinel used wherever a value is missing: it is the fillna() value for the
# raw dataset and the "other_value" code of every multi-select column spec.
defaultMissingValue = 999999


def loadLocalJsonDoc(filepath, dataProp=''):
    """
    Read a UTF-8 encoded JSON file and return its deserialised content.

    Parameters
    ----------
    filepath: location of the JSON file; it is passed straight to open().
    dataProp: (optional) top-level key to extract. When it is empty (the
        default) the whole document is returned.

    Returns
    -------
    The parsed document, or the value stored under ``dataProp``.

    Raises
    ------
    KeyError when ``dataProp`` is given but missing from the document.
    """
    with open(file=filepath, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    # A falsy dataProp means "no sub-selection requested".
    return parsed[dataProp] if dataProp else parsed


# Import dataset (semicolon-separated export of the survey)
services = pd.read_csv(
    'Caracterización_de_punto_de_servicio_-_Aurora_Office_-_Ronda_2_-_all_versions_-_False_-_2023-11-28-13-59-47.csv', sep=';', index_col=False)

# Where the organization was answered as "otra", use the free-text answer instead
services.loc[services['organizacionimplementadora'] == "otra",
             'organizacionimplementadora'] = services['cualotraorganizacionimp']
services.loc[services['organizacionprincipal'] == "otra",
             'organizacionprincipal'] = services['cualotraorganizacionprin']

# Fill missing values of the unaccompanied / separated children variables:
# rows with numeronna == 0 and no answer are set to 'no'.
# Each column is tested against ITS OWN null mask; previously both conditions
# used the mask of 'nnanoacompanados', so 'nnaseparados' was filled (or its
# existing answers overwritten) based on the wrong column.
common = services['nnanoacompanados'].isnull()
common1 = services['nnaseparados'].isnull()
condition = [(services['numeronna'] == 0) & (common)]
condition1 = [(services['numeronna'] == 0) & (common1)]
fill_with = ['no']

services['nnanoacompanados'] = np.select(
    condition, fill_with, default=services['nnanoacompanados'])
services['nnaseparados'] = np.select(
    condition1, fill_with, default=services['nnaseparados'])

# Every remaining missing value gets the sentinel code
services = services.fillna(defaultMissingValue)



In [163]:
# Keep every observation except those whose informed consent is 'no'
services = services.loc[services['consentimiento'].ne('no')]

In [165]:
# Display names for the principal organizations.
# Each pair is (value found in the data, name to show instead); values that are
# not listed are left untouched by replace().
principal_dict = dict([
    ("hias", "Hias"),
    ("worldvision", "World Vision"),
    ("cruzroja", "Cruz Roja"),
    ("serviciojesuitaparamigrantes", "Servicio Jesuita a Migrantes"),
    ("samaritanspurse", "Samaritans Purse"),
    ("ayudaenaccion", "Ayuda en Acción"),
    ("unicef", "Unicef"),
    ("pastoralsocial", "Pastoral Social"),
    ("acnur", "ACNUR"),
    ("oim", "OIM"),
    ("Sub secretaria de la niñez", "Subsecretaría de la Niñez"),
    ("consejonoruegopararefugiadosnrc", "NRC"),
    ("globalbrigades", "Global Brigades"),
    ("A titulo personal pero bajo figura de asociación de desarrollo",
     "Asociación Local de Desarrollo"),
    ("medicossinfronteras", "Médicos sin Fronteras"),
    ("Delegación presidencial - encargada social Yenderi Jaldin y de logística Franco Villalobos",
     "Delegación Presidencial"),
    ("Simn", "SIMN"),
    ("Delegación presidencial regional de Tarapaca (DPR)",
     "Delegación Presidencial Regional de Tarapacá"),
])

services['organizacionprincipal'] = services['organizacionprincipal'].replace(
    to_replace=principal_dict)

In [166]:
# Display names for the implementing organizations.
# Each pair is (value found in the data, name to show instead); values that are
# not listed are left untouched by replace().
implemen_dict = dict([
    ("hias", "Hias"),
    ("worldvision", "World Vision"),
    ("cruzroja", "Cruz Roja"),
    ("serviciojesuitaparamigrantes", "Servicio Jesuita a Migrantes"),
    ("samaritanspurse", "Samaritans Purse"),
    ("ayudaenaccion", "Ayuda en Acción"),
    ("unicef", "Unicef"),
    ("pastoralsocial", "Pastoral Social"),
    ("Hogar de cristo", "Hogar de Cristo"),
    ("acnur", "ACNUR"),
    ("Fundación grandes valores futbol mas", "Fundación Futbol Más"),
    ("retinternacional", "RET Internacional"),
    ("consejonoruegopararefugiadosnrc", "NRC"),
    ("globalbrigades", "Global Brigades"),
    ("A titulo personal pero bajo figura de asociación de desarrollo",
     "Asociación Local de Desarrollo"),
])

services['organizacionimplementadora'] = services['organizacionimplementadora'].replace(
    to_replace=implemen_dict)

In [167]:
# Display names for the reporting points (raw value -> name to show)
places_dict = {
    "colchane": "Colchane",
    "arica": "Arica",
    "villa del rosario": "Villa del Rosario",
    "chinacota": "Chinácota",
    "iquique": "Iquique",
    "lospatios": "Los Patios",
    "dtlobito": "DT Lobito",
    "loschiles": "Los Chiles",
    "necocli": "Necoclí",
    "ipiales": "Ipiales",
    "pasocanoas": "Paso Canoas",
    "lajasblancas": "Lajas Blancas",
    "sanvicente": "San Vicente",
    "sanjose": "San José",
}
services['punto_reporte'] = services['punto_reporte'].replace(to_replace=places_dict)

# Display names for the countries (raw value -> name to show)
country_dict = dict(
    colombia="Colombia",
    chile="Chile",
    panama="Panama",
    costarica="Costa Rica",
)
services['pais'] = services['pais'].replace(to_replace=country_dict)



In [168]:
# Rename the variables with the mapping stored in defaults/rename_columns.json
newColumns = loadLocalJsonDoc("defaults/rename_columns.json")

# After renaming, drop the two variables related to final observations and
# scarce services (they are not needed in Carto, they are important on other
# analysis).
services_carto = (
    services
    .rename(columns=newColumns)
    .drop(columns=["observacion", "serviciosescasos"])
)

In [169]:
# Helper used to create the Unix time variable
def toUnixTimestamp(time, format: str = "%d-%m-%Y"):
    """
    Convert a date/time string into a Unix timestamp in milliseconds.

    Parameters
    ----------
    time: text to parse.
    format: strptime pattern describing ``time`` (day-month-year by default).

    Returns
    -------
    int: milliseconds elapsed since 1970-01-01 00:00:00. Naive values are
    measured against the naive epoch as-is; values parsed with a UTC offset
    (``%z``) are converted to UTC first.

    Raises
    ------
    ValueError if ``time`` does not match ``format``; TypeError if ``time`` is
    not a string.
    """
    target = datetime.strptime(time, format)
    if target.tzinfo is not None:
        # An aware datetime cannot be subtracted from the naive epoch, so
        # shift it to UTC and drop the tzinfo.
        target = target.replace(tzinfo=None) - target.utcoffset()
    elapsed = target - datetime(1970, 1, 1)
    # Integer arithmetic on the timedelta fields keeps the millisecond part,
    # which int(total_seconds()) * 1000 used to throw away.
    return (elapsed.days * 86_400_000
            + elapsed.seconds * 1_000
            + elapsed.microseconds // 1_000)

# Organize services in order to have them separated by | and same categories of first round
def codifyServices(value: str, values_dict: dict[str, int], otherValue: str):
    """
    Translate a whitespace-separated list of labels into pipe-separated codes.

    Parameters
    ----------
    value: cell content, e.g. "label_a label_b". Anything that is not a string
        (numeric placeholder, NaN, None, numpy scalar, ...) is treated as
        "no answer".
    values_dict: mapping label -> code.
    otherValue: code used for labels missing from ``values_dict`` and returned
        as-is when there is nothing to codify.

    Returns
    -------
    str: the codes joined with "|", in the order the labels appear; or
    ``otherValue`` when ``value`` is not a string or contains no label.
    """
    # Only text can carry labels; the old float/int type check let None, bool
    # and numpy scalars through to .split() and crashed on them.
    if not isinstance(value, str):
        return otherValue

    # split() without argument ignores leading/trailing/repeated whitespace,
    # so stray spaces no longer produce empty labels coded as otherValue.
    labels = value.split()
    if not labels:
        return otherValue

    return "|".join(str(values_dict.get(label, otherValue)) for label in labels)


def processColumn(dfColumn: Series, values_dict: dict[int, str], other_value: str):
    """
    Codify every cell of a column with codifyServices.

    dfColumn: Series whose cells hold the labels to codify
    values_dict: mapping code -> label; it is inverted here so that labels can
        be looked up (if a label appears twice, the last code wins)
    other_value: code handed to codifyServices for anything it cannot map

    return Series with one codified value per cell
    """
    code_by_label = {label: code for code, label in values_dict.items()}

    def encode(cell):
        return codifyServices(cell, code_by_label, other_value)

    return dfColumn.apply(encode)


def processMultValueColumns(df: DataFrame, columnObjectList: list[dict]):
    """
    Add one codified column to ``df`` for every column object in the list.

    df: DataFrame object; it is modified in place (the output columns are
        assigned on it) and also returned
    columnObjectList: list of column object
    columnObject: dictionary {"target_column": str, "output_column": str, values_dict: dict, other_value: str}
        - target_column: column of ``df`` to read
        - output_column: column of ``df`` to write
        - values_dict: mapping code -> label, passed to processColumn
        - other_value: fallback code; converted to str before use

    A column object that cannot be processed (missing key, missing column,
    ...) is reported on stdout and skipped; the remaining ones still run.

    return DataFrame Object (the same object received as ``df``)
    """
    for position, columnObject in enumerate(columnObjectList):
        # Known up-front so the error report can say which spec failed even
        # when the failure happens while reading the spec itself.
        target_column = output_column = None
        try:
            target_column = columnObject["target_column"]
            output_column = columnObject["output_column"]
            values_dict = columnObject["values_dict"]
            other_value = str(columnObject["other_value"])
            df[output_column] = processColumn(
                df[target_column], values_dict, other_value)
        except Exception as e:
            # Best-effort by design: keep going, but say what was skipped and
            # why (a bare KeyError message is just the key name).
            print(
                f"skipped column object #{position} "
                f"(target_column={target_column!r}, output_column={output_column!r}): "
                f"{type(e).__name__}: {e}")
            continue
    return df

# Export to CSV or JSON
def exportToFile(df: DataFrame, fileType: str, exportName: str, encoding: str = 'utf-8'):
    """
    Write ``df`` to disk and print the name of the file that was written.

    df -> Pandas DataFrame object
    fileType -> Either "csv" or "json"; any value other than "csv" is
        exported as JSON
    exportName -> File location without extension (".csv" / ".json" is appended)
    encoding -> Text encoding of the written file (previously accepted but
        ignored)
    """
    if (fileType == "csv"):
        name = f"{exportName}.csv"
        df.to_csv(name, encoding=encoding)
    else:
        name = f"{exportName}.json"
        # to_json has no encoding argument, so give it a handle that was
        # opened with the requested encoding.
        with open(name, mode="w", encoding=encoding) as handle:
            df.to_json(handle, orient="records")
    print(f"data export to {name}")

In [170]:
# Parse the "fecha" column (year-month-day text) into the Unix timestamp
# returned by toUnixTimestamp; the format is forwarded by apply() as a keyword.
services_carto["timeunix"] = services_carto["fecha"].apply(
    toUnixTimestamp, format="%Y-%m-%d")

In [171]:
# Code books used to turn each multi-select answer into pipe-separated codes
codify_dict = loadLocalJsonDoc("defaults/codification_dict.json")

# serv_tipo (separating by pipe symbol and categorized with number)
services_dict = codify_dict["services_dict"]

# re structure variable cuenta_con
cuenta_con_dict = codify_dict["cuenta_con_dict"]

# re structure variable children services (cual_serv1)
cual_serv1_dict = codify_dict["cual_serv1_dict"]

# re structure variable women services (cual_ser_2)
cual_ser_2_dict = codify_dict["cual_ser_2_dict"]

# re structure variable data storage (almacenamientoregistros)
registro_dict = codify_dict["registro_dict"]

# variable funding
financ_dict = codify_dict["financ_dict"]

# variable challenges
reto_dict = codify_dict["reto_dict"]

# variable languages
idio_dict = codify_dict["idio_dict"]

# variable medios
medio_dict = codify_dict["medio_dict"]

# (target column, output column, code book) for every multi-select variable.
# "almacenamientoregistros" is coded with registro_dict; it used to point at
# cual_ser_2_dict (copy-paste slip), which left registro_dict unused.
column_specs = [
    ("serv_tipo", "serv_tipo1", services_dict),
    ("cuenta_con", "cuenta_c_1", cuenta_con_dict),
    ("cual_serv1", "cual_ser_1", cual_serv1_dict),
    ("cual_ser_2", "cual_ser_3", cual_ser_2_dict),
    ("almacenamientoregistros", "almacenamientoregistros_", registro_dict),
    ("financiamiento", "financb", financ_dict),
    ("princ_reto", "princ_re_1", reto_dict),
    ("idioma_ent", "idioma_e_1", idio_dict),
    ("medios_bri", "medios_b_1", medio_dict),
]

# Column objects in the shape expected by processMultValueColumns
values = [
    {
        "target_column": target_column,
        "output_column": output_column,
        "values_dict": code_book,
        "other_value": defaultMissingValue
    }
    for target_column, output_column, code_book in column_specs
]

In [172]:
# Add the codified columns described by `values` to services_carto
output_df = processMultValueColumns(df=services_carto, columnObjectList=values)

In [173]:
# Write the processed dataset to output.csv
exportToFile(df=output_df, fileType="csv", exportName="output")

data export to output.csv
