In [4]:
import pandas as pd
import os
from db import connection
from db_utils import DBUtils

## Database importer

In [5]:
# Country standardisation table. Later cells read its `name` and
# `db_entity_id` columns.
entities = pd.read_csv(filepath_or_buffer="./countries-standardized.csv")


In [6]:
# Import the BP Statistical Review dataset into the database: entities first,
# then the dataset, its source, its variables and finally every datapoint.
# Relies on `entities` (loaded from countries-standardized.csv) exposing the
# `name` and `db_entity_id` columns.
with connection as c:
    db = DBUtils(c)

    # Create (or look up) a database entity for every country that has no
    # database id yet, and write that id back onto the `entities` frame.
    new_entities = entities[entities['db_entity_id'].isnull()]
    for entity_index, entity in new_entities.iterrows():
        entities.loc[entity_index, 'db_entity_id'] = db.get_or_create_entity(entity['name'])

    # Inserting the dataset
    # NOTE(review): the third positional argument (35) is passed through
    # unchanged; its meaning is defined by DBUtils.upsert_dataset -- confirm.
    dataset_id = db.upsert_dataset(
        "BP Statistical Review of Global Energy",
        "bpstatreview_2019",
        35,
        tag_id=None,
        description=''
    )

    # Inserting the source
    source_id = db.upsert_source(
        "BP Statistical Review of Global Energy",
        description="",
        dataset_id=dataset_id
    )

    # Inserting variables
    variables = pd.read_csv("variables.csv")
    names_to_ids = {}

    for _, row in variables.iterrows():
        desc = row['notes'] if pd.notnull(row['notes']) else ""
        variable_id = db.upsert_variable(
            name=row['name'],
            code=None,
            unit=row['unit'],
            short_unit=None,
            # Link the variable to the source created above. This used to
            # pass dataset_id, leaving the upserted source_id unused.
            source_id=source_id,
            dataset_id=dataset_id,
            description=desc,
            timespan='',
            coverage='',
            display={}
        )
        names_to_ids[row['name']] = variable_id

    # Inserting datapoints

    # Map each country name to its database entity id once instead of
    # filtering the whole frame for every datapoint row. keep='first' matches
    # the previous `.values[0]` lookup when a name appears more than once.
    entity_ids_by_name = (
        entities.drop_duplicates(subset='name', keep='first')
                .set_index('name')['db_entity_id']
                .to_dict()
    )

    # sorted() only makes the processing order deterministic; every row is an
    # upsert, so the order does not affect the result.
    for filename in sorted(os.listdir("csvs/")):
        # Skip hidden entries such as .DS_Store or .ipynb_checkpoints, which
        # carry no variable id and would break the parse below.
        if filename.startswith("."):
            continue

        # The variable id is the token between the first "_" and the next "."
        # of the file name; it refers to the `id` column of variables.csv.
        v_id = int(filename.split("_")[1].split(".")[0])

        # Translate the CSV-local id to a name, then to the database id.
        variable_name = variables[variables['id'] == v_id]['name'].values[0]
        variable_id = names_to_ids[variable_name]

        data = pd.read_csv("csvs/" + filename)
        for _, row in data.iterrows():
            country = row['country']
            if country not in entity_ids_by_name:
                raise KeyError(
                    "country {!r} in csvs/{} has no row in the entities table".format(
                        country, filename
                    )
                )
            entity_id = entity_ids_by_name[country]

            # NOTE(review): missing values are stored as 0.0 rather than being
            # skipped -- confirm this is intended, since it records a real
            # zero for data that is absent.
            val = row['value'] if pd.notnull(row['value']) else 0.0
            year = row['year']

            db.upsert_one("""
                INSERT INTO data_values
                    (value, year, entityId, variableId)
                VALUES
                    (%s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    value = VALUES(value),
                    year = VALUES(year),
                    entityId = VALUES(entityId),
                    variableId = VALUES(variableId)
            """, [val, int(year), str(int(entity_id)), str(variable_id)])
