# Libraries import

In [1]:
from pandas.core.frame import DataFrame
import psycopg2
from psycopg2 import sql
import numpy as np
import pandas as pd
import re
from datetime import date, datetime
import os
from tqdm import tqdm
from dotenv import load_dotenv
import json

# Initialization and database connection

In [2]:
load_dotenv()

db_connection_dict = {
    'dbname': str(os.getenv('DB_NAME')),
    'user': str(os.getenv('DB_USER')),
    'password': str(os.getenv('DB_PASSWORD')),
    'host': str(os.getenv('DB_HOST')),
    'port': str(os.getenv('DB_PORT')),
    'options': """-c search_path="colombia" """
}

In [3]:
class errorhandling(object):
    """
    classe pour la gestion d'erreur
    """

    def __init__(self):
        self.time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        self.file_error_name = """logs/logErrors-{}.txt""".format(
            str(self.time))
        self.log_error = open(self.file_error_name, "w")
        self.log_error.write("file;table;error;id\n")

    def error_save(self):
        self.log_error.close()

In [4]:
class database:

    def __init__(self,param_dict):
        self.conn = self.connect_bd(param_dict)

    def connect_bd(self, param_dict):
        conn = None
        try:
            conn = psycopg2.connect(**param_dict)
            conn.set_client_encoding('UTF8')
            print("Connection successful")
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
            conn = None

        return conn

In [5]:
error_handler = errorhandling()
conn = database(db_connection_dict).conn
cursor = conn.cursor()

Connection successful


# Queries

In [49]:
with open('query_parameters.json') as json_file:
    query_parameters = json.load(json_file)

In [25]:
def create_join_clause(parameters_dict,field_to_update):
    
    joins = parameters_dict.get("Joins").get(field_to_update)
    join_clause = sql.SQL("")

    if joins: # check if dictionary is not empty
        for key, value in joins.items():
            if "tables_to_join" in value: # check if there are tables to join
                tables_list = value.get("tables_to_join")
                if tables_list: # check if list is not empty
                    for table in tables_list:
                        join_type = table.get("join_type").strip()
                        table_name = table.get("name").strip()
                        primary_key = value.get("primary_key").strip()
                        foreign_key = joins.get(table_name).get("foreign_keys").get(key).strip()
                        join_query =  sql.SQL(join_type + " join {} on {} = {}\n").format(sql.Identifier(table_name),
                                                                      sql.Identifier(key, primary_key),
                                                                      sql.Identifier(table_name, foreign_key) )
                        join_clause = sql.Composed([join_clause, join_query])

    return join_clause

In [62]:
q = create_join_clause(query_parameters,"Planted_area_ha")
print(q.as_string(conn))

inner join "parcelwaves" on "parcels"."id" = "parcelwaves"."parcelid"



In [10]:
excel_file = os.path.join('missing_data', 'missing_data.xlsx')
missing_data_df = pd.read_excel(excel_file)

In [97]:
def execute_query(cursor,query):

    cursor.mogrify(query)
                
    try:
        cursor.execute(query)
    except Exception as e:
        print(e)
        conn.rollback()
    else:
        conn.commit()

    return cursor.fetchall()

In [96]:
def update_request_by_id(cursor, table, field, value, select_query):
    update_query = sql.SQL("UPDATE {table}\nSET {table}.{field} = {value}\nWHERE id in").format(
                                                table=sql.Identifier(table),
                                                field=sql.Identifier(field),
                                                value=sql.Literal(value)
                                                )
    
    query = sql.Composed([update_query, sql.SQL("("), select_query, sql.SQL(")")])

    cursor.mogrify(query)

    return execute_query(cursor,query)
    

In [106]:
def create_select_request(cursor, parameters_dict, dataframe):
    columns_to_update = parameters_dict.get("Columns_to_update")

    for row in dataframe.itertuples():
        for key1, value1 in columns_to_update.items():
            
            identifying_columns = parameters_dict.get("Identifying_columns").get(key1)
            
            fields_to_select = (value1.get("table_name"),"id")
            join_clause = create_join_clause(parameters_dict,key1)

            for key2, value2 in identifying_columns.items():

                field_name = value2.get("field_name")
                value_to_check = getattr(row, key2).strip().lower()
                value_to_update = getattr(row, key1)
                table = value1.get("table_name")
                where_condition = sql.SQL("")

                if not where_condition.as_string(cursor):
                    where_check = sql.SQL("where {field_name}={value}").format(field_name=sql.Identifier(field_name),
                                                                         value = sql.Literal(value_to_check))
                    where_condition = sql.Composed([where_condition, where_check])
                else:
                    where_check = sql.SQL(" AND {field_name}={value}").format(field_name=sql.Identifier(field_name),
                                                                              value = sql.Literal(value_to_check))
                    where_condition = sql.Composed([where_condition, where_check])

                select_fields = sql.SQL("SELECT DISTINCT {fields}\nFROM {table}\n").format(
                                                fields=sql.Identifier(*fields_to_select),
                                                table=sql.Identifier(table))
                
                query = sql.Composed([select_fields, join_clause, where_condition])

                result = execute_query(cursor,query)

                if len(result) == 0:
                    print("No result has been return by query:")
                    print(result)
                elif len(result) == 1:
                    print("One result has been returned by query, making the update:")
                    update_request_by_id(cursor, table, value1.get("field_name"), value_to_update, query)
                else:
                    print("Multiple results have been return by the query")
    

In [103]:
create_select_request(cursor,query_parameters, missing_data_df)

One result has been returned by query, making the update:
column "parcels" of relation "parcels" does not exist
LINE 2: SET "parcels"."plantedarea" = 'bok noi-village no.6-2015-p1-...
            ^



ProgrammingError: no results to fetch