# Library imports

In [1]:
from pandas.core.frame import DataFrame
import psycopg2
from psycopg2 import sql
import numpy as np
import pandas as pd
import re
from datetime import date, datetime
import os
from tqdm import tqdm
from dotenv import load_dotenv
import json

# Initialization and database connection

In [2]:
# Load environment variables (python-dotenv) before reading the settings below.
load_dotenv()

# Keyword arguments later unpacked into psycopg2.connect().
# NOTE: str() turns an unset variable (os.getenv -> None) into the literal
# string 'None', so a missing setting is passed on as 'None'.
db_connection_dict = {
    keyword: str(os.getenv(env_name))
    for keyword, env_name in (
        ('dbname', 'DB_NAME'),
        ('user', 'DB_USER'),
        ('password', 'DB_PASSWORD'),
        ('host', 'DB_HOST'),
        ('port', 'DB_PORT'),
    )
}
# Session option: resolve unqualified table names in the "colombia" schema.
db_connection_dict['options'] = """-c search_path="colombia" """

In [3]:
class errorhandling(object):
    """
    Error-logging helper backed by a timestamped text file.

    Creating an instance opens ``logs/logErrors-<timestamp>.txt`` for writing
    and writes the header line ``file;table;error;id``. The open file object
    is available as ``log_error``; call ``error_save`` to close it.
    """

    def __init__(self):
        # Timestamp (second resolution) that makes the log file name unique.
        self.time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        self.file_error_name = """logs/logErrors-{}.txt""".format(
            str(self.time))
        # open(..., "w") does not create missing directories, so make sure
        # the target directory exists instead of failing on a fresh checkout.
        os.makedirs(os.path.dirname(self.file_error_name), exist_ok=True)
        # Explicit encoding so the log content does not depend on the
        # platform's default locale.
        self.log_error = open(self.file_error_name, "w", encoding="utf-8")
        self.log_error.write("file;table;error;id\n")

    def error_save(self):
        """Close the log file, flushing everything written so far."""
        self.log_error.close()

In [4]:
class database:
    """Open a psycopg2 connection and expose it as the ``conn`` attribute."""

    def __init__(self,param_dict):
        """
        Connect with the keyword arguments in *param_dict*.

        ``self.conn`` holds the open connection, or None when connecting
        failed (the error is printed by ``connect_bd``).
        """
        self.conn = self.connect_bd(param_dict)

    def connect_bd(self, param_dict):
        """
        Return a UTF-8 psycopg2 connection, or None on failure.

        *param_dict* is unpacked into ``psycopg2.connect``. Any error is
        printed rather than raised, so callers must check for None.
        """
        conn = None
        try:
            conn = psycopg2.connect(**param_dict)
            conn.set_client_encoding('UTF8')
            print("Connection successful")
        except Exception as error:
            # psycopg2.DatabaseError is a subclass of Exception, so catching
            # Exception alone covers the same cases as before.
            print(error)
            if conn is not None:
                # The connection was opened but its setup failed: close it
                # instead of leaking it when None is returned.
                conn.close()
                conn = None

        return conn

In [5]:
# Open the error log file and the database connection shared by the cells below.
error_handler = errorhandling()
# database.connect_bd returns None when the connection attempt fails; in that
# case the conn.cursor() call below raises AttributeError.
conn = database(db_connection_dict).conn
cursor = conn.cursor()

Connection successful


# Queries

In [6]:
# Average parcels.plantedarea per project name and wave name, restricted to
# the project named 'thammaraksa'.
query = """
select p4.projectname, w.wavename, avg(p5.plantedarea) 
   from parcelwavespecies p
   inner join parcelwaves p2 on p2.id = p.parcelwaveid
   inner join parcels p5 on p5.id = p2.parcelid
   inner join projectwaves p3 on p2.projectwaveid = p3.id
   inner join subprojects s on p3.subprojectid = s.id
   inner join waves w on p3.waveid = w.id
   inner join projects p4 on s.projectid = p4.id
   where p4.projectname = 'thammaraksa'
   group by p4.projectname, w.wavename;
"""

try:
    cursor.execute(query)
except Exception as e:
    # Report the failure instead of hiding it, and reset the aborted
    # transaction so the connection stays usable.
    print(e)
    conn.rollback()
    # There is no result set after a failed execute; fetching would raise a
    # second, unrelated error.
    average_planted_area = []
else:
    conn.commit()
    average_planted_area = cursor.fetchall()

print(average_planted_area)

[('thammaraksa', 'w-15', None)]


In [None]:
# Select gpsfilename and plantedarea from parcels joined to parcelwaves, for
# the rows whose gpsfilename is 'bok noi-village no.6-2015-p1-m3'.
# This cell only defines the query text; it does not execute it.
query = """
select gpsfilename, plantedarea
from parcels
inner join parcelwaves on parcels.id = parcelwaves.parcelid
where gpsfilename = 'bok noi-village no.6-2015-p1-m3';
"""

In [7]:
# Load the query configuration (read below through the "Joins",
# "Columns_to_update" and "Identifying_columns" keys).
# JSON text is UTF-8; name the encoding explicitly so decoding does not depend
# on the platform's default locale.
with open('query_parameters.json', encoding='utf-8') as json_file:
    query_parameters = json.load(json_file)

In [16]:
def create_join_clause(parameters_dict):
    """
    Build the SQL join clause described by the "Joins" entry of the parameters.

    "Joins" maps a table name to its settings. A table with a non-empty
    "tables_to_join" list must provide a "primary_key". Each listed table
    gives a "join_type" and a "name", and must itself appear in "Joins" with
    a "foreign_keys" mapping whose entry for the parent table names the
    referencing column.

    Returns the generated clauses separated by single spaces, for example
    'inner join parcelwaves on parcels.id = parcelwaves.parcelid', or an
    empty string when there is nothing to join.

    Raises ValueError when a required setting is missing, instead of failing
    with an AttributeError on None or emitting 'table.None' into the SQL.
    """
    joins = parameters_dict.get("Joins")
    if not joins:
        return ""

    clauses = []
    for parent_table, parent_settings in joins.items():
        tables_to_join = parent_settings.get("tables_to_join")
        if not tables_to_join:
            continue

        # Same for every table joined to this parent, so look it up once.
        primary_key = parent_settings.get("primary_key")
        if primary_key is None:
            raise ValueError(
                "Joins: table '{}' has tables_to_join but no primary_key".format(
                    parent_table))

        for table in tables_to_join:
            join_type = table.get("join_type")
            table_name = table.get("name")
            if join_type is None or table_name is None:
                raise ValueError(
                    "Joins: every entry in tables_to_join of '{}' needs "
                    "'join_type' and 'name'".format(parent_table))
            join_type = join_type.strip()
            table_name = table_name.strip()

            # The joined table declares which of its columns points back at
            # the parent table.
            joined_settings = joins.get(table_name) or {}
            foreign_keys = joined_settings.get("foreign_keys") or {}
            foreign_key = foreign_keys.get(parent_table)
            if foreign_key is None:
                raise ValueError(
                    "Joins: table '{}' declares no foreign key to '{}'".format(
                        table_name, parent_table))

            clauses.append("{} join {} on {}.{} = {}.{}".format(
                join_type, table_name,
                parent_table, primary_key,
                table_name, foreign_key))

    return " ".join(clauses).strip()

In [9]:
# Preview the join clause built from the loaded parameters; as the last
# expression of the cell, the returned string is displayed by the notebook.
create_join_clause(query_parameters)

'inner join parcelwaves on parcels.id = parcelwaves.parcelid'

In [10]:
# Read missing_data/missing_data.xlsx into a DataFrame (pd.read_excel is
# called with its defaults).
excel_file = os.path.join('missing_data', 'missing_data.xlsx')
missing_data_df = pd.read_excel(excel_file)

In [17]:
def _normalise_identifier_value(value):
    """Strip and lower-case text values; return any other value unchanged."""
    if isinstance(value, str):
        return value.strip().lower()
    return value


def create_select_request(cursor, parameters_dict, dataframe):
    """
    Run one SELECT per row of *dataframe* and per column to update, and print
    the fetched rows.

    Parameters:
        cursor: open psycopg2 cursor; its own connection (``cursor.connection``)
            is committed after a successful query and rolled back after a
            failed one.
        parameters_dict: configuration with
            - "Columns_to_update": entries providing "field_name" and
              "table_name";
            - "Identifying_columns": maps a dataframe column name to an entry
              providing the "field_name" it is compared against;
            - "Joins": optional, see ``create_join_clause``.
        dataframe: pandas DataFrame whose rows supply the identifying values.
            Columns are read with ``getattr`` on ``itertuples()`` rows, so the
            column names must be valid Python identifiers.

    Returns None. A failed query is printed and skipped.

    Field names, table names and the join clause come from the configuration
    and are inserted into the SQL text as-is. Values read from the dataframe
    are passed as query parameters, so they are quoted by the driver.
    """
    columns_to_update = parameters_dict.get("Columns_to_update")
    identifying_columns = parameters_dict.get("Identifying_columns")

    # dict.fromkeys removes duplicates while keeping a stable column order
    # (a set would make the order of the selected columns vary between runs).
    field_names = [c["field_name"] for c in columns_to_update.values()]
    field_names += [c["field_name"] for c in identifying_columns.values()]
    fields_to_select_str = ",".join(dict.fromkeys(field_names))

    if not identifying_columns:
        # Without identifying columns there is no WHERE condition to build,
        # hence no query to run.
        return

    join_clause = create_join_clause(parameters_dict)

    # The shape of the WHERE clause only depends on the configuration: build
    # it once, with one placeholder per identifying column.
    where_condition = " AND ".join(
        "{field_name}=%s".format(field_name=column.get("field_name"))
        for column in identifying_columns.values()
    )

    # Use the connection that owns the cursor rather than a module-level one.
    connection = cursor.connection

    for row in dataframe.itertuples():
        values = [
            _normalise_identifier_value(getattr(row, column_name))
            for column_name in identifying_columns
        ]

        for column in columns_to_update.values():
            query_str = """SELECT {fields} FROM {table} {join} where {condition};""".format(
                fields=fields_to_select_str,
                table=column.get("table_name"),
                join=join_clause,
                condition=where_condition,
            )

            # Execute once the full condition is known, not after each
            # identifying column.
            try:
                cursor.execute(query_str, values)
            except Exception as e:
                print(e)
                connection.rollback()
                # Nothing to fetch after a failed execute.
                continue

            connection.commit()
            print(cursor.fetchall())
    

In [18]:
# Run the lookup queries for the spreadsheet rows; the function prints the
# fetched rows itself.
create_select_request(cursor,query_parameters, missing_data_df)