In [6]:
import pandas as pd
import psycopg
from contextlib import contextmanager

@contextmanager
def get_connection(config):
    """Open a PostgreSQL connection for ``config`` and close it on exit.

    Args:
        config: Mapping with the keys ``dbname``, ``user``, ``password``,
            ``host`` and ``port``; each value is passed to ``psycopg.connect``.

    Yields:
        The open connection returned by ``psycopg.connect``.

    Raises:
        psycopg.OperationalError: Re-raised, after being reported, when the
            connection attempt itself fails.
    """
    # Guard only the connection attempt: an OperationalError raised later by the
    # caller's ``with`` body must not be reported as a failure to connect.
    try:
        conn = psycopg.connect(
            dbname=config["dbname"],
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"],
        )
    except psycopg.OperationalError as e:
        print(f"Error connecting to database {config['dbname']}: {e}")
        raise

    print(f"Connected to database {config['dbname']} successfully.")
    try:
        yield conn
    finally:
        # Release the connection whether the body finished normally or raised.
        conn.close()
        print(f"Connection to database {config['dbname']} closed.")

def fetch_data(conn, query, params=None):
    """Run ``query`` on ``conn`` and return its rows and column names.

    Args:
        conn: Connection object providing ``cursor()`` as a context manager.
        query: Statement passed to ``cursor.execute``.
        params: Optional parameters passed alongside ``query``.

    Returns:
        A ``(rows, column_names)`` tuple: the result of ``cursor.fetchall()``
        and the first element of every entry in ``cursor.description``.
    """
    with conn.cursor() as cur:
        cur.execute(query, params)
        rows = cur.fetchall()
        column_names = [column[0] for column in cur.description]
    return rows, column_names

def run_query_with_dynamic_columns(db_configs, user_query, chunk_size=10000, params=None):
    """Run ``user_query`` on every configured database and stack the results.

    Args:
        db_configs: Iterable of connection config mappings; each needs at least
            the ``dbname`` key in addition to what ``get_connection`` reads.
        user_query: Statement forwarded to ``fetch_data``.
        chunk_size: Accepted by the signature but not read by this function.
        params: Optional query parameters forwarded to ``fetch_data``.

    Returns:
        One DataFrame holding the rows from all databases that returned data,
        with an extra ``database`` column naming the source; an empty
        DataFrame when no database returned rows. A database that raises is
        reported and skipped.
    """
    frames = []

    for config in db_configs:
        try:
            with get_connection(config) as connection:
                rows, headers = fetch_data(connection, user_query, params)

                if not rows:
                    print(f"No data returned from {config['dbname']}.")
                    continue

                # Tag every row with the database it came from
                frame = pd.DataFrame(rows, columns=headers)
                frame['database'] = config['dbname']
                frames.append(frame)
        except Exception as e:
            print(f"Error processing database {config['dbname']}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Example usage
import os  # needed only for the credential lookup below

# The password is taken from the PGPASSWORD environment variable when it is set,
# so a real credential does not have to be saved inside the notebook; the
# fallback keeps the previous local-development value.
db_configs = [
    {
        "dbname": "Employees",
        "user": "postgres",
        "password": os.environ.get("PGPASSWORD", "root"),
        "host": "localhost",
        "port": "5432"
    }
]


In [10]:
# Parameterised sample query: each %s placeholder is filled from `params`
user_query = (
    "SELECT * FROM employees "
    "WHERE first_name ILIKE %s AND last_name ILIKE %s"
)
params = ["%Se%", "%Si%"]  # one ILIKE pattern per placeholder, in order

# Run the query against every configured database and show the combined result
result_df = run_query_with_dynamic_columns(db_configs, user_query, params=params)
print(result_df)


Connected to database Employees successfully.
Connection to database Employees closed.
     emp_no  birth_date      first_name      last_name gender   hire_date  \
0     10361  1961-04-22           Seshu          Sidou      F  1986-10-23   
1     10717  1956-01-27           Serap       Tasistro      M  1987-06-27   
2     13579  1963-10-13           Basem        Passino      M  1987-03-12   
3     26793  1956-02-18           Serap          Sidhu      F  1988-12-25   
4     30934  1963-03-26          Kensei          Rossi      F  1988-07-01   
..      ...         ...             ...            ...    ...         ...   
166  478520  1957-04-01  Chandrasekaran     Sambasivam      F  1992-08-07   
167  478861  1953-03-02           Basem  Khasidashvili      F  1985-09-12   
168  492884  1963-03-19          Ulises       Siegrist      M  1995-08-16   
169  494934  1960-11-27           Basem        Bisiani      F  1988-03-05   
170  497080  1956-06-15           Seshu      Stasinski      F  199

In [20]:
import pandas as pd
import psycopg
from contextlib import contextmanager
from fuzzywuzzy import process


@contextmanager
def get_connection(config):
    """Open a PostgreSQL connection for ``config`` and close it on exit.

    Args:
        config: Mapping with the keys ``dbname``, ``user``, ``password``,
            ``host`` and ``port``; each value is passed to ``psycopg.connect``.

    Yields:
        The open connection returned by ``psycopg.connect``.

    Raises:
        psycopg.OperationalError: Re-raised, after being reported, when the
            connection attempt itself fails.
    """
    # Guard only the connection attempt: an OperationalError raised later by the
    # caller's ``with`` body must not be reported as a failure to connect.
    try:
        conn = psycopg.connect(
            dbname=config["dbname"],
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"],
        )
    except psycopg.OperationalError as e:
        print(f"Error connecting to database {config['dbname']}: {e}")
        raise

    print(f"Connected to database {config['dbname']} successfully.")
    try:
        yield conn
    finally:
        # Release the connection whether the body finished normally or raised.
        conn.close()
        print(f"Connection to database {config['dbname']} closed.")


def fetch_all_data(conn, table_name):
    """Fetch every row of ``table_name`` together with its column names.

    The table name is composed into the statement as a quoted SQL identifier
    (``psycopg.sql.Identifier``) rather than interpolated into the query text,
    so a crafted name cannot inject additional SQL. A dotted name such as
    ``"public.employees"`` is treated as a schema-qualified identifier.
    Because the identifier is quoted, it is matched case-sensitively.

    Args:
        conn: Open connection providing ``cursor()`` as a context manager.
        table_name: Name of the table to read, optionally schema-qualified.

    Returns:
        A ``(rows, column_names)`` tuple: the result of ``cursor.fetchall()``
        and the first element of every entry in ``cursor.description``.
    """
    from psycopg import sql  # local import keeps this function self-contained

    query = sql.SQL("SELECT * FROM {};").format(
        sql.Identifier(*table_name.split("."))
    )
    with conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchall(), [desc[0] for desc in cursor.description]


def fetch_data_from_table(db_configs, table_name):
    """Read ``table_name`` from every configured database into one DataFrame.

    Args:
        db_configs: Iterable of connection config mappings; ``dbname`` is also
            used to label the rows and the progress messages.
        table_name: Table name forwarded to ``fetch_all_data``.

    Returns:
        The concatenated rows of all databases that returned data, with a
        ``database`` column naming the source, or an empty DataFrame when none
        did. A database that raises is reported and skipped.
    """
    frames = []

    for config in db_configs:
        try:
            with get_connection(config) as connection:
                rows, headers = fetch_all_data(connection, table_name)

                if not rows:
                    print(
                        f"No data returned from {config['dbname']} for table {table_name}."
                    )
                    continue

                # Label each row with the database it was read from
                frame = pd.DataFrame(rows, columns=headers)
                frame["database"] = config["dbname"]
                frames.append(frame)
        except Exception as e:
            print(f"Error processing database {config['dbname']}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def fuzzy_search(df, search_term):
    """Fuzzy-match ``search_term`` against every object-dtype column of ``df``.

    Args:
        df: DataFrame to search.
        search_term: Text matched as a whole against each column's distinct,
            non-null values via ``process.extract``.

    Returns:
        Dict mapping a column name to the list of its values that scored above
        80; columns without such a value are omitted.
    """
    # Only object-dtype columns are searched
    object_columns = [name for name in df.columns if df[name].dtype == object]

    hits_by_column = {}
    for column in object_columns:
        candidates = df[column].dropna().unique()
        scored = process.extract(search_term, candidates, limit=None)
        hits = [value for value, score in scored if score > 80]  # fixed cut-off
        if hits:
            hits_by_column[column] = hits

    return hits_by_column


# Example usage
import os  # needed only for the credential lookup below

# The password is taken from the PGPASSWORD environment variable when it is set,
# so a real credential does not have to be saved inside the notebook; the
# fallback keeps the previous local-development value.
db_configs = [
    {
        "dbname": "Employees",
        "user": "postgres",
        "password": os.environ.get("PGPASSWORD", "root"),
        "host": "localhost",
        "port": "5432",
    }
]

# Fetch all data from the specified table
table_name = "employees"
data_df = fetch_data_from_table(db_configs, table_name)

# Print the DataFrame
print("All Data from Employees Table:")
print(data_df)

# Perform a fuzzy search; the term is matched as one string against each column
search_term = "1953 geor cello"  # Example search term
fuzzy_results = fuzzy_search(data_df, search_term)

# Print the fuzzy search results, one line per column that produced matches
print(f"\nFuzzy Search Results for '{search_term}':")
for column, matches in fuzzy_results.items():
    print(f"In column '{column}': {matches}")


Connected to database Employees successfully.
Connection to database Employees closed.
All Data from Employees Table:
        emp_no  birth_date first_name last_name gender   hire_date   database
0        10001  1953-09-02     Georgi   Facello      M  1986-06-26  Employees
1        10002  1964-06-02    Bezalel    Simmel      F  1985-11-21  Employees
2        10003  1959-12-03      Parto   Bamford      M  1986-08-28  Employees
3        10004  1954-05-01  Chirstian   Koblick      M  1986-12-01  Employees
4        10005  1955-01-21    Kyoichi  Maliniak      M  1989-09-12  Employees
...        ...         ...        ...       ...    ...         ...        ...
300019  499995  1958-09-24     Dekang  Lichtner      F  1993-01-12  Employees
300020  499996  1953-03-07       Zito      Baaz      M  1990-09-27  Employees
300021  499997  1961-08-03    Berhard    Lenart      M  1986-04-21  Employees
300022  499998  1956-09-05   Patricia   Breugel      M  1993-10-13  Employees
300023  499999  1958-05-

In [23]:
import pandas as pd
import psycopg
from contextlib import contextmanager
from fuzzywuzzy import process

@contextmanager
def get_connection(config):
    """Open a PostgreSQL connection for ``config`` and close it on exit.

    Args:
        config: Mapping with the keys ``dbname``, ``user``, ``password``,
            ``host`` and ``port``; each value is passed to ``psycopg.connect``.

    Yields:
        The open connection returned by ``psycopg.connect``.

    Raises:
        psycopg.OperationalError: Re-raised, after being reported, when the
            connection attempt itself fails.
    """
    # Guard only the connection attempt: an OperationalError raised later by the
    # caller's ``with`` body must not be reported as a failure to connect.
    try:
        conn = psycopg.connect(
            dbname=config["dbname"],
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"],
        )
    except psycopg.OperationalError as e:
        print(f"Error connecting to database {config['dbname']}: {e}")
        raise

    print(f"Connected to database {config['dbname']} successfully.")
    try:
        yield conn
    finally:
        # Release the connection whether the body finished normally or raised.
        conn.close()
        print(f"Connection to database {config['dbname']} closed.")

def fetch_all_data(conn, table_name):
    """Fetch every row of ``table_name`` together with its column names.

    The table name is composed into the statement as a quoted SQL identifier
    (``psycopg.sql.Identifier``) rather than interpolated into the query text,
    so a crafted name cannot inject additional SQL. A dotted name such as
    ``"public.employees"`` is treated as a schema-qualified identifier.
    Because the identifier is quoted, it is matched case-sensitively.

    Args:
        conn: Open connection providing ``cursor()`` as a context manager.
        table_name: Name of the table to read, optionally schema-qualified.

    Returns:
        A ``(rows, column_names)`` tuple: the result of ``cursor.fetchall()``
        and the first element of every entry in ``cursor.description``.
    """
    from psycopg import sql  # local import keeps this function self-contained

    query = sql.SQL("SELECT * FROM {};").format(
        sql.Identifier(*table_name.split("."))
    )
    with conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchall(), [desc[0] for desc in cursor.description]

def fetch_data_from_table(db_configs, table_name):
    """Collect ``table_name`` from each configured database into one DataFrame.

    Args:
        db_configs: Iterable of connection config mappings; ``dbname`` is also
            used to label the rows and the progress messages.
        table_name: Table name forwarded to ``fetch_all_data``.

    Returns:
        The concatenated rows of all databases that returned data, with a
        ``database`` column naming the source, or an empty DataFrame when none
        did. A database that raises is reported and skipped.
    """
    per_database = []

    for config in db_configs:
        try:
            with get_connection(config) as connection:
                records, field_names = fetch_all_data(connection, table_name)

                if not records:
                    print(f"No data returned from {config['dbname']} for table {table_name}.")
                    continue

                # Remember which database these rows belong to
                table_df = pd.DataFrame(records, columns=field_names)
                table_df['database'] = config['dbname']
                per_database.append(table_df)
        except Exception as e:
            print(f"Error processing database {config['dbname']}: {e}")

    if per_database:
        return pd.concat(per_database, ignore_index=True)
    return pd.DataFrame()

def fuzzy_search(df, search_term):
    """Fuzzy-match each word of ``search_term`` against the object-dtype columns.

    Args:
        df: DataFrame to search.
        search_term: Whitespace-separated words; every word is matched on its
            own against each column's distinct, non-null values.

    Returns:
        Dict mapping a column name to its values that scored above 80 for at
        least one word, without duplicates and in first-match order; columns
        without such a value are omitted.
    """
    results = {}

    # Split the search term into individual words
    search_terms = search_term.split()

    for column in df.columns:
        if df[column].dtype == object:  # Only object-dtype columns are searched
            # The candidate values do not depend on the word being matched, so
            # they are computed once per column rather than once per word.
            unique_values = df[column].dropna().unique()

            matches = []
            for term in search_terms:
                matched_items = process.extract(term, unique_values, limit=None)
                matches.extend(
                    match for match, score in matched_items if score > 80
                )  # Adjust threshold as needed

            # Remove duplicates while preserving order
            matches = list(dict.fromkeys(matches))

            if matches:
                results[column] = matches

    return results

# Example usage
import os  # needed only for the credential lookup below

# The password is taken from the PGPASSWORD environment variable when it is set,
# so a real credential does not have to be saved inside the notebook; the
# fallback keeps the previous local-development value.
db_configs = [
    {
        "dbname": "Employees",
        "user": "postgres",
        "password": os.environ.get("PGPASSWORD", "root"),
        "host": "localhost",
        "port": "5432"
    }
]

# Fetch all data from the specified table
table_name = "employees"
data_df = fetch_data_from_table(db_configs, table_name)

# Print the DataFrame
print("All Data from Employees Table:")
print(data_df)

# Perform a fuzzy search; each whitespace-separated word is matched separately
search_term = "1953 geor cello"  # Example search term
fuzzy_results = fuzzy_search(data_df, search_term)

# Print the fuzzy search results, one line per column that produced matches
print(f"\nFuzzy Search Results for '{search_term}':")
for column, matches in fuzzy_results.items():
    print(f"In column '{column}': {matches}")


Connected to database Employees successfully.
Connection to database Employees closed.
All Data from Employees Table:
        emp_no  birth_date first_name last_name gender   hire_date   database
0        10001  1953-09-02     Georgi   Facello      M  1986-06-26  Employees
1        10002  1964-06-02    Bezalel    Simmel      F  1985-11-21  Employees
2        10003  1959-12-03      Parto   Bamford      M  1986-08-28  Employees
3        10004  1954-05-01  Chirstian   Koblick      M  1986-12-01  Employees
4        10005  1955-01-21    Kyoichi  Maliniak      M  1989-09-12  Employees
...        ...         ...        ...       ...    ...         ...        ...
300019  499995  1958-09-24     Dekang  Lichtner      F  1993-01-12  Employees
300020  499996  1953-03-07       Zito      Baaz      M  1990-09-27  Employees
300021  499997  1961-08-03    Berhard    Lenart      M  1986-04-21  Employees
300022  499998  1956-09-05   Patricia   Breugel      M  1993-10-13  Employees
300023  499999  1958-05-

In [25]:
import logging
from contextlib import contextmanager
from typing import Dict, Iterator, List, Optional, Tuple

import pandas as pd
import psycopg
from fuzzywuzzy import process

# Configure logging: set the root logger's level to INFO and create the
# module-level logger (named after ``__name__``) used for all messages below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@contextmanager
def get_connection(config: Dict[str, str]) -> Iterator[psycopg.Connection]:
    """Open a PostgreSQL connection for ``config`` and close it on exit.

    Args:
        config: Mapping with the keys ``dbname``, ``user``, ``password``,
            ``host`` and ``port``; each value is passed to ``psycopg.connect``.

    Yields:
        The open connection returned by ``psycopg.connect``.

    Raises:
        psycopg.OperationalError: Re-raised, after being logged, when the
            connection attempt itself fails.
    """
    # Guard only the connection attempt: an OperationalError raised later by the
    # caller's ``with`` body must not be logged as a failure to connect.
    try:
        conn = psycopg.connect(
            dbname=config["dbname"],
            user=config["user"],
            password=config["password"],
            host=config["host"],
            port=config["port"],
        )
    except psycopg.OperationalError as e:
        logger.error(f"Error connecting to database {config['dbname']}: {e}")
        raise

    logger.info(f"Connected to database {config['dbname']} successfully.")
    try:
        yield conn
    finally:
        # Release the connection whether the body finished normally or raised.
        conn.close()
        logger.info(f"Connection to database {config['dbname']} closed.")


def fetch_all_data(
    conn: psycopg.Connection, table_name: str
) -> Tuple[List[Tuple], List[str]]:
    """Fetch every row of ``table_name`` together with its column names.

    The table name is composed into the statement as a quoted SQL identifier
    (``psycopg.sql.Identifier``) rather than interpolated into the query text,
    so a crafted name cannot inject additional SQL. A dotted name such as
    ``"public.employees"`` is treated as a schema-qualified identifier.
    Because the identifier is quoted, it is matched case-sensitively.

    Args:
        conn: Open connection providing ``cursor()`` as a context manager.
        table_name: Name of the table to read, optionally schema-qualified.

    Returns:
        A ``(rows, column_names)`` tuple: the result of ``cursor.fetchall()``
        and the first element of every entry in ``cursor.description``.
    """
    from psycopg import sql  # local import keeps this function self-contained

    query = sql.SQL("SELECT * FROM {};").format(
        sql.Identifier(*table_name.split("."))
    )
    with conn.cursor() as cursor:
        cursor.execute(query)
        return cursor.fetchall(), [desc[0] for desc in cursor.description]


def fetch_data_from_table(
    db_configs: List[Dict[str, str]], table_name: str
) -> pd.DataFrame:
    """Read ``table_name`` from every configured database into one DataFrame.

    Args:
        db_configs: Connection config mappings; ``dbname`` is also used to
            label the rows and the log messages.
        table_name: Table name forwarded to ``fetch_all_data``.

    Returns:
        The concatenated rows of all databases that returned data, with a
        ``database`` column naming the source, or an empty DataFrame when none
        did. A database that raises is logged as an error and skipped.
    """
    frames = []

    for config in db_configs:
        try:
            with get_connection(config) as connection:
                rows, headers = fetch_all_data(connection, table_name)

                if not rows:
                    logger.warning(
                        f"No data returned from {config['dbname']} for table {table_name}."
                    )
                    continue

                # Label each row with the database it was read from
                frame = pd.DataFrame(rows, columns=headers)
                frame["database"] = config["dbname"]
                frames.append(frame)
        except Exception as e:
            logger.error(f"Error processing database {config['dbname']}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def fuzzy_search(
    df: pd.DataFrame, search_term: str, threshold: int = 80
) -> Dict[str, List[str]]:
    """Fuzzy-match each word of ``search_term`` against the object-dtype columns.

    Args:
        df: DataFrame to search.
        search_term: Whitespace-separated words; every word is matched on its
            own against each column's distinct, non-null values.
        threshold: A value is kept only when its score is strictly greater
            than this number.

    Returns:
        Dict mapping a column name to its matching values, without duplicates
        and in first-match order; columns without a match are omitted.
    """
    words = search_term.split()
    hits_by_column = {}

    for column in df.columns:
        if df[column].dtype != object:  # only object-dtype columns are searched
            continue

        candidates = df[column].dropna().unique()  # distinct non-null values

        # A dict keeps insertion order, which de-duplicates matches found by
        # several words while preserving the order they were first seen in.
        ordered_hits = {}
        for word in words:
            for value, score in process.extract(word, candidates, limit=None):
                if score > threshold:
                    ordered_hits.setdefault(value, None)

        if ordered_hits:
            hits_by_column[column] = list(ordered_hits)

    return hits_by_column


# Example usage
import os  # needed only for the credential lookup below

# The password is taken from the PGPASSWORD environment variable when it is set,
# so a real credential does not have to be saved inside the notebook; the
# fallback keeps the previous local-development value.
db_configs = [
    {
        "dbname": "Employees",
        "user": "postgres",
        "password": os.environ.get("PGPASSWORD", "root"),
        "host": "localhost",
        "port": "5432",
    }
]

# Fetch all data from the specified table
table_name = "employees"
data_df = fetch_data_from_table(db_configs, table_name)

# Log the DataFrame
logger.info("All Data from Employees Table:")
logger.info(data_df)

# Perform a fuzzy search; each whitespace-separated word is matched separately
search_term = "1953 geor cello"  # Example search term
fuzzy_results = fuzzy_search(data_df, search_term)

# Log the fuzzy search results, one record per column that produced matches
logger.info(f"\nFuzzy Search Results for '{search_term}':")
for column, matches in fuzzy_results.items():
    logger.info(f"In column '{column}': {matches}")


INFO:__main__:Connected to database Employees successfully.


INFO:__main__:Connection to database Employees closed.
INFO:__main__:All Data from Employees Table:
INFO:__main__:        emp_no  birth_date first_name last_name gender   hire_date   database
0        10001  1953-09-02     Georgi   Facello      M  1986-06-26  Employees
1        10002  1964-06-02    Bezalel    Simmel      F  1985-11-21  Employees
2        10003  1959-12-03      Parto   Bamford      M  1986-08-28  Employees
3        10004  1954-05-01  Chirstian   Koblick      M  1986-12-01  Employees
4        10005  1955-01-21    Kyoichi  Maliniak      M  1989-09-12  Employees
...        ...         ...        ...       ...    ...         ...        ...
300019  499995  1958-09-24     Dekang  Lichtner      F  1993-01-12  Employees
300020  499996  1953-03-07       Zito      Baaz      M  1990-09-27  Employees
300021  499997  1961-08-03    Berhard    Lenart      M  1986-04-21  Employees
300022  499998  1956-09-05   Patricia   Breugel      M  1993-10-13  Employees
300023  499999  1958-05-01  