In [43]:

from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd  # Ensure this is imported if using pandas

import os
os.chdir('/mnt/c/Users/ralvin/OneDrive - Reliant Health Partners/Documents/RHP_dev_RA/Automate_skyvia')
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
import pyodbc, sys, time 
import requests
import numpy as np
from datetime import datetime
from helper_functions_v3 import create_df, updated_data_pull, upsert, delete_record, insert_records, salesforce_connection, read_sftp_data, data_pull
import configparser
import json
import paramiko
import io

# Disable pandas' row/column truncation so displayed frames are shown in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


##### retrieve source data
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing config.ini fails
# with a clear message instead of a KeyError on the 'sftp' section below.
config = configparser.ConfigParser()
if not config.read('config.ini'):
    raise FileNotFoundError(f"config.ini not found or unreadable in {os.getcwd()}")
sftp_config = config['sftp']
mapping_config = config['mappings']

# SFTP connection settings; the port falls back to 22 when not configured.
host = sftp_config.get('host')
port = sftp_config.getint('port', fallback=22)
username = sftp_config.get('user')
password = sftp_config.get('password')
remote_path = sftp_config.get('remote_path')

# Location of the JSON file holding the object field mappings.
json_path = mapping_config.get('json_file_path')


# read_sftp_data is a project helper; the result exposes `.shape`, and the cell
# output reports the file being read into a DataFrame.
Source_data = read_sftp_data(remote_path, host, port, username, password)
print(f'Source data shape: {Source_data.shape}')


## get data mappings
# Parse the JSON mapping file whose path was read from the config.
with open(json_path, 'r') as f:
    data = json.load(f)

# Shallow copy of the parsed JSON object, then one named lookup per mapping.
mappings = dict(data.items())
Claims_map = mappings['Claim_Object_Map']
Account_map = mappings['Account_Object_Map']
# NOTE(review): "Povider_map" looks like a typo for "Provider_map"; the name is
# kept as-is because other cells may already reference it.
Povider_map = mappings['Provider_Object_Map']
lines_map = mappings['Line_Item_Object_Map']

## sf connection
# Open two Salesforce sessions through the project helper:
# `sf` is requested with sandbox=True, `sf_prod` with sandbox=False.
# NOTE(review): the cells visible here only pass `sf` (sandbox) to create_df and
# insert_records -- confirm a production session really needs to be opened here.
sf = salesforce_connection(sandbox=True)
sf_prod = salesforce_connection(sandbox=False)


def format_date(value):
    """Convert an ``MM/DD/YYYY`` string to ``YYYY-MM-DD``.

    Any value that cannot be parsed with that format (a string in another
    layout, ``None``, a number, ...) is handed back unchanged.
    """
    try:
        parsed = datetime.strptime(value, '%m/%d/%Y')
        iso_text = parsed.strftime('%Y-%m-%d')
    except (ValueError, TypeError):
        # Not a date in the expected layout (or not a string at all): pass through.
        return value
    return iso_text


def insert_records(new_df, keys, sf):
    """Create one Salesforce record per row of ``new_df`` using a thread pool.

    NOTE(review): this definition shadows the ``insert_records`` imported from
    ``helper_functions_v3`` at the top of the notebook -- confirm that is intended.

    Parameters
    ----------
    new_df : pandas.DataFrame
        One row per record to create. Column names are used as the field names
        of the dict passed to ``create``.
    keys : dict
        Only the first key is read: it is the attribute looked up on ``sf``
        (the object to insert into). The values are not used here.
    sf : object
        Connection-like object; ``getattr(sf, <first key>)`` must provide a
        ``create(dict)`` method returning a mapping with an ``'id'`` entry.

    Returns
    -------
    tuple[list, list]
        ``(new_ids, error_logs)``.

        * ``new_ids`` holds the ids of the created records, in row order.
        * ``error_logs`` holds ``(row_position, exception)`` tuples, in row
          order. ``row_position`` is the 0-based position in ``new_df`` (not
          its index label). A failure outside any single record (for example
          while submitting the work) is appended last as ``(None, exception)``.

    Raises
    ------
    IndexError
        If ``keys`` is empty.
    AttributeError
        If ``sf`` has no attribute named after the first key.
    """
    object_name = list(keys.keys())[0]
    sf_object = getattr(sf, object_name)

    def process_record(record):
        """Clean one row dict and create it; returns ``(new_id, error)``."""
        # Send NPI__c as a digit string (e.g. 1234567890.0 -> '1234567890').
        if "NPI__c" in record:
            record["NPI__c"] = str(int(record["NPI__c"])) if pd.notnull(record["NPI__c"]) else None

        # Replace missing values (NaN/NaT/None) with None; no field is dropped.
        record_insert = {k: (None if pd.isna(v) else v) for k, v in record.items()}
        # Reformat any field whose name contains 'Date' or 'DOS'.
        record_insert = {k: format_date(v) if 'Date' in k or 'DOS' in k else v for k, v in record_insert.items()}

        try:
            new_id = sf_object.create(record_insert)
            return new_id['id'], None
        except Exception as e:
            # Hand the failure back instead of raising so other rows keep going.
            return None, e

    # Results are collected per row position first: as_completed() yields in
    # completion order, which would otherwise make the output order random.
    outcomes = {}
    fatal_error = None

    try:
        with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
            future_to_record = {
                executor.submit(process_record, new_df.iloc[i, :].to_dict()): i
                for i in range(len(new_df))
            }

            for future in as_completed(future_to_record):
                idx = future_to_record[future]
                try:
                    outcomes[idx] = future.result()
                except Exception as e:
                    # Raised while preparing the record (before create was called).
                    print(f"Unexpected error during future processing: {e}")
                    outcomes[idx] = (None, e)

    except Exception as e:
        print(f"Unexpected error during processing: {e}")
        fatal_error = e

    new_ids = []
    error_logs = []
    for idx in sorted(outcomes):
        new_id, error = outcomes[idx]
        if new_id:
            new_ids.append(new_id)
        if error is not None:
            error_logs.append((idx, error))
    if fatal_error is not None:
        # Same (position, exception) shape as per-record entries; None = no row.
        error_logs.append((None, fatal_error))

    print(f"Total records inserted: {len(new_ids)}")
    print(f"Total errors logged: {len(error_logs)}")
    return new_ids, error_logs



# Build the claims frame from the source data with the claim mapping, using the
# sandbox connection. create_df comes from helper_functions_v3 and its internals
# are not visible here; the cell output shows it pulling several Salesforce
# lookup tables and merging them. `keys` is later passed to insert_records,
# which uses its first key as the object name.
map_claims_table, keys = create_df(Claims_map, Source_data, sf)


Connection established successfully!
File read into DataFrame successfully
Connection closed.
Source data shape: (446, 42)
Connected to Salesforce sandbox
Connected to Salesforce Prod
pulling updated Provider_TIN__c table
pulled Provider_TIN__c
Merged TIN
pulling updated Groups_Clients__c table
pulled Groups_Clients__c
Merged Patient Group/Policy Number
pulling updated Jurisdiction__c table
pulled Jurisdiction__c
Merged JurisdictionState
pulling updated DRG__c table
pulled DRG__c
Merged DRG
pulling updated Provider_Specialty__c table
pulled Provider_Specialty__c
Merged Billing Provider Taxonomy


In [46]:
# Insert the de-duplicated claim rows through the sandbox connection (`sf`).
# Row numbers reported in error_logs are positions within the de-duplicated
# frame passed here, not index labels of map_claims_table.
new_ids, error_logs = insert_records(map_claims_table.drop_duplicates(), keys, sf) ###insert new claims records


Total records inserted: 103
Total errors logged: 6


In [47]:
# Inspect the failures: each entry pairs a row position with the exception raised for it.
error_logs

[(25,
  simple_salesforce.exceptions.SalesforceMalformedRequest('https://relianthealthpartners2020--tctdev.sandbox.my.salesforce.com/services/data/v59.0/sobjects/Claims__c/',
                                                          400,
                                                          'Claims__c',
                                                          [{'message': 'Required fields are missing: [Group_Client__c]',
                                                            'errorCode': 'REQUIRED_FIELD_MISSING',
                                                            'fields': ['Group_Client__c']}])),
 (46,
  simple_salesforce.exceptions.SalesforceMalformedRequest('https://relianthealthpartners2020--tctdev.sandbox.my.salesforce.com/services/data/v59.0/sobjects/Claims__c/',
                                                          400,
                                                          'Claims__c',
                                                          [{'messag

In [25]:
import threading

def task(name):
    """Print a one-line notice that the task called ``name`` is running."""
    message = f"Task {name} is running"
    print(message)

# Spawn five worker threads by hand, keeping a handle on each one
threads = []
for i in range(5):
    worker_args = (f"Thread-{i}",)
    thread = threading.Thread(target=task, args=worker_args)
    threads.append(thread)
    thread.start()

# Wait for every worker to finish before reporting completion
for thread in threads:
    thread.join()
print("All threads completed.")




Task Thread-0 is running
Task Thread-1 is running
Task Thread-2 is running
Task Thread-3 is running
Task Thread-4 is running
All threads completed.


In [26]:
# Display the Thread objects collected above; every one has been joined by this point.
threads

[<Thread(Thread-18 (task), stopped 140415685621440)>,
 <Thread(Thread-19 (task), stopped 140415685621440)>,
 <Thread(Thread-20 (task), stopped 140415685621440)>,
 <Thread(Thread-21 (task), stopped 140415685621440)>,
 <Thread(Thread-22 (task), stopped 140415685621440)>]

In [24]:
from concurrent.futures import ThreadPoolExecutor

def task(name):
    """Announce on stdout that the task identified by ``name`` is running."""
    announcement = f"Task {name} is running"
    print(announcement)

# Use ThreadPoolExecutor to manage threads
with ThreadPoolExecutor(max_workers=3) as executor:
    names = [f"Task-{i}" for i in range(5)]
    # Executor.map() only re-raises a task's exception when its result is
    # retrieved from the returned iterator; drain it so failures are not hidden.
    list(executor.map(task, names))
print("All tasks completed.")


Task Task-0 is running
Task Task-1 is running
Task Task-2 is running
Task Task-3 is running
Task Task-4 is running
All tasks completed.
