In [99]:
import os
os.chdir('/mnt/c/Users/ralvin/OneDrive - Reliant Health Partners/Documents/RHP_dev_RA/Automate_skyvia')
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
import pyodbc, sys, time 
import requests
import numpy as np
from datetime import datetime
from helper_functions_v2 import create_df, updated_data_pull, upsert, delete_record, insert_records, salesforce_connection, read_sftp_data
import configparser
import json
import paramiko
import io

# Show every row and column when a DataFrame is displayed.
# NOTE(review): with no row limit, displaying a large frame will flood the output.
pd.set_option('display.max_rows', None) 
pd.set_option('display.max_columns', None) 


##### Retrieve source data
# config.ini is resolved against the current working directory (set by os.chdir above).
config = configparser.ConfigParser()
config.read('config.ini')
sftp_config = config['sftp']          # SFTP connection settings
mapping_config = config['mappings']   # location of the JSON field-mapping file
host = sftp_config.get('host')
port = sftp_config.getint('port', fallback=22)  # falls back to 22 when 'port' is not set
username = sftp_config.get('user')
password = sftp_config.get('password')
remote_path = sftp_config.get('remote_path')
json_path = mapping_config.get('json_file_path')


# Read the remote file into a DataFrame (helper imported from helper_functions_v2).
Source_data =  read_sftp_data(remote_path, host, port, username, password)
print(f'Source data shape: {Source_data.shape}')


## Load the field mappings, one entry per target Salesforce object
with open(json_path, 'r') as f:
    data = json.load(f)

# Shallow copy of the top-level JSON object, keyed by mapping name.
mappings = {k:v for k,v in data.items()}
Claims_map = mappings['Claim_Object_Map']
Account_map = mappings['Account_Object_Map']
# NOTE(review): 'Povider_map' looks like a typo for 'Provider_map'; the name is kept
# because a later (commented-out) cell refers to it.
Povider_map = mappings['Provider_Object_Map']
lines_map = mappings['Line_Item_Object_Map']

## Salesforce connections
sf = salesforce_connection(sandbox=True)
# NOTE(review): sf_prod is not referenced by any other cell shown in this notebook --
# confirm a production connection is really needed here.
sf_prod = salesforce_connection(sandbox=False)




## data pull
# sf_objects = ["Claims__c", "Line_Items__c", "Provider_TIN__c", 'Jurisdiction__c', 'Groups_Clients__c', 'DRG__c', 
#                  'HCPCS_CPT_Code__c', 'Account', 'Clients__c', 'Provider_Specialty__c']
# my_sf_objects = updated_data_pull(sf_objects, sf)
# for key in my_sf_objects:
#     print(key)
#     print(my_sf_objects[key].shape)


#### NOTE: logging is done at the file level; ideally we archive each file into a review dir together with info on all errors for that file


Connection established successfully!
File read into DataFrame successfully
Connection closed.
Source data shape: (446, 42)
Connected to Salesforce sandbox
Connected to Salesforce Prod


In [100]:
# Pull the complete Claims and Line Items objects through the `sf` connection.
claims_sf = updated_data_pull(['Claims__c'], sf)
Line_items_sf = updated_data_pull(['Line_Items__c'], sf)

# Report (rows, columns) for each pull, claims first.
for pulled in (claims_sf, Line_items_sf):
    print(pulled.shape)


pulled Claims__c
pulled Line_Items__c
(12937, 101)
(22841, 49)


In [81]:

def data_pull(object_name, id_list, sf, chunk_size=200):
    """Pull all fields of selected records of one Salesforce object into a DataFrame.

    Parameters
    ----------
    object_name : str
        API name of the Salesforce object (looked up as an attribute of ``sf``).
    id_list : iterable
        Record Ids to retrieve. Null entries and duplicates are dropped and the
        remaining values are converted to ``str``. A single string is treated
        as one Id.
    sf : object
        Connection exposing ``<object_name>.describe()``, ``query()`` and
        ``query_more()`` (as used below).
    chunk_size : int, default 200
        Maximum number of Ids placed in one ``WHERE Id IN (...)`` clause. The
        Ids are queried in batches so a long list does not produce one
        oversized request.

    Returns
    -------
    pandas.DataFrame
        One row per retrieved record, without the ``attributes`` metadata column.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1, if ``id_list`` is ``None`` or holds
        no usable Id, or if the queries return no records.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1.")
    if id_list is None:
        raise ValueError("id_list is empty. No record Ids to pull.")
    if isinstance(id_list, str):
        id_list = [id_list]

    # Drop nulls, de-duplicate while keeping the caller's order, and escape the
    # characters that would terminate the quoted SOQL literal.
    unique_ids = dict.fromkeys(
        str(record_id).replace("\\", "\\\\").replace("'", "\\'")
        for record_id in id_list
        if pd.notna(record_id)
    )
    ids = list(unique_ids)
    if not ids:
        raise ValueError("id_list is empty. No record Ids to pull.")

    sf_object = getattr(sf, object_name)

    # Every field of the object is selected; the names come from its metadata.
    metadata = sf_object.describe()
    field_names = [field['name'] for field in metadata['fields']]
    select_clause = ', '.join(field_names)

    all_records = []
    for start in range(0, len(ids), chunk_size):
        id_filter = "', '".join(ids[start:start + chunk_size])
        query = f"SELECT {select_clause} FROM {object_name} WHERE Id IN ('{id_filter}')"

        records = sf.query(query)
        all_records.extend(records['records'])

        # Follow the pagination links until this batch is fully retrieved.
        while not records['done']:
            records = sf.query_more(records['nextRecordsUrl'], True)
            all_records.extend(records['records'])

    df = pd.DataFrame(all_records)

    # Nothing came back for any batch.
    if df.empty:
        raise ValueError("DataFrame is empty (shape is (0, 0)). No data retrieved.")

    # Remove the Salesforce metadata column if present.
    df = df.drop(columns=['attributes'], errors='ignore')

    print('pulled data slice!')

    return df



def _to_lookup_keys(values):
    """Return ``values`` as a list of join keys (strings, or NaN for nulls)."""
    try:
        # Numeric-looking codes (e.g. a DRG read as 470.0) become '470'; nulls stay NaN.
        return [
            str(int(float(value))) if pd.notna(value) and value != 'nan' else np.nan
            for value in values
        ]
    except (ValueError, TypeError, OverflowError):
        # Not a purely numeric column: fall back to the plain string form.
        return [str(value) for value in values]


def create_df(source_dict, Source_data, sf, id_list = None):
    """Build the frame to load into one Salesforce object from the source file.

    Parameters
    ----------
    source_dict : dict
        Mapping definition with these entries:
        ``'MAPPINGS'``: source column -> list of target columns;
        ``'KEYS'``: returned to the caller unchanged;
        ``'TABLE_CONVERT'`` (optional): source column ->
        ``{foreign_table: [key_col, value_col_1, ...]}``. For such a column the
        source value is matched against ``key_col`` of ``foreign_table`` and the
        i-th target column receives ``value_col_i``.
    Source_data : pandas.DataFrame
        Source rows. It is NOT modified; key normalisation happens on a copy.
    sf : object
        Salesforce connection handed to ``data_pull`` / ``updated_data_pull``.
    id_list : iterable, optional
        Record Ids used to pull only a slice of a lookup table. When the slice
        pull fails (for example because the Ids belong to a different object)
        the whole lookup table is pulled instead.

    Returns
    -------
    (pandas.DataFrame, object)
        The mapped frame, with one row per row of ``Source_data``, and
        ``source_dict['KEYS']``.
    """
    mappings = source_dict['MAPPINGS']
    new_columns = [col for mapped_cols in mappings.values() for col in mapped_cols]
    new_df = pd.DataFrame(columns=new_columns)
    keys = source_dict['KEYS']

    # TABLE_CONVERT is optional; treat a missing or null entry as "no lookups".
    table_convert = source_dict.get('TABLE_CONVERT') or {}

    for source_col, target_cols in mappings.items():
        if source_col not in table_convert:
            # Plain copy; source columns missing from the file are skipped.
            if source_col in Source_data.columns:
                for target_col in target_cols:
                    new_df[target_col] = Source_data[source_col]
            continue

        map_table = table_convert[source_col]
        # Normalise the join keys to the string form used by the lookup table,
        # without writing back into the caller's frame.
        source_keys = _to_lookup_keys(Source_data[source_col])
        foreign_table = list(map_table.keys())[0]
        lookup_cols = map_table[foreign_table]
        key_col = lookup_cols[0]
        print(f'pulling updated {foreign_table} table')

        sf_object = None
        if id_list is not None:
            try:
                # Passing id_list avoids pulling the entire object.
                sf_object = data_pull(foreign_table, id_list, sf)
            except Exception as exc:
                # Best effort only: report why the slice pull failed, then fall back.
                print(f'slice pull of {foreign_table} failed ({exc}); pulling the full table instead')

        if sf_object is None:
            sf_object = updated_data_pull([foreign_table], sf)

        # A left merge must return exactly one row per source row, otherwise the
        # looked-up columns no longer line up with the directly copied ones.
        # Null keys are dropped (pandas would match them to null source keys)
        # and only the first record of each duplicated key is kept.
        lookup = sf_object.loc[:, lookup_cols].dropna(subset=[key_col])
        duplicate_keys = lookup.duplicated(subset=[key_col])
        if duplicate_keys.any():
            print(f'{foreign_table}: {int(duplicate_keys.sum())} duplicate {key_col} value(s) ignored (first match kept)')
            lookup = lookup.loc[~duplicate_keys]

        # Private key-column name, so lookup columns can never clash with it.
        left = pd.DataFrame({'__lookup_key__': source_keys})
        merged = left.merge(lookup, left_on='__lookup_key__', right_on=key_col, how='left')
        # Same labels as Source_data, so the index-aligned assignments below agree
        # with the directly copied columns.
        merged.index = Source_data.index

        print(f'Merged {source_col}')
        for position, target_col in enumerate(target_cols, start=1):
            new_df[target_col] = merged[lookup_cols[position]]

    return new_df, keys

In [None]:
# map_account_table, _ = create_df(Account_map, Source_data, sf)
# map_provider_table, keys = create_df(Povider_map, Source_data, sf)
# ###upsert account and provider records
# new_ids, errors = upsert(map_provider_table.drop_duplicates(), keys, sf)

In [52]:
##### Populate the Claims object on Salesforce

# Map the source file onto the Claims columns; `keys` is the 'KEYS' entry of Claims_map.
map_claims_table, keys = create_df(Claims_map, Source_data, sf)
# Insert the de-duplicated claim rows. `new_ids` is passed on to the line-item
# mapping cell as its id_list (presumably the Ids of the inserted records -- confirm
# against insert_records).
new_ids, errors = insert_records(map_claims_table.drop_duplicates(), keys, sf) ###insert new claims records


pulling updated Provider_TIN__c table
pulled Provider_TIN__c
Merged TIN
pulling updated Groups_Clients__c table
pulled Groups_Clients__c
Merged Patient Group/Policy Number
pulling updated Jurisdiction__c table
pulled Jurisdiction__c
Merged JurisdictionState
pulling updated DRG__c table
pulled DRG__c
Merged DRG
pulling updated Provider_Specialty__c table
pulled Provider_Specialty__c
Merged Billing Provider Taxonomy
Processing record 0: {'Claim_ID__c': 'A3038B5E4B6C914tksft', 'Provider_TIN__c': 'a0FVF000001zh5t2AA', 'Group_Client__c': 'a0C8a00000r3GJ6EAM', 'Claim_Number__c': '431817310700011', 'Jurisdiction__c': 'a024W00000ICbumQAD', 'Diag_Code__c': 'M25562', 'Diag_Code_2nd__c': nan, 'Diag_Code_3rd__c': nan, 'Diag_Code_4th__c': nan, 'Provider__c': 'KAISER FOUNDATION HOSPITALS ', 'Patient__c': 'WATKINS MONIC  ', 'Patient_ID__c': '569713835', 'Date_of_Birth__c': '08/12/1982', 'Provider_Zip__c': 900749998, 'DRG__c': nan, 'QPA__c': 'N', 'NPI__c': nan, 'POS__c': 13, 'Provider_Specialty__c': '

In [83]:

# Map the source file onto the Line Items columns. `new_ids` is handed to create_df
# as id_list so lookup tables can be pulled as a slice instead of in full.
lines_map = mappings['Line_Item_Object_Map']
map_lines_table, keys = create_df(lines_map, Source_data, sf, new_ids)
map_lines_table.head()

pulling updated Claims__c table
pulled data slice!
Merged Claim ID
pulling updated HCPCS_CPT_Code__c table
pulled HCPCS_CPT_Code__c
Merged HCPCS/CPT Code


Unnamed: 0,DOS__c,Rev_Code__c,Billed_Amount__c,RHP_ID__c,Mod__c,Mod_2nd__c,Units__c,MAR__c,HCPCS_CPT_Code__c,Line_ID__c,Exp_Code_2nd__c
0,09/26/2024,320,987.0,a06VF00000AHZLNYA5,,,1,156.72,a0b4W00000y1IFrQAM,1,
1,10/17/2024,510,65.0,a06VF00000AHSIMYA5,,,1,65.0,a0b4W00000y1JRaQAM,1,
2,10/22/2024,278,3.0,a06VF00000AHWCFYA5,,,1,3.0,,1,
3,10/22/2024,300,54.0,a06VF00000AHWCFYA5,,,1,54.0,a0b4W00000y1IqZQAU,2,
4,10/22/2024,310,1000.0,a06VF00000AHWCFYA5,59.0,,4,1000.0,a0b4W00000y1INfQAM,3,


In [88]:
# Insert the de-duplicated line-item rows.
# NOTE(review): this rebinds `new_ids` (until now the value returned by the claims
# insert), so re-running the line-item mapping cell afterwards would pass these
# values as its id_list instead.
new_ids, errors = insert_records(map_lines_table.drop_duplicates(), keys, sf) ###insert new line records


Processing record 0: {'DOS__c': '09/26/2024', 'Rev_Code__c': 320, 'Billed_Amount__c': 987.0, 'RHP_ID__c': 'a06VF00000AHZLNYA5', 'Mod__c': nan, 'Mod_2nd__c': nan, 'Units__c': 1, 'MAR__c': 156.72, 'HCPCS_CPT_Code__c': 'a0b4W00000y1IFrQAM', 'Line_ID__c': 1, 'Exp_Code_2nd__c': nan}
Processing record 1: {'DOS__c': '10/17/2024', 'Rev_Code__c': 510, 'Billed_Amount__c': 65.0, 'RHP_ID__c': 'a06VF00000AHSIMYA5', 'Mod__c': nan, 'Mod_2nd__c': nan, 'Units__c': 1, 'MAR__c': 65.0, 'HCPCS_CPT_Code__c': 'a0b4W00000y1JRaQAM', 'Line_ID__c': 1, 'Exp_Code_2nd__c': nan}
Processing record 2: {'DOS__c': '10/22/2024', 'Rev_Code__c': 278, 'Billed_Amount__c': 3.0, 'RHP_ID__c': 'a06VF00000AHWCFYA5', 'Mod__c': nan, 'Mod_2nd__c': nan, 'Units__c': 1, 'MAR__c': 3.0, 'HCPCS_CPT_Code__c': nan, 'Line_ID__c': 1, 'Exp_Code_2nd__c': nan}
Processing record 3: {'DOS__c': '10/22/2024', 'Rev_Code__c': 300, 'Billed_Amount__c': 54.0, 'RHP_ID__c': 'a06VF00000AHWCFYA5', 'Mod__c': nan, 'Mod_2nd__c': nan, 'Units__c': 1, 'MAR__c': 54

In [95]:
# One row per distinct claim in the source file, keeping only its external id.
source_cases = Source_data[['Claim ID']].drop_duplicates(subset=['Claim ID'])

# Match those claims against the current Claims__c records (inner join on the
# external claim id) and collect the Salesforce Ids of the matches.
claims_current = updated_data_pull(['Claims__c'], sf)
matched_claims = source_cases.merge(
    claims_current, left_on='Claim ID', right_on='Claim_ID__c'
).loc[:, ['Claim ID', 'Claim_ID__c', 'Id']]
ids = list(matched_claims['Id'])
len(ids)

pulled Claims__c


103

In [96]:
################# Delete records:
# Removes the Claims__c records whose Salesforce Ids were collected in `ids`.
# NOTE(review): destructive -- run only after checking `ids`; `sf` is the sandbox connection.
keys = {'Claims__c':'Claim_ID__c'}
delete_record(ids, keys, sf)

Object: Claims__c, Records to process: 103
All records deleted successfully.


In [3]:
# Pull Groups_Clients__c directly. The previous lookup in `my_sf_objects` relied on
# a variable that is only defined in commented-out code, so it failed on a fresh kernel.
groups_clients_sf = updated_data_pull(['Groups_Clients__c'], sf)
groups_clients_sf.shape

(1703, 59)

In [5]:
# Distinct mapped claim rows with no Group_Client__c value -- presumably claims whose
# group/policy number found no match in Groups_Clients__c (confirm against the mapping).
map_claims_table[map_claims_table['Group_Client__c'].isna()].drop_duplicates()

Unnamed: 0,Claim_ID__c,Provider_TIN__c,Group_Client__c,Claim_Number__c,Jurisdiction__c,Diag_Code__c,Diag_Code_2nd__c,Diag_Code_3rd__c,Diag_Code_4th__c,Provider__c,Patient__c,Patient_ID__c,Date_of_Birth__c,Provider_Zip__c,DRG__c,QPA__c,NPI__c,POS__c,Provider_Specialty__c,Service_Type__c
116,1E6E757A964D548tksft,a0F4W00000W1KH5UAN,,18A91B0008179A1,a024W00000HYAjJQAX,C20,C7951,R6889,,METHODIST HOSPITALSINC.,ROODZANT JAMES,880263595,12/22/1971,464107035,,N,1518035000.0,13,,
202,B17551A5CD4B233tksft,a0F4W00000W1LEEUA3,,18A91B0008179A8,a024W00000HYAjRQAX,Z5181,Z79899,,,FREEMAN NEOSHO HOSPITAL,WILSON ROBERT L,880440969,03/12/1960,648501705,,N,1154990000.0,85,,
214,24BF09BD7288E97tksft,a0FVF0000020PQH2A2,,06A11B00081784F,a024W00000HYAjkQAH,M25532,X500XXA,,,PFLUGERVILLE EMERGENCY CENTER LLC,INABINETT COLE,750076498,05/27/2011,786605965,,Y,1154762000.0,13,,
272,0D8755A2AD0EBC0tksft,a0FVF0000020Onc2AE,,P24319I2000001,a024W00000HYAjRQAX,Z01818,N871,,,CURATORS OF THE UNIVERSITY,ARNOLD ASHLEY,489984314,05/19/1988,652120001,,Y,,22,,
374,0B46B602A7B3B67tksft,a0FVF0000020PbZ2AU,,21040838Accredited,a024W00000HYAj9QAH,M5416,,,,SURGCENTER NORTHERN PHOENIX,PEN KEVIN,XXXXX1120,11/22/1981,850859998,,N,,83,a008a000015yXNEAA2,SURGERY CENTER
431,802EF0AD00A30E7tksft,a0FVF0000020Pen2AE,,18A91B0008179EE,a024W00000HYAjoQAH,M2352,,,,SEATTLE CHILDRENS HOSPITAL,JOHNSTON HARPER A,880402012,09/16/2009,980043829,,N,1316174000.0,13,,


In [18]:
# Scan the group numbers (in sorted order) for values containing a given substring.
# NOTE(review): as written, `'' in i` is true for every string, so this would print
# every group number, yet the saved output shows a single value. Presumably the
# literal originally held a character that did not survive export (e.g. an
# invisible or whitespace character) -- confirm and restore it.
# NOTE(review): sorted() and `in` assume every Group_Number__c value is a string;
# verify there are no nulls.
for i in sorted(list(groups_clients_sf['Group_Number__c'])):
    if '' in i:
        print(i)

330803325RELAccidentFund
