In [2]:
import os
os.chdir('/mnt/c/Users/ralvin/OneDrive - Reliant Health Partners/Documents/RHP_dev_RA/Automate_skyvia')
import pandas as pd
from simple_salesforce import Salesforce, SalesforceLogin
import pyodbc, sys, time 
import requests
import numpy as np
from datetime import datetime
from helper_functions_v4 import create_df, updated_data_pull, upsert, delete_record, insert_records, salesforce_connection, read_sftp_data, read_sftp_directory, data_pull
import configparser
import json
import paramiko
import io

# Notebook display options: never truncate rows or columns when a DataFrame is shown.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


##### retrieve source data
config_file = 'config_v2.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed. Fail here with a clear message instead of letting the
# section lookups below die with an opaque KeyError.
if not config.read(config_file):
    raise FileNotFoundError(
        f'Could not read {config_file!r} (current working directory: {os.getcwd()})'
    )
sftp_config = config['sftp']
mapping_config = config['mappings']
host = sftp_config.get('host')
port = sftp_config.getint('port', fallback=22)  # fall back to port 22 when not configured
username = sftp_config.get('user')
password = sftp_config.get('password')
remote_dir = sftp_config.get('remote_path')
json_path = mapping_config.get('json_file_path')


# Source_data is indexed by name below and each value exposes `.shape`
# (presumably file name -> DataFrame; confirm against read_sftp_directory).
Source_data = read_sftp_directory(remote_dir, host, port, username, password)
for source_name, source_df in Source_data.items():
    print(source_name)
    print(f'Source data shape: {source_df.shape}')

## sf connection
sf = salesforce_connection(sandbox=True)
# NOTE(review): sf_prod is not referenced by any cell visible in this notebook;
# consider opening the production connection only when it is actually needed.
sf_prod = salesforce_connection(sandbox=False)


# 13040, 23266 after first file ingested
# 14167, 25784 after second file ingested
# ~35 min to ingest both files

Connection established successfully!
Files in directory: ['837I00111182024.csv', '837P00111182024.csv']
File 837I00111182024.csv read into DataFrame successfully
File 837P00111182024.csv read into DataFrame successfully
Connection closed.
837I00111182024.csv
Source data shape: (446, 42)
837P00111182024.csv
Source data shape: (3106, 42)
Connected to Salesforce sandbox
Connected to Salesforce Prod


In [3]:
def ingest_data(json_path, Source_data, sf):
    """Push every source document to Salesforce: providers, then claims, then lines.

    Parameters
    ----------
    json_path : path of the JSON mapping file. It must contain the keys
        'Claim_Object_Map', 'Account_Object_Map', 'Provider_Object_Map' and
        'Line_Item_Object_Map'.
    Source_data : mapping of source document name -> source table (each table
        must expose `.shape`).
    sf : connection object handed through to create_df / upsert / insert_records.

    Returns
    -------
    dict keyed by (document name, stage), where stage is 'providers_obj',
    'claims_obj' or 'lines_obj'. Each value is the errors object returned by
    that stage's upsert / insert call.
    """

    def _close_stage():
        # A blank line followed by a horizontal rule ends each stage's log output.
        print('')
        print('________________________________________________________________________________________')

    ## get data mappings
    with open(json_path, 'r') as mapping_file:
        object_maps = dict(json.load(mapping_file).items())
    claim_fields = object_maps['Claim_Object_Map']
    account_fields = object_maps['Account_Object_Map']
    provider_fields = object_maps['Provider_Object_Map']
    line_fields = object_maps['Line_Item_Object_Map']

    Errors = {}
    # Iterate over a snapshot of the document names.
    for doc_name in tuple(Source_data.keys()):
        doc = Source_data[doc_name]
        print(doc_name)
        print(doc.shape)
        for _ in range(3):
            print('')

        ## providers stage
        # NOTE(review): the account table is built but never sent to Salesforce;
        # only providers are upserted. The call is kept as-is; confirm whether an
        # account upsert is missing here.
        _unused_account_table, _ = create_df(account_fields, doc, sf)
        provider_table, provider_keys = create_df(provider_fields, doc, sf)
        print('upserting to providers ... ')
        _provider_ids, provider_errors = upsert(provider_table.drop_duplicates(), provider_keys, sf)
        Errors[(doc_name, 'providers_obj')] = provider_errors
        _close_stage()

        ## claims stage: insert new claim records
        claim_table, claim_keys = create_df(claim_fields, doc, sf)
        print('Inserting claims data ... ')
        claim_ids, claim_errors = insert_records(claim_table.drop_duplicates(), claim_keys, sf)
        Errors[(doc_name, 'claims_obj')] = claim_errors
        _close_stage()

        ## lines stage: the ids of the claims just inserted are passed to create_df
        line_table, line_keys = create_df(line_fields, doc, sf, claim_ids)
        print('Inserting lines data ... ')
        _line_ids, line_errors = insert_records(line_table.drop_duplicates(), line_keys, sf)
        Errors[(doc_name, 'lines_obj')] = line_errors
        _close_stage()

    return Errors

# Run the full ingestion with the `sf` connection and keep the per-stage errors
# for inspection in later cells.
Errors = ingest_data(json_path=json_path, Source_data=Source_data, sf=sf)

837I00111182024.csv
(446, 42)



upserting to providers ... 
Total records inserted: 62
Total errors logged: 0

________________________________________________________________________________________
pulling updated Provider_TIN__c table
pulled Provider_TIN__c
Merged TIN
pulling updated Groups_Clients__c table
pulled Groups_Clients__c
Merged Patient Group/Policy Number
pulling updated Jurisdiction__c table
pulled Jurisdiction__c
Merged JurisdictionState
pulling updated DRG__c table
pulled DRG__c
Merged DRG
pulling updated Provider_Specialty__c table
pulled Provider_Specialty__c
Merged Billing Provider Taxonomy
Inserting claims data ... 
Total records inserted: 103
Total errors logged: 6

________________________________________________________________________________________
pulling updated Claims__c table
pulled data slice!
Merged Claim ID
pulling updated HCPCS_CPT_Code__c table
pulled HCPCS_CPT_Code__c
Merged HCPCS/CPT Code
Inserting lines data ... 
Total records inserted: 425
Total

In [7]:
# Show which (source file, stage) pairs have an error log recorded.
list(Errors)

[('837I00111182024.csv', 'providers_obj'),
 ('837I00111182024.csv', 'claims_obj'),
 ('837I00111182024.csv', 'lines_obj'),
 ('837P00111182024.csv', 'providers_obj'),
 ('837P00111182024.csv', 'claims_obj'),
 ('837P00111182024.csv', 'lines_obj')]

In [8]:
# Re-pull both objects so their sizes can be compared with the pre-ingest baselines.
claims_sf, Line_items_sf = (
    updated_data_pull([object_name], sf)
    for object_name in ('Claims__c', 'Line_Items__c')
)
# Claims__c original shape is 12,937; Line_Items__c original shape is 22,841.
print(claims_sf.shape, Line_items_sf.shape, sep='\n')

# 13040, 23266 after first file ingested
# 14167, 25784 after second file ingested
# 14270, 26209 after both files ingested

pulled Claims__c
pulled Line_Items__c
(14270, 101)
(26209, 49)


In [9]:
### delete all cases 




def get_source_ids(Source_data_df, connection=None):
    """Delete the Claims__c records whose Claim_ID__c appears in a source table.

    Despite the name, nothing is returned. The matching Salesforce record Ids
    are looked up, their count is printed, and they are handed to delete_record.

    Parameters
    ----------
    Source_data_df : DataFrame with a 'Claim ID' column.
    connection : optional connection used for both the pull and the delete.
        When omitted, the notebook-level `sf` is used.
    """
    conn = sf if connection is None else connection

    # Drop rows without a Claim ID before merging: pandas' merge matches null
    # keys against each other, so one blank source row would otherwise select
    # every claim on Salesforce with an empty Claim_ID__c for deletion.
    source_cases = Source_data_df[['Claim ID']].dropna().drop_duplicates()
    claims_on_sf = updated_data_pull(['Claims__c'], conn)
    matched = source_cases.merge(claims_on_sf, left_on='Claim ID', right_on='Claim_ID__c')
    ids = list(matched['Id'])
    keys = {'Claims__c': 'Claim_ID__c'}
    print(len(ids))
    delete_record(ids, keys, conn)


#### delete all claims from all files ingested
# For every source file, remove the Claims__c records it produced.
# Despite its name, get_source_ids performs the deletion.
for key, Source_data_df in Source_data.items():
    print(key)
    get_source_ids(Source_data_df)

837I00111182024.csv
pulled Claims__c
103
Object: Claims__c, Records to process: 103
All records deleted successfully.
837P00111182024.csv
pulled Claims__c
1230
Object: Claims__c, Records to process: 1230
All records deleted successfully.
