In [4]:
# ICIS Claim Data Processing Tutorial
# Step-by-Step Guide for ICIS Claim Data Processing

## Table of Contents
# 1. Setup and Data Loading
#    - Import Libraries
#    - Load Claim Data
#
# 2. Step-by-Step Processing
#    2.1 Data Validation
#    2.2 Data Cleansing
#    2.3 Data Preparation
#    2.4 Date Calculations
#    2.5 Final Merge
#
# 3. Complete Pipeline Processing
#    - Using icis.process()
#    - Error Handling
#    - Results Comparison

## 1. Setup and Data Loading

# Import required libraries
import pandas as pd
from underwriter.icis import ICIS

def _preview(title, frame):
    """Print a title, the frame's shape and column names, then render its first rows."""
    print(title)
    print("Shape:", frame.shape)
    print("\nColumns:", frame.columns.tolist())
    print("\nFirst few rows:")
    display(frame.head())


# ICIS claim records (the raw input to the processor)
claim = pd.read_csv('data/claim.csv')
_preview("Initial claim data:", claim)

# Main disease classification reference table
main = pd.read_csv('data/main.csv')
_preview("\nMain reference data:", main)

## 2. Step-by-Step Processing

# Build the processor from the two frames loaded above
icis = ICIS(claim=claim, main=main)

# 2.1 Data Validation
print("\n2.1 Data Validation")
print("-----------------")
try:
    icis.validate_columns()
except ValueError as err:
    # Report the validation failure instead of raising
    print(f"✗ Validation error: {err}")
else:
    print("✓ Column validation successful")

# 2.2 Data Cleansing
print("\n2.2 Data Cleansing")
print("----------------")

# Baseline: the claim frame before any cleansing step has run
print("• Initial claim shape:", icis.claim.shape)
display(icis.claim.head())

# Each entry: (announcement, ICIS method to call, shape label, attribute to inspect afterwards)
cleansing_steps = (
    ("\n1) Removing duplicates...", "drop_duplicates",
     "• Shape after deduplication:", "claim"),
    ("\n2) Forward filling KCD codes...", "fill_kcd_forward",
     "• Shape after forward fill:", "filled"),
    ("\n3) Filtering by claim date...", "filter_by_clm_date",
     "• Shape after date filtering:", "filled"),
)
for announcement, method_name, shape_label, result_attr in cleansing_steps:
    print(announcement)
    getattr(icis, method_name)()
    # Look the attribute up only after the step ran, as the original cell did
    print(shape_label, getattr(icis, result_attr).shape)
    display(getattr(icis, result_attr).head())

# 2.3 Data Preparation
print("\n2.3 Data Preparation")
print("------------------")

# Steps 1-2: run the step, then show only the columns of `icis.filled` relevant to it.
# Each entry: (announcement, step to run, caption, columns to display)
column_steps = (
    ("1) Setting medical care types...", icis.set_type,
     "• Data with medical care types:", ['id', 'clm_date', 'type']),
    ("\n2) Modifying hospital end dates...", icis.set_hos_edate_mod,
     "• Data with modified hospital end dates:", ['id', 'hos_edate', 'hos_edate_mod']),
)
for announcement, run_step, caption, shown_columns in column_steps:
    print(announcement)
    run_step()
    print(caption)
    display(icis.filled[shown_columns].head())

# Step 3: reshape to long format and inspect the melted frame
print("\n3) Converting to long format...")
icis.melt()
print("• Melted data shape:", icis.melted.shape)
display(icis.melted.head())

# Step 4: the three KCD operations run back to back, in this order
print("\n4) Processing KCD information...")
for kcd_step in (icis.set_sub_kcd, icis.merge_main_info, icis.filter_sub_kcd):
    kcd_step()
print("• Shape after KCD processing:", icis.melted.shape)
display(icis.melted.head())

# 2.4 Date Calculations
print("\n2.4 Date Calculations")
print("------------------")

# Date ranges are set first; this step has nothing to display
print("1) Setting date ranges...")
icis.set_date_range()

# Each entry: (announcement, ICIS method to call, shape label, attribute holding the result)
calculation_steps = (
    ("\n2) Calculating hospitalization days...", "calc_hos_day",
     "• Hospitalized data shape:", "hospitalized"),
    ("\n3) Calculating surgery counts...", "calc_sur_cnt",
     "• Surgery data shape:", "underwent"),
    ("\n4) Calculating elapsed days...", "calc_elp_day",
     "• Elapsed days data shape:", "elapsed"),
)
for announcement, method_name, shape_label, result_attr in calculation_steps:
    print(announcement)
    getattr(icis, method_name)()
    # Read the result attribute only after the calculation has run
    print(shape_label, getattr(icis, result_attr).shape)
    display(getattr(icis, result_attr).head())

# 2.5 Final Merge
print("\n2.5 Final Merge")
print("-------------")

# Keep the merged frame: the pipeline section compares its own result against it
step_result = icis.merge_calculated()
merged_shape = step_result.shape
merged_columns = step_result.columns.tolist()
print("• Final result shape:", merged_shape)
print("• Final columns:", merged_columns)
display(step_result.head())

## 3. Complete Pipeline Processing
print("\n3. Complete Pipeline Processing")
print("-----------------------------")

# Initialize a new ICIS instance from the same `claim` and `main` frames
icis_pipeline = ICIS(claim=claim, main=main)

# Process ICIS claim data using the complete pipeline.
# Only the process() call is guarded, so an error raised while comparing
# results is never mislabeled as a validation/processing error.
print("Processing ICIS claim data using icis.process()...")
try:
    pipeline_result = icis_pipeline.process()
except ValueError as e:
    print(f"\n✗ Validation error: {e}")
except RuntimeError as e:
    print(f"\n✗ Processing error: {e}")
except Exception as e:
    print(f"\n✗ Unexpected error: {e}")
    # Unexpected failures keep their traceback instead of being reduced to one line
    raise
else:
    print("\n✓ Processing completed successfully!")
    print("• Final result shape:", pipeline_result.shape)

    # Compare against `step_result`, produced by the step-by-step run (section 2.5)
    print("\nResults Comparison:")
    print("• Step-by-step shape:", step_result.shape)
    print("• Pipeline shape:", pipeline_result.shape)

    are_equal = step_result.equals(pipeline_result)
    # The marker follows the outcome; a mismatch must not be shown with a check mark
    marker = "✓" if are_equal else "✗"
    print(f"\n{marker} Results are identical: {are_equal}")

    if not are_equal:
        print("\nDifferences in columns:")
        print(set(step_result.columns) ^ set(pipeline_result.columns))

        # The column-name sets can match while values, dtypes or column order
        # differ; assert_frame_equal names the first such mismatch.
        try:
            pd.testing.assert_frame_equal(step_result, pipeline_result, check_exact=True)
        except AssertionError as mismatch:
            print("\nFirst difference found:")
            print(mismatch)

Initial claim data:
Shape: (11, 13)

Columns: ['id', 'kcd0', 'kcd1', 'kcd2', 'kcd3', 'kcd4', 'clm_date', 'hos_sdate', 'hos_edate', 'hos_day', 'hos_cnt', 'out_cnt', 'sur_cnt']

First few rows:


Unnamed: 0,id,kcd0,kcd1,kcd2,kcd3,kcd4,clm_date,hos_sdate,hos_edate,hos_day,hos_cnt,out_cnt,sur_cnt
0,100000001,,M51,,C44,,20150102,20150102.0,20150108.0,4,1,0,0
1,100000001,,M51,,C44,,20150102,20150102.0,20150108.0,4,1,0,0
2,100000001,S33,G551,,,,20150102,20150102.0,20150105.0,0,2,2,0
3,100000001,M512,,,,,20200901,,,0,0,0,2
4,100000001,S33,M54,M513,,,20220802,20220802.0,20220806.0,5,0,0,0



Main reference data:
Shape: (10, 3)

Columns: ['kcd', 'kcd_main', 'sub_chk']

First few rows:


Unnamed: 0,kcd,kcd_main,sub_chk
0,C73,C73,1
1,D12,D12,1
2,K20,K20,0
3,M51,M51,1
4,M512,M51,1



2.1 Data Validation
-----------------
✓ Column validation successful

2.2 Data Cleansing
----------------
• Initial claim shape: (11, 13)


Unnamed: 0,id,kcd0,kcd1,kcd2,kcd3,kcd4,clm_date,hos_sdate,hos_edate,hos_day,hos_cnt,out_cnt,sur_cnt
0,100000001,,M51,,C44,,2015-01-02,2015-01-02,2015-01-08,4,1,0,0
1,100000001,,M51,,C44,,2015-01-02,2015-01-02,2015-01-08,4,1,0,0
2,100000001,S33,G551,,,,2015-01-02,2015-01-02,2015-01-05,0,2,2,0
3,100000001,M512,,,,,2020-09-01,NaT,NaT,0,0,0,2
4,100000001,S33,M54,M513,,,2022-08-02,2022-08-02,2022-08-06,5,0,0,0



1) Removing duplicates...
• Shape after deduplication: (10, 13)


Unnamed: 0,id,kcd0,kcd1,kcd2,kcd3,kcd4,clm_date,hos_sdate,hos_edate,hos_day,hos_cnt,out_cnt,sur_cnt
0,100000001,,M51,,C44,,2015-01-02,2015-01-02,2015-01-08,4,1,0,0
1,100000001,S33,G551,,,,2015-01-02,2015-01-02,2015-01-05,0,2,2,0
2,100000001,M512,,,,,2020-09-01,NaT,NaT,0,0,0,2
3,100000001,S33,M54,M513,,,2022-08-02,2022-08-02,2022-08-06,5,0,0,0
4,100000001,S33,M54,D12,K20,M514,2022-08-02,2022-08-04,2022-08-04,1,0,0,0



2) Forward filling KCD codes...
• Shape after forward fill: (10, 13)


Unnamed: 0,id,kcd0,kcd1,kcd2,kcd3,kcd4,clm_date,hos_sdate,hos_edate,hos_day,hos_cnt,out_cnt,sur_cnt
0,100000001,M51,C44,,,,2015-01-02,2015-01-02,2015-01-08,4,1,0,0
1,100000001,S33,G551,,,,2015-01-02,2015-01-02,2015-01-05,0,2,2,0
2,100000001,M512,,,,,2020-09-01,NaT,NaT,0,0,0,2
3,100000001,S33,M54,M513,,,2022-08-02,2022-08-02,2022-08-06,5,0,0,0
4,100000001,S33,M54,D12,K20,M514,2022-08-02,2022-08-04,2022-08-04,1,0,0,0



3) Filtering by claim date...
• Shape after date filtering: (8, 13)


Unnamed: 0,id,kcd0,kcd1,kcd2,kcd3,kcd4,clm_date,hos_sdate,hos_edate,hos_day,hos_cnt,out_cnt,sur_cnt
0,100000001,M512,,,,,2020-09-01,NaT,NaT,0,0,0,2
1,100000001,S33,M54,M513,,,2022-08-02,2022-08-02,2022-08-06,5,0,0,0
2,100000001,S33,M54,D12,K20,M514,2022-08-02,2022-08-04,2022-08-04,1,0,0,0
3,100000001,M51,,,,,2022-08-02,2022-08-04,2022-08-07,4,0,0,0
4,100000001,M51,W00,,,,2024-02-05,2024-02-08,2024-02-09,2,0,0,1



2.3 Data Preparation
------------------
1) Setting medical care types...
• Data with medical care types:


Unnamed: 0,id,clm_date,type
0,100000001,2020-09-01,sur
1,100000001,2022-08-02,hos
2,100000001,2022-08-02,hos
3,100000001,2022-08-02,hos
4,100000001,2024-02-05,hos/sur



2) Modifying hospital end dates...
• Data with modified hospital end dates:


Unnamed: 0,id,hos_edate,hos_edate_mod
0,100000001,NaT,NaT
1,100000001,2022-08-06,2022-08-06
2,100000001,2022-08-04,2022-08-04
3,100000001,2022-08-07,2022-08-07
4,100000001,2024-02-09,2024-02-09



3) Converting to long format...
• Melted data shape: (18, 12)


Unnamed: 0,id,clm_date,hos_sdate,hos_edate,hos_edate_mod,hos_day,hos_cnt,out_cnt,sur_cnt,type,kcd_ord,kcd
0,100000001,2020-09-01,NaT,NaT,NaT,0,0,0,2,sur,0,M512
1,100000001,2022-08-02,2022-08-02,2022-08-06,2022-08-06,5,0,0,0,hos,0,S33
2,100000001,2022-08-02,2022-08-04,2022-08-04,2022-08-04,1,0,0,0,hos,0,S33
3,100000001,2022-08-02,2022-08-04,2022-08-07,2022-08-07,4,0,0,0,hos,0,M51
4,100000001,2024-02-05,2024-02-08,2024-02-09,2024-02-09,2,0,0,1,hos/sur,0,M51



4) Processing KCD information...
• Shape after KCD processing: (16, 15)


Unnamed: 0,id,clm_date,hos_sdate,hos_edate,hos_edate_mod,hos_day,hos_cnt,out_cnt,sur_cnt,type,kcd_ord,kcd,sub_kcd,kcd_main,sub_chk
0,100000001,2020-09-01,NaT,NaT,NaT,0,0,0,2,sur,0,M512,0,M51,1
1,100000001,2022-08-02,2022-08-02,2022-08-06,2022-08-06,5,0,0,0,hos,0,S33,0,S33,1
2,100000001,2022-08-02,2022-08-04,2022-08-04,2022-08-04,1,0,0,0,hos,0,S33,0,S33,1
3,100000001,2022-08-02,2022-08-04,2022-08-07,2022-08-07,4,0,0,0,hos,0,M51,0,M51,1
4,100000001,2024-02-05,2024-02-08,2024-02-09,2024-02-09,2,0,0,1,hos/sur,0,M51,0,M51,1



2.4 Date Calculations
------------------
1) Setting date ranges...

2) Calculating hospitalization days...
• Hospitalized data shape: (4, 3)


Unnamed: 0,id,kcd_main,hos_day
0,100000001,D12,1
1,100000001,M51,8
2,100000001,M54,5
3,100000001,S33,5



3) Calculating surgery counts...
• Surgery data shape: (1, 3)


Unnamed: 0,id,kcd_main,sur_cnt
0,100000001,M51,2



4) Calculating elapsed days...
• Elapsed days data shape: (5, 4)


Unnamed: 0,id,kcd_main,elp_day_si,elp_day_std
0,100000001,C73,,264.0
1,100000001,D12,900.0,900.0
2,100000001,M51,346.0,264.0
3,100000001,M54,898.0,898.0
4,100000001,S33,898.0,898.0



2.5 Final Merge
-------------
• Final result shape: (5, 6)
• Final columns: ['id', 'kcd_main', 'hos_day', 'sur_cnt', 'elp_day_si', 'elp_day_std']


Unnamed: 0,id,kcd_main,hos_day,sur_cnt,elp_day_si,elp_day_std
0,100000001,C73,0.0,0.0,,264.0
1,100000001,D12,1.0,0.0,900.0,900.0
2,100000001,M51,8.0,2.0,346.0,264.0
3,100000001,M54,5.0,0.0,898.0,898.0
4,100000001,S33,5.0,0.0,898.0,898.0



3. Complete Pipeline Processing
-----------------------------
Processing ICIS claim data using icis.process()...

✓ Processing completed successfully!
• Final result shape: (5, 6)

Results Comparison:
• Step-by-step shape: (5, 6)
• Pipeline shape: (5, 6)

✓ Results are identical: True


In [5]:
# Show the docstring reachable through the `icis` instance built in the step-by-step section.
print(icis.__doc__)


    ICIS (Insurance Credit Information System) Claim Information

    Abbreviations:
        uw:  Underwriting - Insurance policy evaluation process
        clm: Claim - Insurance claim submission
        hos: Hospitalization - Medical treatment usually requiring overnight stay (including same-day stay)
        sur: Surgery - Surgical procedures performed
        out: Outpatient - Medical treatment without overnight stay
        elp: Elapsed - Time passed since an event
        day: Day - Number of days
        cnt: Count - Number of occurrences
        mod: Modified - Adjusted or changed value
        
    Column Naming Convention:
        hos_day: Total days of hospitalization per case
        hos_cnt: Number of separate hospitalization events
        sur_cnt: Number of surgical procedures performed
        elp_day: Number of days elapsed since most recent occurrence
        hos_edate_mod: Modified hospital discharge date

    Instance Variables:
        filled: DataFrame with forwa

In [6]:
# Show the docstring of the ICIS class itself (imported from underwriter.icis).
print(ICIS.__doc__)


    ICIS (Insurance Credit Information System) Claim Information

    Abbreviations:
        uw:  Underwriting - Insurance policy evaluation process
        clm: Claim - Insurance claim submission
        hos: Hospitalization - Medical treatment usually requiring overnight stay (including same-day stay)
        sur: Surgery - Surgical procedures performed
        out: Outpatient - Medical treatment without overnight stay
        elp: Elapsed - Time passed since an event
        day: Day - Number of days
        cnt: Count - Number of occurrences
        mod: Modified - Adjusted or changed value
        
    Column Naming Convention:
        hos_day: Total days of hospitalization per case
        hos_cnt: Number of separate hospitalization events
        sur_cnt: Number of surgical procedures performed
        elp_day: Number of days elapsed since most recent occurrence
        hos_edate_mod: Modified hospital discharge date

    Instance Variables:
        filled: DataFrame with forwa