<a href="https://colab.research.google.com/github/nxxk23/AI-Engineer/blob/main/icd10/redefine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
data = pd.read_csv('/content/drive/MyDrive/AIEngineer/pre-screening/icd10/GraphICD10.csv')
df = data.copy()

In [2]:
import pandas as pd

df_filtered = df[df['ICD-10 Code'].str.len() == 4]
# df_sampled = df_filtered.groupby("Index").apply(lambda x: x.sample(1)).reset_index(drop=True)
# remaining_needed = 100 - len(df_sampled)
# df_remaining = df_filtered[~df_filtered.index.isin(df_sampled.index)].sample(remaining_needed)
# df_final = pd.concat([df_sampled, df_remaining]).sample(frac=1).reset_index(drop=True)  # Shuffle rows
# df_final['Index'].nunique()

In [3]:
df_filtered.shape

(12423, 6)

In [5]:
df_filtered["Last"] = df_filtered["ICD-10 Code"].str[-1]
df_filtered.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered["Last"] = df_filtered["ICD-10 Code"].str[-1]


Unnamed: 0,ICD-10 Code,Descriptions (EN),Descriptions (TH),Index,Two,Three,Last
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,0
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,1
2,A009,"Cholera, unspecified",อหิวาตกโรค ไม่ระบุรายละเอียด,A,A0,A00,9
3,A010,Typhoid fever,ไข้รากสาดน้อย (ไทฟอยด์),A,A0,A01,0
4,A011,Paratyphoid fever A,ไข้รากสาดเทียม (พาราไทฟอยด์) เอ,A,A0,A01,1


In [78]:
import pandas as pd
import json
import requests
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# LLM Parameters
llm_url = "https://ai-api.manageai.co.th/llm-model-02/generate"
llm_headers = {'Content-Type': 'application/json'}
llm_auth = ('manageai2024', 'ManageAI@2024')
model_param = {
    "best_of": 1,
    "frequency_penalty": 1.1,
    "max_new_tokens": 150,
    "repetition_penalty": 1.1,
    "temperature": 0.8,
    "top_k": 10,
    "top_n_tokens": 5,
    "top_p": 0.95,
    "stop": ["assistant"]
}

# LLM Prompt Function
def generate_response(prompt):
    data = json.dumps({"inputs": prompt, "parameters": model_param})
    try:
        response = requests.post(llm_url, data=data, headers=llm_headers, auth=llm_auth, timeout=10)
        if response.status_code == 200:
            response_json = response.json()
            return response_json.get("generated_text", "")
        else:
            print(f"Request failed with status code {response.status_code}")
            return ""
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return ""

def extract_refined_description(text):
    match = re.search(r'Refined Description:\s*(.*)', text)
    return match.group(1) if match else None

def refine_query(icd10, description):
    prompt = f"""
    Please take the following input: {icd10} {description}, and provide a more detailed or in-depth description of the condition or diagnosis associated with the ICD10 code.
    Make sure the explanation is concise and approximately 1-2 lines long, comprehensive, and informative. Include key symptoms.
    Format the output as follows:
    Refined Description: <insert detailed explanation here> **never copy this**
        Input: A01.0 Typhoid fever
    Output: Refined Description: Typhoid fever (A01.0) is a bacterial infection caused by Salmonella enterica serotype Typhi.
    It is characterized by a prolonged fever, abdominal pain, and malaise. The infection is transmitted through ingestion of contaminated food or water.
    """
    return generate_response(prompt)

# Batch processing with threading
def batch_process(df, max_workers=5):
    refined_descriptions = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(refine_query, row["ICD-10 Code"], row["Descriptions (EN)"]): index for index, row in df.iterrows()}

        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing rows"):
            index = futures[future]
            try:
                response_text = future.result()
                refined_description = extract_refined_description(response_text)
                refined_descriptions.append((index, refined_description))
            except Exception as e:
                print(f"Error processing row {index}: {e}")
                refined_descriptions.append((index, response_text))

    # Update DataFrame with new refined descriptions
    for index, refined_text in refined_descriptions:
        df.at[index, "Refined Description"] = refined_text

    return df


In [79]:
df = batch_process(df_filtered, max_workers=10)
df

Processing rows: 100%|██████████| 12423/12423 [1:18:30<00:00,  2.64it/s]


Unnamed: 0,ICD-10 Code,Descriptions (EN),Descriptions (TH),Index,Two,Three,Last,Refined Description
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,0,"Cholera due to Vibrio cholerae O1, biovar chol..."
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,1,"Cholera due to Vibrio cholerae 01, biovar elto..."
2,A009,"Cholera, unspecified",อหิวาตกโรค ไม่ระบุรายละเอียด,A,A0,A00,9,"Cholera, unspecified (A00.9) is an acute diarr..."
3,A010,Typhoid fever,ไข้รากสาดน้อย (ไทฟอยด์),A,A0,A01,0,Typhoid fever (A01.0) is a systemic bacterial ...
4,A011,Paratyphoid fever A,ไข้รากสาดเทียม (พาราไทฟอยด์) เอ,A,A0,A01,1,Paratyphoid fever A (A01.1) is a bacterial inf...
...,...,...,...,...,...,...,...,...
38380,Z992,Dependence on renal dialysis,การพึ่งการล้างไต,Z,Z9,Z99,2,Dependence on renal dialysis (Z992) refers to ...
38381,Z993,Dependence on wheelchair,การพึ่งรถเข็นนั่ง,Z,Z9,Z99,3,Dependence on wheelchair (Z993) indicates an i...
38382,Z994,Dependence on artificial heart,การพึ่งหัวใจเทียม,Z,Z9,Z99,4,Dependence on an artificial heart (Z994) refer...
38383,Z998,Dependence on other enabling machines and devices,การพึ่งเครื่องมือและอุปกรณ์อื่น,Z,Z9,Z99,8,"Systemic sclerosis, diffuse type (M34.0), is a..."


In [84]:
df[df['ICD-10 Code']=='K219']['Refined Description'].iloc[0]

'Gastro-oesophageal reflux disease without oesophagitis (K21.9) involves the backflow of stomach contents into the esophagus, causing symptoms such as heartburn, regurgitation, and chest pain, but without visible inflammation of the esophagus lining.'

In [81]:
df.to_csv('บ๊อกๆ.csv', index=False)

In [53]:
import pandas as pd
import json
import requests
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# LLM Parameters
llm_url = "https://ai-api.manageai.co.th/llm-model-02/generate"
llm_headers = {'Content-Type': 'application/json'}
llm_auth = ('manageai2024', 'ManageAI@2024')
model_param = {
    "best_of": 1,
    "frequency_penalty": 1.1,
    "max_new_tokens": 150,
    "repetition_penalty": 1.1,
    "temperature": 0.8,
    "top_k": 10,
    "top_n_tokens": 5,
    "top_p": 0.95,
    "stop": ["assistant"]
}

# LLM Prompt Function
def generate_response(prompt):
    data = json.dumps({"inputs": prompt, "parameters": model_param})
    try:
        response = requests.post(llm_url, data=data, headers=llm_headers, auth=llm_auth, timeout=10)
        if response.status_code == 200:
            response_json = response.json()
            return response_json.get("generated_text", "")
        else:
            print(f"Request failed with status code {response.status_code}")
            return ""  # Return an empty string instead of error
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return ""  # Return an empty string on error

def extract_refined_description_only(text):
    match = re.search(r'Refined Description:\s*(.*)', text, re.IGNORECASE)

    if match:
        refined_text = match.group(1).strip()
        return refined_text
    else:
        return text

def refine_query(icd10, description):
    """Generates a refined description for a given ICD-10 code and description."""
    prompt = f"""
    Please take the following input: {icd10} {description}, and provide a more detailed or in-depth description of the condition or diagnosis associated with the ICD10 code.
    Make sure the explanation is concise and approximately 1-2 lines long, comprehensive, and informative. Include key symptoms.
    Format the output as follows:
    Refined Description: <insert detailed explanation here>
    """
    return generate_response(prompt)

# Batch processing with threading
def batch_process(df, max_workers=5):
    refined_descriptions = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submitting all tasks to the executor
        futures = {executor.submit(refine_query, row["ICD-10 Code"], row["Descriptions (EN)"]): index for index, row in df.iterrows()}

        # Collecting the results as they are completed
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing rows"):
            index = futures[future]
            try:
                response_text = future.result()

                # Extract the refined description or just return the raw response
                refined_description = extract_refined_description(response_text)
                refined_descriptions.append((index, refined_description))
            except Exception as e:
                print(f"Error processing row {index}: {e}")
                refined_descriptions.append((index, response_text))  # Keep the raw response on failure

    # Update DataFrame with new refined descriptions
    for index, refined_text in refined_descriptions:
        df.at[index, "Refined Description"] = refined_text

    return df

In [54]:
re = df[df['Refined Description'] == 'Error processing']
re

Unnamed: 0,ICD-10 Code,Descriptions (EN),Descriptions (TH),Index,Two,Three,Last,Refined Description
150,A309,"Leprosy, unspecified",โรคเรื้อน ไม่ระบุรายละเอียด,A,A3,A30,9,Error processing
335,A829,"Rabies, unspecified",โรคพิษสุนัขบ้า ไม่ระบุรายละเอียด,A,A8,A82,9,Error processing
450,B171,Acute hepatitis C,ตับอักเสบ ซี แบบเฉียบพลัน,B,B1,B17,1,Error processing
525,B351,Tinea unguium,โรคติดเชื้อราที่เล็บ,B,B3,B35,1,Error processing
755,B868,Other scabies (TM),โรคหิดชนิดอื่น,B,B8,B86,8,Error processing
...,...,...,...,...,...,...,...,...
38060,Z576,Occupational exposure to extreme temperatures,การสัมผัสอุณภูมิสุดขั้วจากการทำงาน,Z,Z5,Z57,6,Error processing
38093,Z611,Removal from home in childhood,การจากบ้านในวัยเด็ก,Z,Z6,Z61,1,Error processing
38224,Z841,Family history of disorders of kidney and ureter,ประวัติครอบครัวเป็นโรคไตและทอ่ไต,Z,Z8,Z84,1,Error processing
38239,Z861,Personal history of infectious and parasitic d...,ประวัติส่วนตัวเคยเป็นโรคติดเชื้อและโรคปรสิต,Z,Z8,Z86,1,Error processing


In [55]:
re = df[df['Refined Description'] == 'Error processing']
reprocessed = batch_process(re)

Processing rows:  80%|████████  | 183/228 [01:25<00:18,  2.48it/s]

Error processing row 22285: No axis named  for object type DataFrame


Processing rows:  81%|████████  | 184/228 [01:26<00:23,  1.90it/s]

Error processing row 21965: No axis named  for object type DataFrame


Processing rows:  81%|████████  | 185/228 [01:26<00:19,  2.22it/s]

Error processing row 22549: No axis named  for object type DataFrame


Processing rows:  82%|████████▏ | 187/228 [01:26<00:13,  3.04it/s]

Error processing row 23005: No axis named  for object type DataFrame
Error processing row 23021: No axis named  for object type DataFrame


Processing rows:  82%|████████▏ | 188/228 [01:27<00:17,  2.25it/s]

Error processing row 23613: No axis named  for object type DataFrame


Processing rows:  83%|████████▎ | 189/228 [01:28<00:21,  1.86it/s]

Error processing row 23965: No axis named  for object type DataFrame


Processing rows:  84%|████████▍ | 191/228 [01:28<00:14,  2.57it/s]

Error processing row 24653: No axis named  for object type DataFrame
Error processing row 25213: No axis named  for object type DataFrame


Processing rows:  84%|████████▍ | 192/228 [01:28<00:12,  2.89it/s]

Error processing row 24717: No axis named  for object type DataFrame


Processing rows:  85%|████████▍ | 193/228 [01:29<00:15,  2.23it/s]

Error processing row 25885: No axis named  for object type DataFrame


Processing rows:  86%|████████▌ | 195/228 [01:30<00:13,  2.36it/s]

Error processing row 26589: No axis named  for object type DataFrame
Error processing row 27957: No axis named  for object type DataFrame


Processing rows:  86%|████████▋ | 197/228 [01:31<00:10,  3.01it/s]

Error processing row 27733: No axis named  for object type DataFrame
Error processing row 29053: No axis named  for object type DataFrame


Processing rows:  87%|████████▋ | 198/228 [01:31<00:12,  2.39it/s]

Error processing row 29469: No axis named  for object type DataFrame


Processing rows:  88%|████████▊ | 200/228 [01:33<00:14,  1.95it/s]

Error processing row 31260: No axis named  for object type DataFrame
Error processing row 29525: No axis named  for object type DataFrame


Processing rows:  88%|████████▊ | 201/228 [01:34<00:18,  1.46it/s]

Error processing row 31454: No axis named  for object type DataFrame


Processing rows:  89%|████████▊ | 202/228 [01:34<00:14,  1.76it/s]

Error processing row 30165: No axis named  for object type DataFrame
Error processing row 32950: No axis named  for object type DataFrame


Processing rows:  89%|████████▉ | 204/228 [01:34<00:09,  2.42it/s]

Error processing row 31052: No axis named  for object type DataFrame


Processing rows:  90%|████████▉ | 205/228 [01:35<00:08,  2.63it/s]

Error processing row 33398: No axis named  for object type DataFrame


Processing rows:  90%|█████████ | 206/228 [01:36<00:11,  1.93it/s]

Error processing row 33790: No axis named  for object type DataFrame


Processing rows:  91%|█████████ | 207/228 [01:36<00:09,  2.16it/s]

Error processing row 34086: No axis named  for object type DataFrame


Processing rows:  91%|█████████ | 208/228 [01:36<00:09,  2.03it/s]

Error processing row 33806: No axis named  for object type DataFrame
Error processing row 34614: No axis named  for object type DataFrame


Processing rows:  92%|█████████▏| 210/228 [01:37<00:06,  2.93it/s]

Error processing row 35574: No axis named  for object type DataFrame


Processing rows:  93%|█████████▎| 211/228 [01:38<00:09,  1.76it/s]

Error processing row 36294: No axis named  for object type DataFrame


Processing rows:  93%|█████████▎| 212/228 [01:39<00:08,  1.82it/s]

Error processing row 35670: No axis named  for object type DataFrame


Processing rows:  93%|█████████▎| 213/228 [01:39<00:07,  1.88it/s]

Error processing row 37485: No axis named  for object type DataFrame


Processing rows:  94%|█████████▍| 214/228 [01:39<00:06,  2.23it/s]

Error processing row 37379: No axis named  for object type DataFrame
Error processing row 37474: No axis named  for object type DataFrame


Processing rows:  95%|█████████▍| 216/228 [01:40<00:05,  2.02it/s]

Error processing row 37559: No axis named  for object type DataFrame


Processing rows:  95%|█████████▌| 217/228 [01:41<00:05,  1.97it/s]

Error processing row 37818: No axis named  for object type DataFrame
Error processing row 37621: No axis named  for object type DataFrame


Processing rows:  96%|█████████▌| 219/228 [01:41<00:03,  2.89it/s]

Error processing row 37799: No axis named  for object type DataFrame


Processing rows:  96%|█████████▋| 220/228 [01:42<00:02,  2.74it/s]

Error processing row 37792: No axis named  for object type DataFrame


Processing rows:  97%|█████████▋| 221/228 [01:43<00:03,  1.89it/s]

Error processing row 37984: No axis named  for object type DataFrame


Processing rows:  98%|█████████▊| 223/228 [01:43<00:02,  2.21it/s]

Error processing row 38055: No axis named  for object type DataFrame
Error processing row 38060: No axis named  for object type DataFrame


Processing rows:  98%|█████████▊| 224/228 [01:44<00:01,  2.20it/s]

Error processing row 37993: No axis named  for object type DataFrame
Error processing row 38093: No axis named  for object type DataFrame


Processing rows:  99%|█████████▉| 226/228 [01:45<00:00,  2.21it/s]

Error processing row 38224: No axis named  for object type DataFrame


Processing rows: 100%|██████████| 228/228 [01:46<00:00,  2.40it/s]

Error processing row 38380: No axis named  for object type DataFrame
Error processing row 38239: No axis named  for object type DataFrame


Processing rows: 100%|██████████| 228/228 [01:46<00:00,  2.15it/s]


In [64]:
reprocessed['Refined Description'].iloc[2]

' Key Symptoms: <insert list of key symptoms separated by commas> Refine Description: Acute hepatitis C is an infection caused by the hepatitis C virus (HCV), characterized by inflammation of the liver that occurs within 6 months after exposure to the virus. \nKey Symptoms: jaundice, fatigue, dark urine, abdominal pain, loss of appetite, nausea, fever\n\n---\n\nThis format ensures the information is clear and concise while providing essential details about the condition and its symptoms. If you need any further adjustments, feel free to let me know!'

In [58]:
import re

def extract_refined_description_only(text):
    match = re.search(r'Refined Description:\s*(.*)', text, re.IGNORECASE)

    if match:
        refined_text = match.group(1).strip()
        return refined_text
    else:
        return text

# Extract the description after 'Refined Description:'
reprocessed['Refined Description'] = reprocessed['Refined Description'].apply(extract_refined_description_only)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reprocessed['Refined Description'] = reprocessed['Refined Description'].apply(extract_refined_description_only)


In [65]:
import re

def extract_refined_description_only(text):
    # This pattern captures everything after 'Refined Description:' until it encounters 'Key Symptoms:' or the end of the description
    match = re.search(r'Refined Description:\s*(.*?)(?=\s*Key Symptoms|$)', text, re.IGNORECASE)

    if match:
        refined_text = match.group(1).strip()
        return refined_text
    else:
        return text  # If no match, return the original text

# Example usage in the DataFrame
reprocessed['Refined Description'] = reprocessed['Refined Description'].apply(extract_refined_description_only)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reprocessed['Refined Description'] = reprocessed['Refined Description'].apply(extract_refined_description_only)


In [70]:
# Replace the value at the specific index
reprocessed['Refined Description'].iloc[2] = "Acute hepatitis C is an infection caused by the hepatitis C virus (HCV), characterized by inflammation of the liver that occurs within 6 months after exposure to the virus."
reprocessed['Refined Description'].iloc[2]

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  reprocessed['Refined Description'].iloc[2] = "Acute hepatitis C is an infection caused by the hepatitis C virus (HCV), characterized by inflammation of the liver that occurs within 6 months after exposure to the virus."
A value is trying to be set on a copy of a slice fr

'Acute hepatitis C is an infection caused by the hepatitis C virus (HCV), characterized by inflammation of the liver that occurs within 6 months after exposure to the virus.'

In [74]:
df[df['Refined Description'] == 'Error processing']

Unnamed: 0,ICD-10 Code,Descriptions (EN),Descriptions (TH),Index,Two,Three,Last,Refined Description
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,0,
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",อหิวาตกโรคจากเชื้อ วิบริโอ คอเลเร โอ1 ไบโอวาร์...,A,A0,A00,1,
2,A009,"Cholera, unspecified",อหิวาตกโรค ไม่ระบุรายละเอียด,A,A0,A00,9,
3,A010,Typhoid fever,ไข้รากสาดน้อย (ไทฟอยด์),A,A0,A01,0,
4,A011,Paratyphoid fever A,ไข้รากสาดเทียม (พาราไทฟอยด์) เอ,A,A0,A01,1,
...,...,...,...,...,...,...,...,...
12418,Z992,Dependence on renal dialysis,การพึ่งการล้างไต,Z,Z9,Z99,2,Z992 refers to a chronic medical condition whe...
12419,Z993,Dependence on wheelchair,การพึ่งรถเข็นนั่ง,Z,Z9,Z99,3,
12420,Z994,Dependence on artificial heart,การพึ่งหัวใจเทียม,Z,Z9,Z99,4,
12421,Z998,Dependence on other enabling machines and devices,การพึ่งเครื่องมือและอุปกรณ์อื่น,Z,Z9,Z99,8,


In [71]:
reprocessed

Unnamed: 0,ICD-10 Code,Descriptions (EN),Descriptions (TH),Index,Two,Three,Last,Refined Description
150,A309,"Leprosy, unspecified",โรคเรื้อน ไม่ระบุรายละเอียด,A,A3,A30,9,A309 refers to leprosy (Hansen's disease) wher...
335,A829,"Rabies, unspecified",โรคพิษสุนัขบ้า ไม่ระบุรายละเอียด,A,A8,A82,9,"Rabies, unspecified refers to an infection by ..."
450,B171,Acute hepatitis C,ตับอักเสบ ซี แบบเฉียบพลัน,B,B1,B17,1,Acute hepatitis C is an infection caused by th...
525,B351,Tinea unguium,โรคติดเชื้อราที่เล็บ,B,B3,B35,1,"Tinea unguium, also known as onychomycosis, is..."
755,B868,Other scabies (TM),โรคหิดชนิดอื่น,B,B8,B86,8,"Other forms of scabies, including those that d..."
...,...,...,...,...,...,...,...,...
38060,Z576,Occupational exposure to extreme temperatures,การสัมผัสอุณภูมิสุดขั้วจากการทำงาน,Z,Z5,Z57,6,Occupational exposure to extreme temperatures ...
38093,Z611,Removal from home in childhood,การจากบ้านในวัยเด็ก,Z,Z6,Z61,1,Z611 indicates an individual's history of remo...
38224,Z841,Family history of disorders of kidney and ureter,ประวัติครอบครัวเป็นโรคไตและทอ่ไต,Z,Z8,Z84,1,This code indicates a family history of kidney...
38239,Z861,Personal history of infectious and parasitic d...,ประวัติส่วนตัวเคยเป็นโรคติดเชื้อและโรคปรสิต,Z,Z8,Z86,1,Z861 indicates a personal history of infectiou...


In [None]:
df3= df[df['ICD-10 Code'].str.len() == 3]
df3.shape

'Typhoid fever (A01.0) is a bacterial infection causing prolonged fever, abdominal pain, and malaise, transmitted via contaminated food or water.'

In [None]:
df3= df_filtered[df_filtered['ICD-10 Code'].str.len() == 3]
