In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('./csv/Doctor_Asiful_Sk.csv')

In [3]:
df

Unnamed: 0,Doctor ID,Symptom,Prescriptions,Dignosis,Notes
0,60897,Bodyache,CALPOL 500MG (50654); PARACETAMOL (500MG) (51603),(),No sugar - no sweet in diet. to take rice & po...
1,60897,Fever,CALPOL 500MG (50654); PARACETAMOL (500MG) (51603),(),No sugar - no sweet in diet. to take rice & po...
2,60897,Chest pain,CALPOL 500MG (50654); PARACETAMOL (500MG) (51603),(),
3,60897,Fever,CALPOL 500MG (50654); PARACETAMOL (500MG) (51603),(),
4,60897,Headache,CALPOL 500MG (50654); PARACETAMOL (500MG) (51603),(),
...,...,...,...,...,...
543,60897,dryness of mouth,Rest DSR Capsule (157441),"('ACS - Acute coronary syndrome', 'UTI - Urina...",No sugar - no sweet in diet. to take rice & po...
544,60897,Burning Pain On Urination,Rest DSR Capsule (157441),"('ACS - Acute coronary syndrome', 'UTI - Urina...",No sugar - no sweet in diet. to take rice & po...
545,60897,Abdominal pain,,"('ACUTE VIRAL FEVER', 'Enteric fever', 'UTI - ...",
546,60897,Vomiting,,"('ACUTE VIRAL FEVER', 'Enteric fever', 'UTI - ...",


In [4]:
# Encoding categorical variables
le_symptom = LabelEncoder()
df['Symptom_encoded'] = le_symptom.fit_transform(df['Symptom'])

le_medicine = LabelEncoder()#Encon
df['Medicine_encoded'] = le_medicine.fit_transform(df['Prescriptions'])

le_notes = LabelEncoder()
df['Notes_encoded'] = le_notes.fit_transform(df['Notes'])

le_dignosis = LabelEncoder()
df['Dignosis_encoded'] = le_dignosis.fit_transform(df['Dignosis'])

In [5]:
def data_split(data, ratio):
    np.random.seed(42)
    shuffled = np.random.permutation(len(data))
    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [6]:
train, test = data_split(df, 0.3)

In [7]:
X_train = train[['Symptom_encoded']].to_numpy()
X_test = test[['Symptom_encoded']].to_numpy()

In [8]:
Y_train = train[['Medicine_encoded','Notes_encoded','Dignosis_encoded']].to_numpy()
Y_test = test[['Medicine_encoded','Notes_encoded','Dignosis_encoded']].to_numpy()

In [9]:
med_model = RandomForestClassifier(n_estimators=100, random_state=42)
med_model.fit(X_train, Y_train)

In [10]:
# Assume symptoms_list is a list of symptoms
symptoms_list = ['Bodyache']

# Initialize lists to store predictions for each symptom
predicted_notes_encoded_list = []
predicted_medicine_encoded_list = []
predicted_diagnosis_encoded_list = []

# Loop through each symptom
for symptom in symptoms_list:
    # Encode the symptom
    new_symptom_encoded = le_symptom.transform([symptom])
    
    # Predict notes, medicine, and diagnosis for the symptom
    predicted_notes_encoded = med_model.predict([new_symptom_encoded])
    predicted_medicine_encoded = med_model.predict([new_symptom_encoded])
    predicted_diagnosis_encoded = med_model.predict([new_symptom_encoded])
    
    # Append predictions to lists
    predicted_notes_encoded_list.append(predicted_notes_encoded)
    predicted_medicine_encoded_list.append(predicted_medicine_encoded)
    predicted_diagnosis_encoded_list.append(predicted_diagnosis_encoded)

predicted_notes_encoded_list = np.reshape(predicted_notes_encoded_list, (-1,))
predicted_medicine_encoded_list = np.reshape(predicted_medicine_encoded_list, (-1,))
predicted_diagnosis_encoded_list = np.reshape(predicted_diagnosis_encoded_list, (-1,))

# Handle unseen labels
predicted_notes_encoded_list = np.where(predicted_notes_encoded_list < le_notes.classes_.shape[0], predicted_notes_encoded_list, 0)
predicted_medicine_encoded_list = np.where(predicted_medicine_encoded_list < le_medicine.classes_.shape[0], predicted_medicine_encoded_list, 0)
predicted_diagnosis_encoded_list = np.where(predicted_diagnosis_encoded_list < le_dignosis.classes_.shape[0], predicted_diagnosis_encoded_list, 0)

In [11]:
# Decode the predictions
predicted_notes = le_notes.inverse_transform(predicted_notes_encoded_list)
predicted_medicine = le_medicine.inverse_transform(predicted_medicine_encoded_list)
predicted_diagnosis = le_dignosis.inverse_transform(predicted_diagnosis_encoded_list)

In [12]:
print(f"Predicted Medicine: {predicted_medicine}")
print(f"predicted_notes: {predicted_notes}")
print(f"predicted_dignosis: {predicted_diagnosis}")

Predicted Medicine: [nan
 'Alastin 10Mg Tablet (157452); Azee 500 Tablet (157450); Coldmine Tablet (157451); Dolo 650 Tablet (157440); GRELIREX SYP (157453)'
 'Arkamin Tablet (157527); Micatel-M 40Mg/50Mg Tablet (157526); Stresnil 0.5 Tablet (157456)']
predicted_notes: ['Eat less oily-chili-spicy food items. do not eat outside food.' nan
 'Eat less oily-chili-spicy food items. do not eat outside food.']
predicted_dignosis: ["('3rd Degree Internal Piles',)"
 "('ACS - Acute coronary syndrome', 'UTI - Urinary tract infection')" '()']


In [13]:
test_predictions = med_model.predict(X_test)

In [14]:
# Calculate accuracy for each target variable
accuracy_medicine = accuracy_score(Y_test[:, 0], test_predictions[:, 0])
accuracy_notes = accuracy_score(Y_test[:, 1], test_predictions[:, 1])
accuracy_diagnosis = accuracy_score(Y_test[:, 2], test_predictions[:, 2])


In [15]:
overall_accuracy = (accuracy_medicine + accuracy_notes + accuracy_diagnosis) / 3


In [16]:
print("Accuracy for Medicine:", accuracy_medicine)
print("Accuracy for Notes:", accuracy_notes)
print("Accuracy for Diagnosis:", accuracy_diagnosis)
print("Overall Accuracy:", overall_accuracy)

Accuracy for Medicine: 0.5060975609756098
Accuracy for Notes: 0.9451219512195121
Accuracy for Diagnosis: 0.8597560975609756
Overall Accuracy: 0.7703252032520326


In [18]:
print(type(predicted_notes))

<class 'numpy.ndarray'>


In [19]:
unique_array = np.unique(predicted_notes)

TypeError: '<' not supported between instances of 'float' and 'str'

In [None]:
print(unique_array)

In [22]:
original_array = np.array(['"Bell\'s palsy", \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA' '"GRAVES\' DISEASE", \'H/O DIFFUSE THYROIDITIS' "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM" "ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA" "ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA" "PRIMARY HYPOTHYROIDISM'," "TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'," "TYPE 2 DIABETES MELLITUS',"])

# Remove duplicates
unique_array = np.unique(original_array)

print(unique_array)

['"Bell\'s palsy", \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA"GRAVES\' DISEASE", \'H/O DIFFUSE THYROIDITIS3 MONTH GESTATION\', \'PRIMARY HYPOTHYROIDISMANEMIA\', \'CKD\', \'DIABETIC RETINOPATHY\', \'H/O- AMPUTATION\', \'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIAANEMIA\', \'HYPERURICEMIA\', \'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIAPRIMARY HYPOTHYROIDISM\',TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA\',TYPE 2 DIABETES MELLITUS\',']


In [23]:
print(type(original_array))

<class 'numpy.ndarray'>


In [44]:
original_list = ["ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", "TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA',", "ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", '"GRAVES\' DISEASE", \'H/O DIFFUSE THYROIDITIS', "PRIMARY HYPOTHYROIDISM',", '"Bell\'s palsy", \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", "TYPE 2 DIABETES MELLITUS',"]

list = []
for item in original_list:
    


# Clean each element
cleaned_list = [element.strip(" '\",/") for element in unique_list]
print(cleaned_list)
print(cleaned_list[0])


TypeError: 'list' object is not callable

In [41]:
# Your list
data_list = [["GRAVES' DISEASE", 'H/O DIFFUSE THYROIDITIS'], ["Bell's palsy", 'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA'], ['ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['PRIMARY HYPOTHYROIDISM'], ['3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM'], ['TYPE 2 DIABETES MELLITUS'], ['TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA']]

# Set to store unique elements
unique_elements = set()

# List to store cleaned sublists
cleaned_list = []

for sublist in data_list:
    cleaned_sublist = []
    for item in sublist:
        cleaned_item = item.strip(" '\"/\\")
        # Check if the cleaned item is unique
        if cleaned_item not in unique_elements:
            cleaned_sublist.append(cleaned_item)
            unique_elements.add(cleaned_item)
    # Add the cleaned sublist to the cleaned list
    cleaned_list.append(cleaned_sublist)

print(cleaned_list)
print(" ")

comman_list=[]
for sublist in cleaned_list:
    for item in sublist:
        comman_list.append(item)
print(comman_list)

[["GRAVES' DISEASE", 'H/O DIFFUSE THYROIDITIS'], ["Bell's palsy", 'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA'], ['ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['PRIMARY HYPOTHYROIDISM'], ['3 MONTH GESTATION'], ['TYPE 2 DIABETES MELLITUS'], [], ['HYPERURICEMIA']]
 
["GRAVES' DISEASE", 'H/O DIFFUSE THYROIDITIS', "Bell's palsy", 'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 'ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', 'PRIMARY HYPOTHYROIDISM', '3 MONTH GESTATION', 'TYPE 2 DIABETES MELLITUS', 'HYPERURICEMIA']


In [39]:
# Your list
data_list = [["GRAVES' DISEASE", 'H/O DIFFUSE THYROIDITIS'], ["Bell's palsy", 'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA'], ['ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['PRIMARY HYPOTHYROIDISM'], ['3 MONTH GESTATION'], ['TYPE 2 DIABETES MELLITUS'], [], ['HYPERURICEMIA']]

# Merge sublists into a single list
merged_list = []

for sublist in data_list:
    merged_list += sublist

# Convert the merged list into a set to remove duplicates and then back to a list
merged_list = list(set(merged_list))

print(merged_list)


TypeError: 'list' object is not callable

In [46]:
data_string = ["ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", "TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA',", "ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", '"GRAVES\' DISEASE", \'H/O DIFFUSE THYROIDITIS', "PRIMARY HYPOTHYROIDISM',", '"Bell\'s palsy", \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", "TYPE 2 DIABETES MELLITUS',"]


# Split the string by comma and then clean each element
cleaned_list = [element.strip("'") for element in data_string.split(',')]

print(cleaned_list)


['"Bell\'s palsy"', ' \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA"GRAVES\' DISEASE"', " 'H/O DIFFUSE THYROIDITIS3 MONTH GESTATION", " 'PRIMARY HYPOTHYROIDISMANEMIA", " 'CKD", " 'DIABETIC RETINOPATHY", " 'H/O- AMPUTATION", " 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIAANEMIA", " 'HYPERURICEMIA", " 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIAPRIMARY HYPOTHYROIDISM", " 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", " 'TYPE 2 DIABETES MELLITUS"]


In [60]:
list = ["ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA", 
 "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", 
 "TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA',",
 "ANEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA",
 '"GRAVES\' DISEASE", \'H/O DIFFUSE THYROIDITIS', "PRIMARY HYPOTHYROIDISM',", 
 '"Bell\'s palsy", \'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 
 "3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM", 
 "TYPE 2 DIABETES MELLITUS',"]

cleaned_list = [element.replace("'", "").replace('"', '') for element in list]

print(cleaned_list)
print("")
main_list = []

for item in cleaned_list:
    list1 = item.split(',')
    main_list.append(list1)
print(main_list)
print("")
comman_list=[]

for sublist in main_list:
    for item in sublist:
        comman_list.append(item)
print(comman_list)
print("")
# Step 1: Filter out null values ('')
filtered_list = [item for item in comman_list if item != '']

print(filtered_list)
print("")

unique_list = []
for item in filtered_list:
    if item not in unique_list:
        unique_list.append(item)

print(unique_list)

['ANEMIA, HYPERURICEMIA, TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', '3 MONTH GESTATION, PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA,', 'ANEMIA, CKD, DIABETIC RETINOPATHY, H/O- AMPUTATION, TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', 'GRAVES DISEASE, H/O DIFFUSE THYROIDITIS', 'PRIMARY HYPOTHYROIDISM,', 'Bells palsy, TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', '3 MONTH GESTATION, PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS,']

[['ANEMIA', ' HYPERURICEMIA', ' TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['3 MONTH GESTATION', ' PRIMARY HYPOTHYROIDISM'], ['TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', ''], ['ANEMIA', ' CKD', ' DIABETIC RETINOPATHY', ' H/O- AMPUTATION', ' TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA'], ['GRAVES DISEASE', ' H/O DIFFUSE THYROIDITIS'], ['PRIMARY HYPOTHYROIDISM', ''], ['Bells palsy', ' TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA'], ['3 MONTH GESTATION', ' PRIMARY HY

In [68]:
# Your list
data_list = ['ANEMIA', ' HYPERURICEMIA', ' TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', '3 MONTH GESTATION', ' PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', ' CKD', ' DIABETIC RETINOPATHY', ' H/O- AMPUTATION', 'GRAVES DISEASE', ' H/O DIFFUSE THYROIDITIS', 'PRIMARY HYPOTHYROIDISM', 'Bells palsy', ' TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 'TYPE 2 DIABETES MELLITUS']

# Count occurrences of each element
element_count = {}
for element in data_list:
    if element in element_count:
        element_count[element] += 1
    else:
        element_count[element] = 1

# Filter out elements that appear more than once
unique_list = [element for element, count in element_count.items() if count == 1]

print(unique_list)
if unique_list[2] == unique_list[5]:
    print("The first and second elements are the same.")
else:
    print("The first and second elements are not the same.")
    print(unique_list[2])
    print(unique_list[5])

['ANEMIA', ' HYPERURICEMIA', ' TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', '3 MONTH GESTATION', ' PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', ' CKD', ' DIABETIC RETINOPATHY', ' H/O- AMPUTATION', 'GRAVES DISEASE', ' H/O DIFFUSE THYROIDITIS', 'PRIMARY HYPOTHYROIDISM', 'Bells palsy', ' TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 'TYPE 2 DIABETES MELLITUS']
The first and second elements are not the same.
 TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA
TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA


In [69]:
# Your list
my_list = ['ANEMIA', ' HYPERURICEMIA', ' TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', '3 MONTH GESTATION', ' PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', ' CKD', ' DIABETIC RETINOPATHY', ' H/O- AMPUTATION', 'GRAVES DISEASE', ' H/O DIFFUSE THYROIDITIS', 'PRIMARY HYPOTHYROIDISM', 'Bells palsy', ' TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 'TYPE 2 DIABETES MELLITUS']

# Remove extra spaces from each element
cleaned_list = [element.strip() for element in my_list]

print(cleaned_list)


['ANEMIA', 'HYPERURICEMIA', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', '3 MONTH GESTATION', 'PRIMARY HYPOTHYROIDISM', 'TYPE 2 DIABETES MELLITUS - HYPERTENSION - DYSLIPIDEMIA', 'CKD', 'DIABETIC RETINOPATHY', 'H/O- AMPUTATION', 'GRAVES DISEASE', 'H/O DIFFUSE THYROIDITIS', 'PRIMARY HYPOTHYROIDISM', 'Bells palsy', 'TYPE 2 DIABETES MELLITUS - DYSLIPIDEMIA', 'TYPE 2 DIABETES MELLITUS']


In [77]:
def find_repeated_elements(input_list):
    element_count = {}
    repeated_elements = []

    for item in input_list:
        # If the item is already in the dictionary, increment its count
        if item in element_count:
            element_count[item] += 1
        # Otherwise, add it to the dictionary with count 1
        else:
            element_count[item] = 1

    # Check for elements with count greater than 1 (repeated elements)
    for item, count in element_count.items():
        if count > 1:
            repeated_elements.append(item)

    return repeated_elements
list = ['AMARYL 2MG (185870); CORCAL D 500IU (211411); DERMATOLOGIST OPINION (195877); GLUCOBAY-M 50 (50/500) (195544); LANTUS 100IU (195584); PREGEB M OD 75MG (195529); REMO-V (196123); ROSUCOR ASP (10/75) (195554); TERIPERATIDE 20MCG INJECTON (234339); THYROX 50MCG (198710); ZOLPHOS 5 MG - IV INFUSION OVER 1 HOUR (GIVE DOLO 650 TWICE IN DAY FOR 2 DAYS) (212008)', 'ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464); GLARGINE (196424); HUMARAP 40IU (212517); MEET NEPHROLOGIST REGULARLY (212641); METOCARD XL 50MG (196467); NICARDIA 20MG RETARD (196872); PRAZOPRESS XL 5MG (195930); TELMIRIDE 20MG (195514)', 'ESCIFRESH PLUS 10 (211160); PIOZ 15MG (195627); ROSUCOR ASP (10/75) (195554); STANLIP 145MG (195950); TELMIRIDE H 40MG (196491); TORGLIP R (195571); ZORYL M 3MG FORTE (195777)', 'AMARYL 1 MG (195634); AMARYL M 2MG (239540); CALCIROL SOFTGEL 60K (199608); GLIMIPRIME M2 (199549); GLUCOBAY-M 50 (50/500) (195544); PANTOCID DSR (198551); ROSUVAS 10MG (234184); SITAZION D 10/100 MG (199263); SUPRADYN (140427); TELMIRIDE AM (195735); TRESIBA 100IU 3ML (195829)', 'ACTRAPID 100IU (195711); BASALOG 100IU/ML (195694); CREON 10000MG (212563); REST AS ADVISED BY NEPHROLOGIST (195763); STOP ALCOHOL (195947); THYRONORM 75MCG (195541); UDAPA 5 MG (195705); VIMPRO POWDER CHOCOLATE (199656)', 'DAPARYL-L 10/5 MG (210825); DIAMICRON XR MEX 500MG (200717); NOVOMIX 30 (195678); REST AS ADVISED BY CARDIOLOGIST & NEPHROLOGIST (196598); ZUCCHERO 50MG (199264)', 'GLUCONORM SR 500MG (195676); PRESTO DSR 40/30 (199554); REMOZEN V (195614)', 'ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464); GLARGINE (196424); HUMARAP 40IU (212517); MEET NEPHROLOGIST REGULARLY (212641); METOCARD XL 50MG (196467); NICARDIA 20MG RETARD (196872); PRAZOPRESS XL 5MG (195930); TELMIRIDE 20MG (195514)', 'FORCAL XT (199819); GLIMISON-M2 (195511); PRESTO DSR (224961); ROSULIP-F 10 (196594); SITAZION D 10/100 MG (199263); TELMA CT 40/12.5 (198698); ZUCCHERO 50MG (199264)']

print(find_repeated_elements(list))

unique_list = []
for item in list:
    if item not in unique_list:
        unique_list.append(item)
print(unique_list[0])

main_list =[]
for item in unique_list:
    element = item.split(';')
    main_list.append(element)
print(main_list)

['ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464); GLARGINE (196424); HUMARAP 40IU (212517); MEET NEPHROLOGIST REGULARLY (212641); METOCARD XL 50MG (196467); NICARDIA 20MG RETARD (196872); PRAZOPRESS XL 5MG (195930); TELMIRIDE 20MG (195514)']
AMARYL 2MG (185870); CORCAL D 500IU (211411); DERMATOLOGIST OPINION (195877); GLUCOBAY-M 50 (50/500) (195544); LANTUS 100IU (195584); PREGEB M OD 75MG (195529); REMO-V (196123); ROSUCOR ASP (10/75) (195554); TERIPERATIDE 20MCG INJECTON (234339); THYROX 50MCG (198710); ZOLPHOS 5 MG - IV INFUSION OVER 1 HOUR (GIVE DOLO 650 TWICE IN DAY FOR 2 DAYS) (212008)
[['AMARYL 2MG (185870)', ' CORCAL D 500IU (211411)', ' DERMATOLOGIST OPINION (195877)', ' GLUCOBAY-M 50 (50/500) (195544)', ' LANTUS 100IU (195584)', ' PREGEB M OD 75MG (195529)', ' REMO-V (196123)', ' ROSUCOR ASP (10/75) (195554)', ' TERIPERATIDE 20MCG INJECTON (234339)', ' THYROX 50MCG (198710)', ' ZOLPHOS 5 MG - IV INFUSION OVER 1 HOUR (GIVE DOLO 650 TWICE IN DAY FOR 2 DAYS) (212008)'], ['ADMISS

In [None]:
unique_list = []
for item in filtered_list:
    if item not in unique_list:
        unique_list.append(item)

In [79]:
def process_diagnosis(filtered_diagnosis):
    # Remove parentheses and single quotes
    output_diagnosis = [x.strip("()").strip("'") for x in filtered_diagnosis]
    # Remove single and double quotes
    cleaned_list = [element.replace("'", "").replace('"', '') for element in output_diagnosis]
    # Split by comma
    main_list = [item.split(';') for item in cleaned_list]
    # Flatten the list
    comman_list = [item for sublist in main_list for item in sublist]
    # Remove empty strings
    filtered_list = [item for item in comman_list if item != '']
    # Remove extra spaces
    no_extra_list = [element.strip() for element in filtered_list]
    # Remove duplicates
    unique_list = []
    for item in no_extra_list:
        if item not in unique_list:
            unique_list.append(item)
    return unique_list
filtered_notes = ['AMARYL 2MG (185870); CORCAL D 500IU (211411); DERMATOLOGIST OPINION (195877); GLUCOBAY-M 50 (50/500) (195544); LANTUS 100IU (195584); PREGEB M OD 75MG (195529); REMO-V (196123); ROSUCOR ASP (10/75) (195554); TERIPERATIDE 20MCG INJECTON (234339); THYROX 50MCG (198710); ZOLPHOS 5 MG - IV INFUSION OVER 1 HOUR (GIVE DOLO 650 TWICE IN DAY FOR 2 DAYS) (212008)', 'ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464); GLARGINE (196424); HUMARAP 40IU (212517); MEET NEPHROLOGIST REGULARLY (212641); METOCARD XL 50MG (196467); NICARDIA 20MG RETARD (196872); PRAZOPRESS XL 5MG (195930); TELMIRIDE 20MG (195514)', 'ESCIFRESH PLUS 10 (211160); PIOZ 15MG (195627); ROSUCOR ASP (10/75) (195554); STANLIP 145MG (195950); TELMIRIDE H 40MG (196491); TORGLIP R (195571); ZORYL M 3MG FORTE (195777)', 'AMARYL 1 MG (195634); AMARYL M 2MG (239540); CALCIROL SOFTGEL 60K (199608); GLIMIPRIME M2 (199549); GLUCOBAY-M 50 (50/500) (195544); PANTOCID DSR (198551); ROSUVAS 10MG (234184); SITAZION D 10/100 MG (199263); SUPRADYN (140427); TELMIRIDE AM (195735); TRESIBA 100IU 3ML (195829)', 'ACTRAPID 100IU (195711); BASALOG 100IU/ML (195694); CREON 10000MG (212563); REST AS ADVISED BY NEPHROLOGIST (195763); STOP ALCOHOL (195947); THYRONORM 75MCG (195541); UDAPA 5 MG (195705); VIMPRO POWDER CHOCOLATE (199656)', 'DAPARYL-L 10/5 MG (210825); DIAMICRON XR MEX 500MG (200717); NOVOMIX 30 (195678); REST AS ADVISED BY CARDIOLOGIST & NEPHROLOGIST (196598); ZUCCHERO 50MG (199264)', 'GLUCONORM SR 500MG (195676); PRESTO DSR 40/30 (199554); REMOZEN V (195614)', 'ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464); GLARGINE (196424); HUMARAP 40IU (212517); MEET NEPHROLOGIST REGULARLY (212641); METOCARD XL 50MG (196467); NICARDIA 20MG RETARD (196872); PRAZOPRESS XL 5MG (195930); TELMIRIDE 20MG (195514)', 'FORCAL XT (199819); GLIMISON-M2 (195511); PRESTO DSR (224961); ROSULIP-F 10 (196594); SITAZION D 10/100 MG (199263); TELMA CT 40/12.5 (198698); ZUCCHERO 50MG (199264)']
print(process_diagnosis(filtered_notes))

['AMARYL 2MG (185870)', 'CORCAL D 500IU (211411)', 'DERMATOLOGIST OPINION (195877)', 'GLUCOBAY-M 50 (50/500) (195544)', 'LANTUS 100IU (195584)', 'PREGEB M OD 75MG (195529)', 'REMO-V (196123)', 'ROSUCOR ASP (10/75) (195554)', 'TERIPERATIDE 20MCG INJECTON (234339)', 'THYROX 50MCG (198710)', 'ZOLPHOS 5 MG - IV INFUSION OVER 1 HOUR (GIVE DOLO 650 TWICE IN DAY FOR 2 DAYS) (212008', 'ADMISSION FOR GLYCEMIC CONTROL & LRTI (305464)', 'GLARGINE (196424)', 'HUMARAP 40IU (212517)', 'MEET NEPHROLOGIST REGULARLY (212641)', 'METOCARD XL 50MG (196467)', 'NICARDIA 20MG RETARD (196872)', 'PRAZOPRESS XL 5MG (195930)', 'TELMIRIDE 20MG (195514', 'ESCIFRESH PLUS 10 (211160)', 'PIOZ 15MG (195627)', 'STANLIP 145MG (195950)', 'TELMIRIDE H 40MG (196491)', 'TORGLIP R (195571)', 'ZORYL M 3MG FORTE (195777', 'AMARYL 1 MG (195634)', 'AMARYL M 2MG (239540)', 'CALCIROL SOFTGEL 60K (199608)', 'GLIMIPRIME M2 (199549)', 'PANTOCID DSR (198551)', 'ROSUVAS 10MG (234184)', 'SITAZION D 10/100 MG (199263)', 'SUPRADYN (140427