In [3]:
import re
import json
import pandas as pd
import Levenshtein as lev
from fuzzywuzzy import process

In [4]:
def load_json(file_name):
    with open(file_name, "r") as json_file:
        database = json.load(json_file)
    
    return database

def dump_json(filename, database):
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(database, json_file, ensure_ascii=False, indent=4)


def to_snake_case(column_name):
    return '_'.join(column_name.split()).lower()


def find_medicine(query, choices, threshold=80):
    """
    Find the best match for a query in the specified field of the data.
    :param query: The user's input.
    :param data: The DataFrame containing medicine information.
    :param field: The field to search in ('name_of_medicinal_product' or 'active_substance').
    :param threshold: The similarity threshold.
    :return: The best match if above the threshold, else None.
    """

    best_match, similarity = process.extractOne(query, choices)
    if similarity >= threshold:
        return best_match
    else:
        return None



def find_closest_match(query, choices):
    """
    Find the closest match for a query in a list of choices based on Levenshtein distance.
    :param query: The user's input.
    :param choices: A list of possible choices (e.g., medicine names or active substances).
    :return: The closest match.
    """
    closest_match = None
    shortest_distance = float('inf')

    for choice in choices:
        distance = lev.distance(query.lower(), choice.lower())
        if distance < shortest_distance:
            shortest_distance = distance
            closest_match = choice

    return closest_match

In [39]:
def prepare_df(pth: str):
    
    df = pd.read_excel(pth)

    cols = df.columns.to_list()
    cols[-1] = "MAXIMUM RETAIL PRICE"
    df.columns = cols

    df = df.rename(columns=to_snake_case)

    df.dropna(subset=['maximum_retail_price'], inplace=True)
    df.dropna(subset=['active_substance'], inplace=True)
    df.dropna(subset=['name_of_medicinal_product'], inplace=True)
    df.reset_index(inplace=True, drop=True)
    df['name'] = df['name_of_medicinal_product'].apply(lambda x: x.split()[0])
    df['name'] = df['name'].apply(lambda x: x.split('/')[0])
    df['name'] = df['name'].apply(lambda x: x.split('+')[0])
    gr_exception = "Εξαίρεση αναγραφής τιμής στον τιμοκατάλογο (Κανονισμός 4, ΚΔΠ 98/2019)"
    gr_exception_1 = "* Εξαίρεση αναγραφής τιμής στον τιμοκατάλογο (Κανονισμός 4, ΚΔΠ 98/2019)"
    df['maximum_retail_price'].replace(gr_exception, 'not_listed', inplace=True)
    df['maximum_retail_price'].replace(gr_exception_1, 'not_listed', inplace=True)

    new_order = [
    'name', 'maximum_retail_price', 'active_substance', 
    'name_of_medicinal_product', 'package', 
    'marketing_authorisation_holder', 'local_pricing_representative',
    'pricing_code'
    ]

    return df[new_order]

In [40]:
df_2023 = prepare_df("csv/ΤΙΜΟΚΑΤΑΛΟΓΟΣ ΣΤ2023_24-11-2023_(05-12-2023)_WEB_EN.xlsx")
df_2024 = prepare_df("csv/ΤΙΜΟΚΑΤΑΛΟΓΟΣ Β2024_10.04.2024_(29.03.2024)_WEB_EN.xlsx")

In [33]:
# df_2023.reset_index().to_json('drugs_2023.json', orient='records', indent=4)

In [34]:
# df_2024.reset_index().to_json('drugs_2024.json', orient='records', indent=4)

# load jsons

In [58]:
df_2023 = load_json('drugs_2023.json')
df_2024 = load_json("drugs_2024.json")

In [73]:
DATE1 = '29-03-2024'
DATE2 = '05-12-2023'

def fill_dc(ind: int, d: dict) -> dict:
    
    dc = {}
    dc['ind'] = ind
    dc['_name'] = d['name'].capitalize()
    dc['_full_name'] = d['name_of_medicinal_product'].capitalize()
    dc['_active'] = d['active_substance'].capitalize()
    dc['_package'] = d['package'].split(' ', 1)
    dc['_price'] = d['maximum_retail_price']

    dc[DATE1] = {}
    dc[DATE2] = {}

    return dc

In [82]:
res = []
ind = 0

for d3 in df_2023:

    dc = fill_dc(ind, d3)

    dc[DATE2].update(d3)

    res.append(dc)
    ind += 1

In [83]:
res2 = []
for d4 in df_2024:
    code = d4.get('pricing_code')

    new = True
    for r in res:
        if code == r[DATE2]['pricing_code']:
            r[DATE1].update(d4)
            new = False
            break


    if new:
        dc = fill_dc(ind, d4)
        dc[DATE1].update(d4)
        
        res2.append(dc)
        ind += 1     


In [87]:
res2[0]

{'ind': 6096,
 '_name': 'Abrysvo',
 '_full_name': 'Abrysvo powder and solvent for solution for injection',
 '_active': 'Respiratory syncytial virus vaccines',
 '_package': ['PACK', 'WITH 1 VIAL POWDER X 0.5ML WITH SOLVENT & 1 NEEDLE'],
 '_price': 252.27,
 '29-03-2024': {'index': 23,
  'name': 'ABRYSVO',
  'maximum_retail_price': 252.27,
  'active_substance': 'RESPIRATORY SYNCYTIAL VIRUS VACCINES',
  'name_of_medicinal_product': 'ABRYSVO POWDER AND SOLVENT FOR SOLUTION FOR INJECTION',
  'package': 'PACK WITH 1 VIAL POWDER X 0.5ML WITH SOLVENT & 1 NEEDLE',
  'marketing_authorisation_holder': 'PFIZER EUROPE MA EEIG',
  'local_pricing_representative': 'PFIZER HELLAS AE (CYPRUS BRANCH)',
  'pricing_code': 'C175201/1'},
 '05-12-2023': {}}

In [88]:
res += res2



# dump_json("drugs.json", res)

In [92]:
sorted_res = sorted(res, key=lambda x: x['_name'])

In [102]:
for i, s in enumerate(sorted_res):
    s['ind'] = i
    if s[DATE1] != {}:
        s['_price'] = s[DATE1]["maximum_retail_price"]
    else:
        s['_price'] = s[DATE2]["maximum_retail_price"]


In [101]:
dc= {}
dc[DATE1] = {}
dc.get(DATE1, False)

{}

In [1]:
# dump_json('drugs.json', sorted_res)

# Clear drugs.json

In [28]:
df = load_json("drugs.json")

In [29]:
for d in df:
    d['_active'] = [s.strip().capitalize() for s in d['_active'].replace("(", ",").replace(")", ",").split(',') if s != '']

In [31]:
dump_json('drugs.json', df)

In [27]:
string = "Clindamycin phosphate, tretinoin (bla)"
[s.strip().capitalize() for s in string.replace("(", ",").replace(")", ",").split(',') if s != '']

['Clindamycin phosphate', 'Tretinoin', 'Bla']

In [25]:
string = "Clindamycin phosphate, tretinoin (bla)",
string.split(',')

AttributeError: 'tuple' object has no attribute 'split'

In [15]:
dump_json('drugs.json', df)

In [13]:
cols_join = ["pricing_code", 'maximum_retail_price']
_key = 'pricing_code'
df_join = df_2024[cols_join].set_index(_key).join(df_2023[cols_join].set_index(_key), lsuffix='_24', rsuffix='_23')

In [14]:
cols_merge = ['pricing_code', 'name']
merged_df = df_2024.merge(df_2023, on=cols_merge, how='left', indicator=True)
merged_df.query("_merge == 'left_only'")

Unnamed: 0,name,maximum_retail_price_x,active_substance_x,name_of_medicinal_product_x,package_x,marketing_authorisation_holder_x,local_pricing_representative_x,pricing_code,maximum_retail_price_y,active_substance_y,name_of_medicinal_product_y,package_y,marketing_authorisation_holder_y,local_pricing_representative_y,_merge
23,ABRYSVO,252.27,RESPIRATORY SYNCYTIAL VIRUS VACCINES,ABRYSVO POWDER AND SOLVENT FOR SOLUTION FOR IN...,PACK WITH 1 VIAL POWDER X 0.5ML WITH SOLVENT &...,PFIZER EUROPE MA EEIG,PFIZER HELLAS AE (CYPRUS BRANCH),C175201/1,,,,,,,left_only
32,ABSKEN,39.04,PRUCALOPRIDE,"ABSKEN TABLET, FILM COATED 1MG",PACK WITH 28 TABS IN BLISTER(S),WIN MEDICA PHARMACEUTICAL S.A. (TRADING AS WIN...,COSTAKIS TSISIOS & CO LTD,42M0067/2,,,,,,,left_only
33,ABSKEN,59.63,PRUCALOPRIDE,"ABSKEN TABLET, FILM COATED 2MG",PACK WITH 28 TABS IN BLISTER(S),WIN MEDICA PHARMACEUTICAL S.A. (TRADING AS WIN...,COSTAKIS TSISIOS & CO LTD,42M0068/2,,,,,,,left_only
143,AKEEGA,5104.04,NIRAPARIB\nABIRATERONE ACETATE,"AKEEGA TABLET, FILM COATED 100MG/500MG",PACK WITH 56 TABS IN BLISTER(S),JANSSEN-CILAG INTERNATIONAL NV,VARNAVAS HADJIPANAYIS LTD,C172202/1,,,,,,,left_only
144,AKEEGA,5104.04,NIRAPARIB\nABIRATERONE ACETATE,"AKEEGA TABLET, FILM COATED 50MG/500MG",PACK WITH 56 TABS IN BLISTER(S),JANSSEN-CILAG INTERNATIONAL NV,VARNAVAS HADJIPANAYIS LTD,C172201/1,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5422,TRATIM,10.7,TRAVOPROST\nTIMOLOL MALEATE,"TRATIM EYE DROPS, SOLUTION (40MCG/5MG)/ML",PACK WITH 1 VIAL X 2.5ML (PP),VERISFIELD SINGLE MEMBER S.A.,C A PAPAELLINAS LTD,3700050/2,,,,,,,left_only
5567,UNIGESIC,3.28,ORPHENADRINE\nPARACETAMOL,UNIGESIC TABLET 35MG/450MG,PACK WITH 30 TABS IN BLISTER(S),UNI-PHARMA KLEON TSETIS PHARMACEUTICAL LABORAT...,PHARMACEUTICAL TRADING CO LTD,40M0181/1,,,,,,,left_only
5687,VERILIGO,11.68,METOCLOPRAMIDE,"VERILIGO NASAL SPRAY, SOLUTION 10MG/DOSE",PACK WITH 1 BOTTLE X 6ML,VERISFIELD SINGLE MEMBER S.A.,C A PAPAELLINAS LTD,43S0012/1,,,,,,,left_only
5885,XELJANZ,773.45,TOFACITINIB,XELJANZ ORAL SOLUTION 1MG/ML,PACK WITH 1 BOTTLE X 250ML,PFIZER EUROPE MA EEIG,PFIZER HELLAS AE (CYPRUS BRANCH),C117815/1,,,,,,,left_only


In [43]:
merged_df = df_2023.merge(df_2024, on=cols_merge, how='left', indicator=True)
len(merged_df.query("_merge == 'left_only'")['name'].unique())

56

In [41]:
(merged_df['name'].unique())

array(['ABACAVIR', 'ABASAGLAR', 'ABATOR', ..., 'ZYRTEC', 'ZYTIGA',
       'ZYVOXID'], dtype=object)

In [23]:
df_join.query("maximum_retail_price_24 != maximum_retail_price_23")

Unnamed: 0_level_0,maximum_retail_price_24,maximum_retail_price_23
pricing_code,Unnamed: 1_level_1,Unnamed: 2_level_1
39M0189/6,198.98,231.96
C944009/1,116.07,115.25
C882002/1,345.02,349.87
C276034/1,67.76,69.9
C04276A/1,41.83,47.46
...,...,...
C125001/1,23.39,27.79
9000251/1,Price not published (Ministerial notice No. 98...,not_listed
C714001/1,3064.81,3265.03
C714002/1,2788.08,2946.77


# Updating current drugs.json

In [39]:
drugs = load_json("drugs.json")
df_2023 = load_json("drugs_2023.json")

In [21]:
for el in df_2023:
    for d in drugs:

        match_name = el.get("name").lower() == d.get("_name").lower()
        match_price = el.get("maximum_retail_price") == d.get("_price")

        if match_name and match_price:
            d.update(el)
            el['_id'] = d.get('_id')

In [22]:
ind = 1
for d in drugs:
    if len(d) == 6:
        for el in df_2023:
            match_name = el.get("name").lower() == d.get("_name").lower()
            price = el.get("maximum_retail_price") != "not_listed"

            if match_name and price and not el.get('_id', False):
                d['_price'] = el["maximum_retail_price"]
                d.update(el)
                el['_id'] = d.get('_id')

In [40]:
drugs = load_json("drugs.json")
df_2024 = load_json("drugs_2024.json")

In [41]:
new_drugs = []
for d in drugs:
    dc = {}
    dc["_id"] = d["_id"]
    dc["_name"] = d["_name"]
    dc["_full_name"] = d["_full_name"]
    dc["_active"] = d["_active"]
    dc["_package"] = d["_package"]
    dc["_price"] = d["_price"]

    if len(d) > 6:
        dd = '31.09.2023'
        dc[dd] = {}
        dc[dd]["name"] = d["name"]
        dc[dd]["maximum_retail_price"] = d["maximum_retail_price"]
        dc[dd]["active_substance"] = d["active_substance"]
        dc[dd]["name_of_medicinal_product"] = d["name_of_medicinal_product"]
        dc[dd]["package"] = d["package"]
        dc[dd]["marketing_authorisation_holder"] = d["marketing_authorisation_holder"]
        dc[dd]["local_pricing_representative"] = d["local_pricing_representative"]
        dc[dd]["pricing_code"] = d["pricing_code"]



    

SyntaxError: incomplete input (4276646924.py, line 1)

In [30]:
drugs[0]

{'_id': 0,
 '_name': 'Abacavir',
 '_full_name': 'Abacavir accord tablet, film coated 300mg',
 '_active': ['Abacavir'],
 '_package': ['PACK', '60 tabs in blister(s) (alu/alu)'],
 '_price': 231.96,
 'name': 'ABACAVIR',
 'maximum_retail_price': 231.96,
 'active_substance': 'ABACAVIR',
 'name_of_medicinal_product': 'ABACAVIR ACCORD TABLET, FILM COATED 300MG',
 'package': 'PACK WITH 60 TABS IN BLISTER(S) (ALU/ALU)',
 'marketing_authorisation_holder': 'ACCORD HEALTHCARE S.L.U',
 'local_pricing_representative': 'MEDILINK PHARMACEUTICALS LTD',
 'pricing_code': '39M0189/6'}

In [31]:
df_2024[0]

{'name': 'ABACAVIR',
 'maximum_retail_price': 198.98,
 'active_substance': 'ABACAVIR',
 'name_of_medicinal_product': 'ABACAVIR ACCORD TABLET, FILM COATED 300MG',
 'package': 'PACK WITH 60 TABS IN BLISTER(S) (ALU/ALU)',
 'marketing_authorisation_holder': 'ACCORD HEALTHCARE S.L.U',
 'local_pricing_representative': 'MEDILINK PHARMACEUTICALS LTD',
 'pricing_code': '39M0189/6'}

In [35]:
for d in drugs:
    if d.get("pricing_code", False):
        for el in df_2024:
            if el["pricing_code"] == d["pricing_code"]:
                not_match = False
                for k in ['name', 'active_substance', "name_of_medicinal_product", "package"]:
                    if el[k] != d[k]:
                        not_match = True

                if not_match:
                    print(d['_id'])
 

7
23
24
25
26
27
28
29
30
209
327
430
431
477
478
479
524
823
824
825
826
827
828
826
830
831
832
848
849
1174
1175
1176
1177
1178
1179
1180
1182
1296
1297
1314
1375
1514
1515
1516
1517
1650
1651
1681
1688
1689
1735
1736
1735
1736
1774
1775
1775
1774
1812
1813
1814
1814
1816
1817
1818
1908
1908
2042
2086
2087
2105
2105
2105
2137
2138
2139
2170
2183
2243
2244
2270
2309
2310
2358
2437
2438
2442
2443
2444
2445
2446
2568
2570
2617
2618
2744
2745
2746
2747
2763
2763
2763
2763
2842
2843
2904
3297
3563
3564
3565
3566
3567
3715
3780
3865
3865
3973
3974
3989
4019
4020
4026
4103
4107
4170
4171
4172
4345
4346
4347
4348
4349
4350
4351
4379
4425
4426
4427
4477
4707
4707
4710
4831
4906
4908
4906
4908
4906
5041
5165
5166
5453
5453
5600
5622
5623
5624
5638
5639
5877
5915
5916
6084
6085
6086


In [36]:
for k in ['name', 'active_substance', "name_of_medicinal_product", "package"]:
    if drugs[7][k] != df_2024[7][k]:
        print(k)

name_of_medicinal_product


In [37]:
drugs[7]["name_of_medicinal_product"]

'ABILIFY MAINTENA POWDER & SOLVENT FOR PROLONGED RELEASE SUSPENION FOR INJECTION 400MG/VIAL'

In [38]:
df_2024[7]["name_of_medicinal_product"]

'ABILIFY MAINTENA POWDER & SOLVENT FOR PROLONGED RELEASE SUSPENSION FOR INJECTION 400MG/VIAL'

# Compare 2023 and 2024

In [15]:
old_json = 'drugs_2023.json'
new_json = 'drugs_2024.json'

In [16]:
df_2023 = load_json(old_json)
df_2024 = load_json(new_json)

In [None]:
for n in df_2024:
    for o in df_2023:
        if n


In [58]:
old_json = 'drugs.json'
new_json = 'drugs_new.json'

with open(old_json, "r") as json_file:
    database = json.load(json_file)

for index, db in enumerate(database):
    db['id'] = index + 1
    db['key_substance'] = re.sub(r'[()|",\n-/]', ' ', db['active_substance'])
    db['key_substance'] = list(set(db['key_substance'].split()))

with open(new_json, "w", encoding="utf-8") as json_file:
    json.dump(database, json_file, ensure_ascii=False, indent=4)

In [15]:
key_counts = {}
for db in database:
    for k in db['key_substance']:
        if k not in key_counts.keys():
            key_counts[k] = 1
        else:
            key_counts[k] += 1

# Sort the dictionary by its values
sorted_medications = sorted(key_counts.items(), key=lambda item: item[1], reverse=True)

# If you need a dictionary as the output, convert the list of tuples back to a dictionary
sorted_dict = dict(sorted_medications)


with open("freq_substance.json", "w", encoding="utf-8") as json_file:
    json.dump([sorted_dict], json_file, ensure_ascii=False, indent=4)

In [59]:
res = []
for k in sorted_dict.keys():
    dc = {}
    dc[k] = []
    dc[k].append(sorted_dict[k])
    for db in database:
        if k in db['key_substance']:
            if db['name'] not in dc[k]:
                dc[k].append(db['name'])
    res.append(dc)

In [17]:
with open("freq.json", "w", encoding="utf-8") as json_file:
    json.dump(res, json_file, ensure_ascii=False, indent=4)

In [60]:
temp = []
for r in res:
    data = {}
    k = list(r.keys())[0]
    data[k] = [r[k][0], len(r[k][1:])]
    temp.append(data)

In [61]:
new_res = sorted(temp, key = lambda x: list(x.values())[0][1], reverse=True)

In [62]:
for el in new_res:
    name = list(el.keys())[0]
    el['name'] = name
    el['freq_in_db'] = el[name][0]
    el['freq'] = el[name][1]
    el['active'] = 1
    del el[name]


In [63]:
# with open("stat.json", "w", encoding="utf-8") as json_file:
#     json.dump(new_res, json_file, ensure_ascii=False, indent=4)

In [3]:
stat = load_json('stat.json')

non_active = []
active = []
for s in stat:
    if s['active'] == 0:
        non_active.append(s['name'])
    else:
        active.append(s['name'])

In [54]:
active[900:]

['DIGOXIN',
 'PROTEINSUCCINYLATE',
 'LENVATINIB',
 'INCLISIRAN',
 'DETEMIR',
 'VARDENAFIL',
 'FENTICONAZOLE',
 'LORLATINIB',
 'RANIBIZUMAB',
 'MOSUNETUZUMAB',
 'OLAPARIB',
 'DIMETICONE',
 'TRAMETINIB',
 'MECOBALAMIN',
 'NALOXEGOL',
 'MICAFUNGIN',
 'TROPICAMIDE',
 'TYBE',
 'SOMATROGON',
 'NITRAZEPAM',
 'TETRAHYDRATE',
 'MEDIUM',
 'CHAIN',
 'OMEGA',
 'OBETICHOLIC',
 'SODIUMLYCEROPHOSPHATE',
 'HYDRATED',
 'PERFLUTREN',
 'ORPHENADRINE',
 'GANIRELIX',
 'APREMILAST',
 'CHORIOGONADOTROPIN',
 'NEFOPAM',
 'LACTOBACILLUS',
 'PETHIDINE',
 'PIZOTIFEN',
 'POLATUZUMAB',
 'PONESIMOD',
 'ALIROCUMAB',
 'ZICONOTIDE',
 'LACTIC',
 'TRIBENOSIDE',
 'TONICS',
 'LUSPATERCEPT',
 'TREPROSTINIL',
 'ELTROMBOPAG',
 'PARICALCITOL',
 'ENTRECTINIB',
 'RUPATADINE',
 'ISATUXIMAB',
 'ASCIMINIB',
 'PASSIFLORA',
 'INCARNATA',
 'AMYLMETACRESOL',
 'DICHLORBENZYL',
 'ALCOHOL',
 'DESFLURANE',
 'ASENAPINE',
 'CAPMATINIB',
 'DABRAFENIB',
 'OSIMERTINIB',
 'IXEKIZUMAB',
 'TALQUETAMAB',
 'TALAZOPARIB',
 'NILOTINIB',
 'ATEZOLIZUMAB

In [66]:
substances.update({
    'RADIUM': 'РАДИЙ',
    'DICHLORIDE': 'ДИХЛОРИД',
    'ENZALUTAMIDE': 'ЭНЗАЛУТАМИД',
    'OXYBATE': 'ОКСИБАТ',
    'BROMFENAC': 'БРОМФЕНАК',
    'AVIBACTAM': 'АВИБАКТАМ',
    'VEMURAFENIB': 'ВЕМУРАФЕНИБ',
    'ELBASVIR': 'ЭЛБАСВИР',
    'GRAZOPREVIR': 'ГРАЗОПРЕВИР',
    'CEFTOLOZANE': 'ЦЕФТОЛОЗАН',
    'IBRITUMOMAB': 'ИБРИТУМОМАБ',
    'TIUXETAN': 'ТИУКСЕТАН',
    'CEFTAROLINE': 'ЦЕФТАРОЛИН',
    'FOSAMIL': 'ФОЗАМИЛ',
    'NOMEGESTROL': 'НОМЕГЕСТРОЛ',
    'ZOLMITRIPTAN': 'ЗОЛМИТРИПТАН',
    'ZOSTER': 'ЗОСТЕР',
})

# Пример использования:
print(substances['RADIUM'])  # Выведет 'РАДИЙ'


РАДИЙ


In [52]:
len(substances)

943

In [53]:
substances

{'HYDROCHLOROTHIAZIDE': 'ГИДРОХЛОРОТИАЗИД',
 'PARACETAMOL': 'ПАРАЦЕТАМОЛ',
 'FUMARATE': 'ФУМАРАТ',
 'DEXAMETHASONE': 'ДЕКСАМЕТАЗОН',
 'METFORMIN': 'МЕТФОРМИН',
 'FACTOR': 'ФАКТОР',
 'MALEATE': 'МАЛЕАТ',
 'AMLODIPINE': 'АМЛОДИПИН',
 'MAGNESIUM': 'МАГНИЙ',
 'INSULIN': 'ИНСУЛИН',
 'TENOFOVIR': 'ТЕНОФОВИР',
 'TARTRATE': 'ТАРТРАТ',
 'LIDOCAINE': 'ЛИДОКАИН',
 'TIMOLOL': 'ТИМОЛОЛ',
 'ATORVASTATIN': 'АТОРВАСТАТИН',
 'CIPROFLOXACIN': 'ЦИПРОФЛОКСАЦИН',
 'ROSUVASTATIN': 'РОЗУВАСТАТИН',
 'CITRATE': 'ЦИТРАТ',
 'BUDESONIDE': 'БУДЕЗОНИД',
 'SULFATE': 'СУЛЬФАТ',
 'SITAGLIPTIN': 'СИТАГЛИПТИН',
 'AMOXICILLIN': 'АМОКСИЦИЛЛИН',
 'IRBESARTAN': 'ИРБЕСАРТАН',
 'DICLOFENAC': 'ДИКЛОФЕНАК',
 'EZETIMIBE': 'ЭЗЕТИМИБ',
 'FLUTICASONE': 'ФЛУТИКАЗОН',
 'VALSARTAN': 'ВАЛСАРТАН',
 'BESYLATE': 'БЕЗИЛАТ',
 'SUCCINATE': 'СУКЦИНАТ',
 'ACETYLSALICYLIC': 'АЦЕТИЛСАЛИЦИЛОВАЯ',
 'EMTRICITABINE': 'ЭМТРИЦИТАБИН',
 'COAGULATION': 'КОАГУЛЯЦИЯ',
 'IBUPROFEN': 'ИБУПРОФЕН',
 'LEVOFLOXACIN': 'ЛЕВОФЛОКСАЦИН',
 'HEXAHYDRATE': 'ГЕКСАГИДРА

In [68]:
'THIAMAZOLE'.isascii()

True

In [69]:
'ТИАМАЗОЛ'.isascii()

False

In [70]:
stat = load_json('stat.json')
en_ru = load_json('en_ru.json')

In [72]:
for s in stat:
    s['ru_name'] = en_ru.get(s['name'], None)

In [74]:
with open('stat.json', "w", encoding="utf-8") as json_file:
    json.dump(stat, json_file, ensure_ascii=False, indent=4)

In [None]:
'dfdf'.isascii()

In [71]:
final_drugs = load_json("drugs_new.json")

In [74]:
for f in final_drugs:
    f['key_substance'] = [item for item in f['key_substance'] if item not in non_active]

In [75]:
dump_json('final_drugs.json', final_drugs)

In [23]:
# Sort the dictionary by its values
sorted_medications = sorted(key_counts.items(), key=lambda item: item[1], reverse=True)

# If you need a dictionary as the output, convert the list of tuples back to a dictionary
sorted_dict = dict(sorted_medications)

print(sorted_dict)

{'HYDROCHLORIDE': 306, 'SODIUM': 287, 'CALCIUM': 142, 'MONOHYDRATE': 112, 'HYDROCHLOROTHIAZIDE': 104, 'RABEPRAZOLE': 104, 'ROSUVASTATIN': 103, '/': 97, 'ACID': 90, 'SITAGLIPTIN': 86, 'HUMAN': 81, 'IODIDE': 75, 'I)': 73, '(131': 72, 'ALFA': 71, 'ATORVASTATIN': 65, 'TEMOZOLOMIDE': 65, 'PARACETAMOL': 65, 'CHLORIDE': 65, 'VALSARTAN': 63, 'AMOXICILLIN': 63, 'RISPERIDONE': 62, 'IRBESARTAN': 60, 'METFORMIN': 54, 'FACTOR': 53, 'OLANZAPINE': 51, 'AMLODIPINE': 48, 'TRIHYDRATE': 47, 'LEVETIRACETAM': 47, 'DICLOFENAC': 46, 'VIII': 44, 'MEMANTINE': 42, 'ARIPIPRAZOLE': 41, 'PRAMIPEXOLE': 41, 'ACICLOVIR': 39, 'LEVOFLOXACIN': 39, 'COAGULATION': 38, 'IBUPROFEN': 38, 'SILDENAFIL': 37, 'INSULIN': 36, 'ESCITALOPRAM': 36, 'PHOSPHATE': 35, 'OXALATE': 33, 'LACOSAMIDE': 33, 'TADALAFIL': 32, 'DEXAMETHASONE': 32, 'PREGABALIN': 32, 'EZETIMIBE': 31, 'CLAVULANIC ACID': 31, 'VILDAGLIPTIN': 31, 'LAMOTRIGINE': 31, 'CEFUROXIME': 30, 'SODIUM CHLORIDE': 30, 'SUCCINATE': 30, 'SUNITINIB': 29, 'EPOETIN': 28, 'PANTOPRAZOLE':

In [5]:
old_json = 'drugs.json'
new_json = 'drugs_new.json'

with open(new_json, "r") as json_file:
    database = json.load(json_file)

for db in database:
    if ')' in db['active_substance_src']:
        print(db['id'], db['active_substance'])
        print(f"   {db['active_substance_src']}")

53 ['GUAIFENESIN', 'PSEUDOEPHEDRINE', 'TRIPROLIDINE']
   COUGH AND COLD PREPARATIONS (GUAIFENESIN, PSEUDOEPHEDRINE, TRIPROLIDINE) 
59 ['ADSORBED ON ALUMINIUM PHOSPHATE', 'PERTUSSIS TOXOID(PT)', 'PERTACTIN (PRN)', 'TETANUS TOXOID', 'FIMBRIAE TYPES 2 AND 3', 'DIPHTHERIA TOXOID', 'FILAMENTOUS HAEMAGGLUTININ (FHA)']
   ADSORBED ON ALUMINIUM PHOSPHATE  | PERTUSSIS TOXOID(PT)  | PERTACTIN (PRN)  | TETANUS TOXOID  | FIMBRIAE TYPES 2 AND 3| DIPHTHERIA TOXOID | FILAMENTOUS HAEMAGGLUTININ (FHA) 
234 ['ANTACIDS (ALUMINIUM', 'MAGNESIUM)']
   ANTACIDS (ALUMINIUM, MAGNESIUM)
235 ['ANTACIDS (ALUMINIUM', 'MAGNESIUM)']
   ANTACIDS (ALUMINIUM, MAGNESIUM)
500 ['OTOLOGICALS (CHOLINE SALICYLATE)']
   OTOLOGICALS (CHOLINE SALICYLATE)
510 ['COUGH AND COLD PREPARATIONS (CARBINOXAMINE', 'DEXTROMETHORPHAN', 'PHENYLEPHRINE', 'SODIUM CITRATE)']
   COUGH AND COLD PREPARATIONS (CARBINOXAMINE, DEXTROMETHORPHAN, PHENYLEPHRINE, SODIUM CITRATE)  
514 ['VACCINE (HEPATITIS A)']
   VACCINE (HEPATITIS A)
693 ['FOLLITROPIN 

In [19]:
a = ['CLINDAMYCIN PHOSPHATE', 'TRETINOIN']
'CLINDAMYCIN' in a

False

In [13]:
medicines = df['name'].unique()
best_match = find_medicine('ABATOR', medicines)

if best_match:
    json_file = 'drugs.json'

    with open(json_file, "r") as json_file:
        database = json.load(json_file)

    for db in database:
        if db['name'] == best_match:
            print(db['name_of_medicinal_product'])
            print(db['package'])
            print(db['maximum_retail_price'])
            print(15*"#")

ABATOR TABLET, FILM COATED 10MG
PACK WITH 30 TABS IN BLISTER(S)
4.32
###############
ABATOR TABLET, FILM COATED 20MG
PACK WITH 30 TABS IN BLISTER(S)
6
###############
ABATOR TABLET, FILM COATED 40MG
PACK WITH 30 TABS IN BLISTER(S)
7.19
###############


In [15]:
json_file = 'drugs.json'

with open(json_file, "r") as json_file:
    database = json.load(json_file)

names = []
for db in database:
    if db['name'] not in names:
        names.append(db['name'])

In [16]:
names

['ABACAVIR',
 'ABASAGLAR',
 'ABATOR',
 'ABERNIL',
 'ABILIFY',
 'ABIRATERONE',
 'ABIRATERONE/PHARMAZAC',
 'ABRAXANE',
 'ABSEAMED',
 'ABSTRAL',
 'ACAPRIL',
 'ACARILBIAL',
 'ACCOFIL',
 'ACCU-THYROX',
 'ACEPROTIN',
 'ACETAZOLAMIDE',
 'ACICLOVIR',
 'ACLASTA',
 'ACMOTREN',
 'ACNATAC',
 'ACTIFED',
 'ACTILYSE',
 'ACTONEL',
 'ACTRAPID',
 'ADACEL',
 'ADAFERIN',
 'ADAGREL',
 'ADCETRIS',
 'ADDAMEL',
 'ADELONE',
 'ADEMPAS',
 'ADENOSAN',
 'ADENURIC',
 'ADRENALINE',
 'ADRIBLASTINA',
 'ADVAGRAF',
 'ADVANTAN',
 'ADVATE',
 'ADVECIT',
 'ADYNOVI',
 'AERINAZE',
 'AERIUS',
 'AFEKSIN',
 'AFENTRAL',
 'AFINITOR',
 'AFITEN',
 'AFSTYLA',
 'AGELMIN',
 'AGGRAFIBAN',
 'AGGRASTAT',
 'AGREGEX',
 'AIDOL',
 'AIMOVIG',
 'AIRTAL',
 'AJOVY',
 'AKAMON',
 'AKILEN',
 'AKINETON',
 'AKLONIL',
 'AKTIPROL',
 'ALBIGONE',
 'ALBIOMIN',
 'ALBUMAN',
 'ALBUMEON',
 'ALBUNORM',
 'ALBUREX',
 'ALCAINE',
 'ALDARA',
 'ALDOCUMAR',
 'ALDOSOMNIL',
 'ALDURAZYME',
 'ALECENSA',
 'ALENDRONIC',
 'ALERTAN',
 'ALFINOR',
 'ALFOXAN',
 'ALFURAL',
 'ALFU

In [13]:
# Example usage
medicines = df['name'].unique()
user_query = "FFSGSFG"

closest_medicine = find_closest_match(user_query, medicines)
# for med in medicines:
#     if closest_medicine in med:
#         print(med)
df.query("name == @closest_medicine").sort_values(by="maximum_retail_price")

Unnamed: 0,pricing_code,name_of_medicinal_product,package,active_substance,marketing_authorisation_holder,local_pricing_representative,maximum_retail_price,name
113,39M0160/7,AFEKSIN SOLUBLE TABLET 20MG,PACK WITH 30 TABLETS IN BLISTER(S),FLUOXETINE,TEVA BV,C A PAPAELLINAS LTD,7.16,AFEKSIN


In [11]:
df.sample(1)

Unnamed: 0,pricing_code,name_of_medicinal_product,package,active_substance,marketing_authorisation_holder,local_pricing_representative,maximum_retail_price,name
3503,C157901/1,NEXVIADYME POWDER FOR CONCENTRATE FOR SOLUTION...,PACK WITH 1 VIAL X 100MG,AVALGLUCOSIDASE ALFA,SANOFI B.V.,C A PAPAELLINAS LTD,1281.37,NEXVIADYME


In [6]:
df.to_json('drugs.json', orient='records', indent=4)

In [9]:
import json

In [6]:
'SODIUM CITRATE | SODIUM LAURYL SULFOACETATE | SORBITOL CRYSTALLINE'.strip('|')

'SODIUM CITRATE | SODIUM LAURYL SULFOACETATE | SORBITOL CRYSTALLINE'

In [7]:
'dfd'.strip('|')

'dfd'

In [19]:
df

Unnamed: 0,pricing_code,name_of_medicinal_product,package,active_substance,marketing_authorisation_holder,local_pricing_representative,maximum_retail_price,name
0,39M0189/6,"ABACAVIR ACCORD TABLET, FILM COATED 300MG",PACK WITH 60 TABS IN BLISTER(S) (ALU/ALU),ABACAVIR,ACCORD HEALTHCARE S.L.U,MEDILINK PHARMACEUTICALS LTD,231.96,ABACAVIR
1,C944009/1,ABASAGLAR SOLUTION FOR INJECTION 100U/ML,PACK WITH 10 CARTRIDGES X 3ML,INSULIN GLARGINE,ELI LILLY NEDERLAND BV,PHADISCO LTD,115.25,ABASAGLAR
2,C944012/1,ABASAGLAR SOLUTION FOR INJECTION 100U/ML,PACK WITH 5 PRE-FILLED PENS x 3ML (Kwikpen),INSULIN GLARGINE,ELI LILLY NEDERLAND BV,PHADISCO LTD,55.57,ABASAGLAR
3,4100182/1,"ABATOR TABLET, FILM COATED 10MG",PACK WITH 30 TABS IN BLISTER(S),ATORVASTATIN,SAPIENS PHARMACEUTICALS LTD,SAPIENS PHARMACEUTICALS LTD,4.32,ABATOR
4,4100183/1,"ABATOR TABLET, FILM COATED 20MG",PACK WITH 30 TABS IN BLISTER(S),ATORVASTATIN,SAPIENS PHARMACEUTICALS LTD,SAPIENS PHARMACEUTICALS LTD,6,ABATOR
...,...,...,...,...,...,...,...,...
6091,9000251/1,"ZYRTEC TABLET, FILM COATED 10MG",PACK WITH 10 TABS IN BLISTER(S),CETIRIZINE,UCB PHARMA SA,LIFEPHARMA (Z.A.M.) LTD,exception,ZYRTEC
6092,C714001/1,ZYTIGA TABLET 250MG,BOTTLE WITH 120 TABS,ABIRATERONE,JANSSEN-CILAG INTERNATIONAL NV,VARNAVAS HADJIPANAYIS LTD,3265.03,ZYTIGA
6093,C714002/1,"ZYTIGA TABLET, FILM COATED 500MG",PACK WITH 56 TABS IN BLISTER(S),ABIRATERONE,JANSSEN-CILAG INTERNATIONAL NV,VARNAVAS HADJIPANAYIS LTD,2946.77,ZYTIGA
6094,39M0069/4,ZYVOXID SOLUTION FOR INFUSION 2MG/ML,PACK WITH 10 BAGS X 300ML,LINEZOLID,PFIZER HELLAS AE,PFIZER HELLAS AE (CYPRUS BRANCH),452.78,ZYVOXID


In [7]:
df.query("name == 'LECALCIF'")

Unnamed: 0,pricing_code,name_of_medicinal_product,package,active_substance,marketing_authorisation_holder,local_pricing_representative,maximum_retail_price,name
2741,32M0265/1,LECALCIF ORAL DROPS SOLUTION 2400IU/ML,PACK WITH 1 DROPPER X 10ML,COLECALCIFEROL,RAFARM S.A.,COSTAKIS TSISIOS & CO LTD,* Εξαίρεση αναγραφής τιμής στον τιμοκατάλογο (...,LECALCIF
2742,31M0073/5,LECALCIF ORAL SOLUTION 100000IU,PACK WITH 3 AMP X 1ML,COLECALCIFEROL,RAFARM S.A.,COSTAKIS TSISIOS & CO LTD,* Εξαίρεση αναγραφής τιμής στον τιμοκατάλογο (...,LECALCIF
2743,31M0072/2,LECALCIF ORAL SOLUTION 25000IU,PACK WITH 4 AMPS X 1ML,COLECALCIFEROL,RAFARM S.A.,COSTAKIS TSISIOS & CO LTD,13.91,LECALCIF


In [8]:
df.loc[2741, 'maximum_retail_price']

'* Εξαίρεση αναγραφής τιμής στον τιμοκατάλογο (Κανονισμός 4, ΚΔΠ 98/2019)'

In [12]:
for el in df['active_substance'].unique():
    print(el)

ABACAVIR
INSULIN GLARGINE
ATORVASTATIN
NALTREXONE 
ARIPIPRAZOLE 
ARIPIPRAZOLE
ABIRATERONE
ABIRATERONE ACETATE
PACLITAXEL
EPOETIN ALFA
FENTANYL
RAMIPRIL  
BENZYL BENZOATE  
FILGRASTIM 
LEVOTHYROXINE SODIUM
CAPTOPRIL
ACETAZOLAMIDE  
ACICLOVIR
ZOLEDRONIC ACID
ISOTRETINOIN  
CLINDAMYCIN PHOSPHATE, TRETINOIN 
COUGH AND COLD PREPARATIONS (GUAIFENESIN, PSEUDOEPHEDRINE, TRIPROLIDINE) 
ALTEPLASE
RISEDRONATE
INSULIN SOLUBLE
ADSORBED ON ALUMINIUM PHOSPHATE  | PERTUSSIS TOXOID(PT)  | PERTACTIN (PRN)  | TETANUS TOXOID  | FIMBRIAE TYPES 2 AND 3| DIPHTHERIA TOXOID | FILAMENTOUS HAEMAGGLUTININ (FHA) 
ADAPALENE    
CLOPIDOGREL BESILATE
CLOPIDOGREL
BRENTUXIMAB VEDOTIN
ELECTROLYTES IN COMB WITH OTHER DRUGS
PREDNISOLONE SODIUM PHOSPHATE 
RIOCIGUAT
KETOCONAZOLE
FEBUXOSTAT
EPINEPHRINE
DOXORUBICIN
DOXORUBICIN 
TACROLIMUS  
METHYLPREDNISOLONE ACEPONATE
METHYLPREDNISOLONE 
HUMAN COAGULATION FACTOR VIII
TEMOZOLOMIDE
DESLORATADINE, PSEUDOEPHEDRINE
DESLORATADINE
FLUOXETINE 
PAROXETINE
EVEROLIMUS
AMLODIPINE BESYLA

In [14]:

def find_closest_match(query, choices):
    """
    Find the closest match for a query in a list of choices based on Levenshtein distance.
    :param query: The user's input.
    :param choices: A list of possible choices (e.g., medicine names or active substances).
    :return: The closest match.
    """
    closest_match = None
    shortest_distance = float('inf')

    for choice in choices:
        distance = lev.distance(query.lower(), choice.lower())
        if distance < shortest_distance:
            shortest_distance = distance
            closest_match = choice

    return closest_match

# Example usage
medicines = df['active_substance'].unique()
user_query = "IRINOTECAN"

closest_medicine = find_closest_match(user_query, medicines)
for med in medicines:
    if closest_medicine in med:
        print(med)

print("Closest match for", user_query, "is", closest_medicine)


IRINOTECAN HYDROCHLORIDE
IRINOTECAN HYDROCHLORIDE TRIHYDRATE 
IRINOTECAN HYDROCHLORIDE TRIHYDRATE
IRINOTECAN
Closest match for IRINOTECAN is IRINOTECAN


In [20]:
len(medicines)

1883

1. Поиск лекарства по названию => Аналоги с подобным действующим веществом
2. Поиск аналогов по действующему веществу