In [1]:
!pip install --upgrade --quiet  langchain-core langchain-community langchain-openai langchain-groq langchain neo4j python-dotenv


[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq
from langchain.chains import GraphCypherQAChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
import os
from dotenv import load_dotenv
load_dotenv()

groq_api = os.getenv("GROQ_API_KEY")

# Neo4j 
neo4j_url = os.getenv("NEO4J_CONNECTION_URL")
neo4j_user = os.getenv("NEO4J_USER")
neo4j_password = os.getenv("NEO4J_PASSWORD")

# https://api.python.langchain.com/en/latest/graphs/langchain_community.graphs.neo4j_graph.Neo4jGraph.html
graph = Neo4jGraph(neo4j_url,neo4j_user,neo4j_password)

# Prepare Data

In [6]:
df

Unnamed: 0,id,nomor_registrasi,types,prepro_indications,cleaned_names
0,7471,TR143677741,,,teenmax
1,2022,TR153689541,,,mixsaga
2,2021,TR153689541,,,mixsaga
3,448,TR143381131,kapsul,,pasopati
4,2269,GKL2004529817B1,tablet,,etoricoxib
...,...,...,...,...,...
10053,4900,DKL9513011046A1,tetes,gangguan inflamasi ocular diindikasikan kortik...,ximex optixitrol eye drop
10054,7492,TI134347671,kapsul,"membantu memelihara kesehatan , membantu mence...",natures health grape seed extract
10055,9441,GKL1405049017A1,tablet,eyesight kitty kawaii mini olivia light brown ...,eyesight kitty kawaii mini olivia light brown ...
10056,7491,TI134347671,kapsul,membantu mencegah timbulnya enzim merusak jari...,natures health grape seed extract


In [7]:
bpom

Unnamed: 0.1,Unnamed: 0,nomor_registrasi,produk,nama_produk,bentuk_sediaan,komposisi,klaim
0,0,DTL2341200729A1,Obat,NIZORAL,KRIM 20 MG,- KETOCONAZOLE,
1,1,DBL1721207537A1,Obat,DULCOLACTOL,SIRUP,- LACTULOSE CONCENTRATE,
2,2,DKI0585600517A1,Obat,ARAVA,TABLET SALUT SELAPUT 20 MG,- LEFLUNOMIDE,
3,3,DKL1035307649A2,Obat,RONAZOL,INFUS 500 MG,- METRONIDAZOLE,
4,4,DKL0635305209A1,Obat,CYTROPIL 800,KAPLET SALUT SELAPUT 800 MG,- PIRACETAM,
...,...,...,...,...,...,...,...
40717,40717,TI134347481,Obat Tradisional,BU SHEN YI SHOU JIAO NANG,Botol @ 60 kapsul @ 0.3 gram,- ganoderma fructificatio,memelihara kesehatan
40718,40718,TR152587911,Obat Tradisional,HEBATOP,"Dus, Amplop @ 5 Blister @ 2 Kaplet",- Ekstrak Eurycomae Radix,Membantu memelihara stamina pria
40719,40719,TR033429701,Obat Tradisional,MAJAKANI PERAPAT,"Dus, 12 bungkus @ 6 pil @ 200 mg\nDus, 3 bungk...",- Areca Catechu Semen,Membantu mengurangi lendir yang berlebihan pad...
40720,40720,TR182310241,Obat Tradisional,OLIVIN,"Dus, 3 blister @ 10 kapsul",- Olea Europaea setara dengan hydroxytirosol 5 mg,membantu memelihara kesehatan


In [3]:
df = pd.read_csv('data_obat_fix_ordered (2).csv', sep = ',')
df = df[['id','nomor_registrasi','types','prepro_indications','cleaned_names']]
bpom = pd.read_csv('bpom_full.csv')
merge= pd.merge(df, bpom, on='nomor_registrasi', how='left')[['id','nomor_registrasi','types','prepro_indications','cleaned_names','nama_produk','komposisi','klaim']]
merge['komposisi'] = merge['komposisi'].astype(str).apply(lambda x: x.lower().replace('- ', '').replace('.', '|'))
merge['nama'] = merge['nama_produk'].astype(str).apply(lambda x: x.lower())
merge['prepro_indications'] = merge['prepro_indications'].fillna(merge['klaim'])
merge = merge[['id','nomor_registrasi','nama','types','komposisi','prepro_indications']]
merge.drop_duplicates(subset=['nomor_registrasi', 'nama'], inplace = True)
merge = merge.sort_values(by = 'id').reset_index(drop=True)
merge

Unnamed: 0,id,nomor_registrasi,nama,types,komposisi,prepro_indications
0,0,GKL1433531717A1,sildenafil citrate,tablet,sildenafil citrate,terapi disfungsi ereksi pria dewasa
1,5,DKI1690401417A1,viagra,tablet,sildenafil citrate,terapi disfungsi ereksi pria dewasa
2,7,DKL1333528917A1,ericfil,tablet,sildenafil citrate,ericfil diindikasikan mengobati disfungsi ereksi
3,9,DKI1973401817C1,cialis,tablet,tadalafil,pengobatan ketidakmampuan mencapai mempertahan...
4,12,DKL2233554519A1,ericfil 50,odf,sildenafil citrate,terapi disfungsi ereksi pria dewasa
...,...,...,...,...,...,...
4749,10053,TR132474581,sinargi astramuno,pil,astragalus membranaceus radix,membantu memelihara kesehatan
4750,10054,TR213603191,probiomag,madu,ekstrak cinamommum burmani cortex,"meringankan gangguan pencernaan , perut mual k..."
4751,10055,TR183613971,madu gurah fit,madu,abrus precatorius folium extract,membantu meredakan batuk
4752,10056,TR183622691,madu nurutenz,madu,apium graveolens herba ekstrak,meringankan gejala hipertensi


In [16]:
model = ChatGroq(model="llama3-70b-8192",groq_api_key = groq_api)

def process_indikasi(text):
  template = """
      Extract the types of diseases that can be cured from a given text.
      If more than one, separate them with '|', do not use any other symbols.
      Do not add any explanations, reasoning or preambles, if none exist then return 'None'.
      Your output should be in Bahasa Indonesia

      Example:

      text = "membantu meringankan tekanan darah ringan"
      helpful answer = "tekanan darah ringan"

      text = "pengobatan ketidakmampuan mencapai mempertahankan ereksi berhubungan seksual disfungsi ereksi pria dewasa , mengobati penderita pembesaran prostat jinak bph hipertensi pulmonal tekanan pembuluh darah arteri paru - paru"
      helpful answer = "disfungsi ereksi|pembesaran prostat jinak|hipertensi|tekanan pembuluh darah arteri paru-paru"

      text: {text}
      helpful answer:
      """

  prompt = ChatPromptTemplate.from_template(template = template)
  output_parser = StrOutputParser()

  chain = prompt | model | output_parser

  result = chain.invoke({"text": text})
  print(result)
  return result

In [14]:
result = process_indikasi("mengurangi kejadian aterosklerosis infark miokard , stroke kematian vaskular pasien aterosklerosis ditandai stroke , infark miokard penyakit arteri")

disfungsi ereksi|pembesaran prostat|hipertensi|infark miokard|stroke|penyakit arteri


In [None]:
merge

Unnamed: 0,id,nomor_registrasi,nama,types,komposisi,prepro_indications
0,0,GKL1433531717A1,sildenafil citrate,tablet,sildenafil citrate,terapi disfungsi ereksi pria dewasa
1,5,DKI1690401417A1,viagra,tablet,sildenafil citrate,terapi disfungsi ereksi pria dewasa
2,7,DKL1333528917A1,ericfil,tablet,sildenafil citrate,ericfil diindikasikan mengobati disfungsi ereksi
3,9,DKI1973401817C1,cialis,tablet,tadalafil,pengobatan ketidakmampuan mencapai mempertahan...
4,12,DKL2233554519A1,ericfil 50,odf,sildenafil citrate,terapi disfungsi ereksi pria dewasa
...,...,...,...,...,...,...
4749,10053,TR132474581,sinargi astramuno,pil,astragalus membranaceus radix,membantu memelihara kesehatan
4750,10054,TR213603191,probiomag,madu,ekstrak cinamommum burmani cortex,"meringankan gangguan pencernaan , perut mual k..."
4751,10055,TR183613971,madu gurah fit,madu,abrus precatorius folium extract,membantu meredakan batuk
4752,10056,TR183622691,madu nurutenz,madu,apium graveolens herba ekstrak,meringankan gejala hipertensi


In [25]:
sample = merge.sample(frac=10, replace = True).sample(n=100)
sample

Unnamed: 0,id,nomor_registrasi,nama,types,komposisi,prepro_indications
1621,3514,DKL1007810604A1,hufadon,kaplet,domperidone,terapi mual muntah disebabkan levodopa bromokr...
4175,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2 non - insulin - depend...
4356,9097,DKI0367501817C1,diovan,tablet,valsartan,"hipertensi , gagal jantung , pasca infark miokard"
2165,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,membantu mengatasi kekeringan mata
287,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,"untuk meringankan gejala batuk alergi , batuk ..."
...,...,...,...,...,...,...
3324,6728,DKL0304421302B1,oscal,kapsul,calcitriol,osteoporosis pasca menopause osteodistrofi re...
3090,6354,DKL0609215704A1,rexavin,kaplet,griseofulvin,
1683,3612,DKL1733537682A1,monell,tablet,domperidone,"mual muntah akut , dispepsia gangguan pencerna..."
2463,5356,DKL0434601629A1,afucid,krim,fusidic acid,"impetigo kontagiosum , folikulitis superfisial..."


In [26]:
sample['indikasi'] = sample['prepro_indications'].map(process_indikasi)

mual muntah|kanker|dispepsia fungsional
diabetes melitus tipe 2|non-insulin-dependent type ii diabetes melitus|niddm
hipertensi|gagal jantung|infark miokard
kekeringan mata
batuk alergi|batuk berdahak
asma bronkial|bronkritis kronik|emfisema
alergi saluran pernapasan|alergi kulit|mata
None
None
batuk|demam|sakit kepala|hiding bersin-bersin
alergi rinitis nasal|rinitis intermiten persisten|pruritus|urtikaria khronik idiopathik
Here is the answer:

infeksi sal cerna|infeksi sal nafas|infeksi bakteri
hipertensi|penyakit arteri koroner
kandidiasis mukokutan khronis|infeksi jamur sistemik
hipertensi|gagal jantung kronik
batuk pilek
batuk
disfungsi ereksi|pembesaran prostat jinak|hipertensi|tekanan pembuluh darah arteri paru-paru
None
sakir kepala|sakit gigi|dismenore primer|nyeri trauma|nyeri otot|nyeri operasi
infeksi saluran kemih|otitis medium|radang rongga gendang telinga|faringitis|tonsilitis|bronkhitis akut|bronkhitis kronis
sakit kepala
dehidrasi ringan|diare|muntah
hipsterplasia pro

In [44]:
sample.to_csv('sample_skripsi.csv', index = False)

In [3]:
sample = pd.read_csv('sample_skripsi.csv')
sample

Unnamed: 0,id,nomor_registrasi,nama,tipe,komposisi,indikasi
0,3514,DKL1007810604A1,hufadon,kaplet,domperidone,mual muntah|kanker|dispepsia fungsional
1,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2|non-insulin-dependent ...
2,9097,DKI0367501817C1,diovan,tablet,valsartan,hipertensi|gagal jantung|infark miokard
3,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,kekeringan mata
4,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,batuk alergi|batuk berdahak
...,...,...,...,...,...,...
95,6728,DKL0304421302B1,oscal,kapsul,calcitriol,osteoporosis|osteodistrofi renal|gangguan ginj...
96,6354,DKL0609215704A1,rexavin,kaplet,griseofulvin,
97,3612,DKL1733537682A1,monell,tablet,domperidone,mual muntah akut|mual muntah anak akibat kemot...
98,5356,DKL0434601629A1,afucid,krim,fusidic acid,impetigo kontagiosum|folikulitis superfisial|f...


In [12]:
data = pd.read_csv('data_obat_fix_ordered (2).csv')
# print(data[data['nomor_registrasi'] == 'DKL1007810604A1']['prepro_indications'].to_list()[0])
data

Unnamed: 0,id,nomor_registrasi,produk,name,komposisi,price,Aturan Pakai,Dosis,Indikasi Umum,Kemasan,Kontra Indikasi,Perhatian,types,prepro_komposisi,prepro_indications,cleaned_names,summary,word_count,max_length
0,7471,TR143677741,Obat Tradisional,Teenmax 350 ml,TEENMAX,Rp192.800 - Rp240.000,,,,"Dus, Botol @ 350 ml",,,,teenmax,,teenmax,teenmax,1,16
1,2022,TR153689541,Obat Tradisional,Mixsaga 10 ml,MIXSAGA,Rp25.000 - Rp33.700,,,,"Dus, Botol Plastik @ 10 ml",,,,mixsaga,,mixsaga,mixsaga,1,16
2,2021,TR153689541,Obat Tradisional,Mixsaga 10 ml,MIXSAGA,Rp25.000 - Rp33.700,,,,"Dus, Botol Plastik @ 10 ml",,,,mixsaga,,mixsaga,mixsaga,1,16
3,448,TR143381131,Obat Tradisional,Pasopati 15 Kapsul,PASOPATI,Rp52.300 - Rp84.800,,,,,,,kapsul,pasopati,,pasopati,pasopati kapsul,2,16
4,2269,GKL2004529817B1,Obat,Etoricoxib 120 mg 10 Tablet,- ETORICOXIB,Rp83.200 - Rp97.700,,,,"Dus, 3 Strip @ 10 Tablet Salut Selaput",,,tablet,etoricoxib,,etoricoxib,etoricoxib tablet,2,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10053,4900,DKL9513011046A1,Obat,Ximex Optixitrol Eye Drop 5 ml,- NEOMYCIN SULFATE. DEXAMETHASONE SODIUM PHOSP...,Rp26.300 - Rp27.400,teteskan pada mata yang terinfeksi,PENGGUNAAN OBAT INI HARUS SESUAI DENGAN PETUNJ...,gangguan inflamasi ocular yang diindikasikan k...,Botol @ 5 ml,Keratitis herpes simplex epithelial (dendritis...,HARUS DENGAN RESEP DOKTER. Penggunaan jangka p...,tetes,neomycin sulfate dexamethasone sodium phosphat...,gangguan inflamasi ocular diindikasikan kortik...,ximex optixitrol eye drop,ximex optixitrol eye drop tetes neomycin sulfa...,79,128
10054,7492,TI134347671,Obat Tradisional,Nature's Health Grape Seed Extract 60 Kapsul,NATURES HEALTH GRAPE SEED EXTRACT,Rp216.700 - Rp245.700,Sesudah makan,2 kapsul/hari,"Membantu memelihara kesehatan, membantu menceg...",Botol @ 60 Kapsul,,"Simpan dalam tempat tertutup rapat, Hindari da...",kapsul,natures health grape seed extract,"membantu memelihara kesehatan , membantu mence...",natures health grape seed extract,natures health grape seed extract kapsul memba...,81,128
10055,9441,GKL1405049017A1,Obat,Eyesight Kitty Kawaii Mini Olivia Light Brown ...,- KOMBINASI-VALSARTAN BLEND 32%,Rp106.200,Bersihkan tangan sebelum menggunakan softlens....,,Eyesight Kitty Kawaii Mini Olivia Light Brown ...,Box @ 2 Soft Contact Lense.,,Produk ini merupakan Softlens Minus yang dituj...,tablet,kombinasi-valsartan blend 32%,eyesight kitty kawaii mini olivia light brown ...,eyesight kitty kawaii mini olivia light brown ...,eyesight kitty kawaii mini olivia light brown ...,81,128
10056,7491,TI134347671,Obat Tradisional,Nature's Health Grape Seed Extract 30 Kapsul,NATURES HEALTH GRAPE SEED EXTRACT,Rp161.000 - Rp281.300,,2 kapsul per hari,Membantu mencegah timbulnya enzim yang merusak...,Botol @ 30 Kapsul,,,kapsul,natures health grape seed extract,membantu mencegah timbulnya enzim merusak jari...,natures health grape seed extract,natures health grape seed extract kapsul memba...,81,128


In [36]:
import numpy as np

def filter_parts(value):
    if value == 'None':
        return np.nan
    split_values = value.lower().split(':')
    filtered_parts = [part for part in split_values if '|' in part]
    return '|'.join(filtered_parts) if filtered_parts else value.lower()

# Create a new column with filtered parts
sample['indikasi'] = sample['indikasi'].map(filter_parts)
sample['indikasi'] = sample['indikasi'].str.replace('"', '').str.replace('\n', '')

In [37]:
sample

Unnamed: 0,id,nomor_registrasi,nama,types,komposisi,prepro_indications,indikasi,filtered_indikasi
1621,3514,DKL1007810604A1,hufadon,kaplet,domperidone,terapi mual muntah disebabkan levodopa bromokr...,mual muntah|kanker|dispepsia fungsional,mual muntah|kanker|dispepsia fungsional
4175,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2 non - insulin - depend...,diabetes melitus tipe 2|non-insulin-dependent ...,diabetes melitus tipe 2|non-insulin-dependent ...
4356,9097,DKI0367501817C1,diovan,tablet,valsartan,"hipertensi , gagal jantung , pasca infark miokard",hipertensi|gagal jantung|infark miokard,hipertensi|gagal jantung|infark miokard
2165,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,membantu mengatasi kekeringan mata,kekeringan mata,kekeringan mata
287,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,"untuk meringankan gejala batuk alergi , batuk ...",batuk alergi|batuk berdahak,batuk alergi|batuk berdahak
...,...,...,...,...,...,...,...,...
3324,6728,DKL0304421302B1,oscal,kapsul,calcitriol,osteoporosis pasca menopause osteodistrofi re...,osteoporosis|osteodistrofi renal|gangguan ginj...,osteoporosis|osteodistrofi renal|gangguan ginj...
3090,6354,DKL0609215704A1,rexavin,kaplet,griseofulvin,,,
1683,3612,DKL1733537682A1,monell,tablet,domperidone,"mual muntah akut , dispepsia gangguan pencerna...",mual muntah akut|mual muntah anak akibat kemot...,mual muntah akut|mual muntah anak akibat kemot...
2463,5356,DKL0434601629A1,afucid,krim,fusidic acid,"impetigo kontagiosum , folikulitis superfisial...",impetigo kontagiosum|folikulitis superfisial|f...,impetigo kontagiosum|folikulitis superfisial|f...


In [38]:
sample.rename(columns={'types': 'tipe'}, inplace=True)
sample.drop(['prepro_indications'], axis=1, inplace=True)
sample

In [43]:
sample

Unnamed: 0,id,nomor_registrasi,nama,tipe,komposisi,indikasi
1621,3514,DKL1007810604A1,hufadon,kaplet,domperidone,mual muntah|kanker|dispepsia fungsional
4175,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2|non-insulin-dependent ...
4356,9097,DKI0367501817C1,diovan,tablet,valsartan,hipertensi|gagal jantung|infark miokard
2165,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,kekeringan mata
287,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,batuk alergi|batuk berdahak
...,...,...,...,...,...,...
3324,6728,DKL0304421302B1,oscal,kapsul,calcitriol,osteoporosis|osteodistrofi renal|gangguan ginj...
3090,6354,DKL0609215704A1,rexavin,kaplet,griseofulvin,
1683,3612,DKL1733537682A1,monell,tablet,domperidone,mual muntah akut|mual muntah anak akibat kemot...
2463,5356,DKL0434601629A1,afucid,krim,fusidic acid,impetigo kontagiosum|folikulitis superfisial|f...


# Insert to Neo4J

In [3]:
graph.refresh_schema()
print(graph.schema)

Node properties:

Relationship properties:

The relationships:



In [17]:
df = pd.read_csv('sample_skripsi.csv')
df.fillna('-',inplace= True)
df.head(15)
df.to_csv('sample_skripsi.csv', inplace = True)

Unnamed: 0,id,nomor_registrasi,nama,tipe,komposisi,indikasi
0,3514,DKL1007810604A1,hufadon,kaplet,domperidone,mual muntah|kanker|dispepsia fungsional
1,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2|non-insulin-dependent ...
2,9097,DKI0367501817C1,diovan,tablet,valsartan,hipertensi|gagal jantung|infark miokard
3,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,kekeringan mata
4,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,batuk alergi|batuk berdahak
5,552,DKL7804407610B1,salbron,tablet,salbutamol sulfate,asma bronkial|bronkritis kronik|emfisema
6,4170,DKL1208018337A1,bdm,sirup,betamethasone,alergi saluran pernapasan|alergi kulit|mata
7,7873,TR132673741,jagak - bebas gula,-,-,-
8,1496,DTL2032209837A1,nipe expectorant adult,sirup,guaifenesin,-
9,1022,DTL0504129337A1,obh combi batuk + flu rasa madu,sirup,succus liquiritiae| paracetamol| ammonium chlo...,batuk|demam|sakit kepala|hiding bersin-bersin


In [19]:
df

Unnamed: 0,id,nomor_registrasi,nama,tipe,komposisi,indikasi
0,3514,DKL1007810604A1,hufadon,kaplet,domperidone,mual muntah|kanker|dispepsia fungsional
1,8759,DKL0804520010A1,norizec,tablet,glimepiride,diabetes melitus tipe 2|non-insulin-dependent ...
2,9097,DKI0367501817C1,diovan,tablet,valsartan,hipertensi|gagal jantung|infark miokard
3,4681,DKL1203813646A1,lentikular,tetes,pirenoxine,kekeringan mata
4,926,DKL0804130337A1,comtusi (rasa strawberry),sirup,oxomemazine| guaifenesin,batuk alergi|batuk berdahak
...,...,...,...,...,...,...
95,6728,DKL0304421302B1,oscal,kapsul,calcitriol,osteoporosis|osteodistrofi renal|gangguan ginj...
96,6354,DKL0609215704A1,rexavin,kaplet,griseofulvin,-
97,3612,DKL1733537682A1,monell,tablet,domperidone,mual muntah akut|mual muntah anak akibat kemot...
98,5356,DKL0434601629A1,afucid,krim,fusidic acid,impetigo kontagiosum|folikulitis superfisial|f...


In [7]:
# insert_query = """
# LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/projectwilsen/skripsi/main/sample_skripsi.csv' AS row 
# MERGE (obat:Obat {nama: row.nama, id: row.id, no_regis: row.nomor_registrasi}) 
# FOREACH (kom in CASE WHEN row.komposisi IS NOT NULL THEN split(row.komposisi, '|') ELSE [] END | 
#     MERGE (komposisi:Komposisi {nama:trim(kom)}) 
#     MERGE (obat)-[:MENGANDUNG]->(komposisi)) 
# FOREACH (ind in CASE WHEN row.indikasi IS NOT NULL THEN split(row.indikasi, '|') ELSE [] END | 
#     MERGE (indikasi:Indikasi {nama:trim(ind)}) 
#     MERGE (obat)-[:MENGOBATI]->(indikasi)) 
# FOREACH (t in CASE WHEN row.tipe IS NOT NULL THEN [row.tipe] ELSE [] END | 
#     MERGE (tipe:Tipe {nama:t}) 
#     MERGE (obat)-[:TIPE]->(tipe))
# """

# insert_query = """
# LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/projectwilsen/skripsi/main/sample_skripsi.csv' AS row 
# MERGE (obat:Obat {nama: row.nama, id: row.id, no_regis: row.nomor_registrasi}) 
# WITH row, obat, 
#      CASE WHEN row.komposisi IS NOT NULL THEN split(row.komposisi, '|') ELSE [] END AS komposisiList,
#      size(CASE WHEN row.komposisi IS NOT NULL THEN split(row.komposisi, '|') ELSE [] END) AS komposisiCount,
#      CASE WHEN row.indikasi IS NOT NULL THEN split(row.indikasi, '|') ELSE [] END AS indikasiList,
#      CASE WHEN row.tipe IS NOT NULL THEN [row.tipe] ELSE [] END AS tipeList

# FOREACH (kom in komposisiList |
#     MERGE (komposisi:Komposisi {nama: trim(kom)})
#     MERGE (obat)-[r:MENGANDUNG]->(komposisi)
#     SET r.weight = 1.0 / komposisiCount
# )

# FOREACH (ind in indikasiList |
#     MERGE (indikasi:Indikasi {nama: trim(ind)})
#     MERGE (obat)-[:MENGOBATI]->(indikasi)
# )

# FOREACH (t in tipeList |
#     MERGE (tipe:Tipe {nama: t})
#     MERGE (obat)-[:TIPE]->(tipe)
# )

# """

insert_query = """
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/projectwilsen/skripsi/main/sample_skripsi.csv' AS row 
MERGE (obat:Obat {nama: row.nama, id: row.id, no_regis: row.nomor_registrasi, tipe: COALESCE(row.tipe, 'None')}) 
WITH row, obat, 
     CASE WHEN row.komposisi IS NOT NULL THEN split(row.komposisi, '|') ELSE [] END AS komposisiList,
     size(CASE WHEN row.komposisi IS NOT NULL THEN split(row.komposisi, '|') ELSE [] END) AS komposisiCount,
     CASE WHEN row.indikasi IS NOT NULL THEN split(row.indikasi, '|') ELSE [] END AS indikasiList

FOREACH (kom in komposisiList |
    MERGE (komposisi:Komposisi {nama: trim(kom)})
    MERGE (obat)-[r:MENGANDUNG]->(komposisi)
    SET r.weight = 1.0 / komposisiCount
)

FOREACH (ind in indikasiList |
    MERGE (indikasi:Indikasi {nama: trim(ind)})
    MERGE (obat)-[:MENGOBATI]->(indikasi)
)


"""

graph.query(insert_query)

[]

In [8]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Obat {id: STRING, nama: STRING, no_regis: STRING, tipe: STRING}
Komposisi {nama: STRING}
Indikasi {nama: STRING}
Relationship properties:
MENGANDUNG {weight: FLOAT}
The relationships:
(:Obat)-[:MENGANDUNG]->(:Komposisi)
(:Obat)-[:MENGOBATI]->(:Indikasi)


In [7]:
graph.query(""" MATCH (komposisi:Komposisi {nama: 'paracetamol'})<-[r:MENGANDUNG]-(obat:Obat)
RETURN obat.nama AS Medicine,obat.id, r.weight AS Weight
ORDER BY r.weight DESC""")

[{'Medicine': 'pacetik-500', 'obat.id': '2621', 'Weight': 1.0},
 {'Medicine': 'bodrex demam', 'obat.id': '2243', 'Weight': 1.0},
 {'Medicine': 'panadol', 'obat.id': '2086', 'Weight': 1.0},
 {'Medicine': 'xepamol', 'obat.id': '3001', 'Weight': 1.0},
 {'Medicine': 'turpas forte', 'obat.id': '2999', 'Weight': 1.0},
 {'Medicine': 'spasmal', 'obat.id': '3657', 'Weight': 0.5},
 {'Medicine': 'hustab', 'obat.id': '1405', 'Weight': 0.3333333333333333},
 {'Medicine': 'flucadex pe flu & batuk berdahak',
  'obat.id': '1337',
  'Weight': 0.3333333333333333},
 {'Medicine': 'flunax', 'obat.id': '1347', 'Weight': 0.3333333333333333},
 {'Medicine': 'ternix plus', 'obat.id': '1652', 'Weight': 0.25},
 {'Medicine': 'obh combi batuk + flu rasa madu',
  'obat.id': '1022',
  'Weight': 0.2}]

# Chat

In [26]:
model = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key = groq_api)
chain = GraphCypherQAChain.from_llm(graph=graph, llm=model, verbose=True)

In [27]:

questions = ["Apa obat untuk batuk?"]

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Apa obat untuk batuk?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (o:Obat)-[:MENGOBATI]->(i:Indikasi) WHERE i.nama = "Batuk" RETURN o.nama;[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer.


# ER Fasttext

In [None]:
import fasttext

model = fasttext.load_model('cc.id.300.bin')

In [1]:
import fasttext
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def load_fasttext_model(model_path):
    """
    Load a FastText model from a given file path.
    """
    return fasttext.load_model(model_path)

def get_sentence_vector(model, sentence):
    """
    Get a vector representation of a sentence by averaging the word vectors.
    """
    words = sentence.split()
    word_vectors = [model.get_word_vector(word) for word in words if word in model.words]
    if len(word_vectors) == 0:
        return np.zeros((model.get_dimension(),))
    return np.mean(word_vectors, axis=0)

def map_names_to_standardized_fasttext(model, standardized_names, names_to_map, threshold=0.5):
    """
    Maps names from `names_to_map` to `standardized_names` using FastText embeddings and cosine similarity.
    
    Parameters:
    - model: Loaded FastText model.
    - standardized_names (list of str): List of standardized names.
    - names_to_map (list of str): List of names that need to be mapped.
    - threshold (float): Minimum cosine similarity score to consider a match.
    
    Returns:
    - tuple: 
        1. dict: A dictionary where keys are standardized names, and values are lists of mapped names.
        2. list: A list of names from `names_to_map` that haven't been mapped.
    """
    # Compute vectors for standardized names
    standardized_vectors = np.array([get_sentence_vector(model, name) for name in standardized_names])
    
    # Initialize a dictionary to store results
    mapping_dict = {name: [] for name in standardized_names}
    unmapped_names = []
    
    # Iterate over each name to map
    for name in names_to_map:
        # Get vector for the name to be mapped
        name_vector = get_sentence_vector(model, name).reshape(1, -1)
        
        # Calculate cosine similarities
        similarities = cosine_similarity(name_vector, standardized_vectors).flatten()
        
        # Find the index of the standardized name with the highest similarity
        best_match_index = np.argmax(similarities)
        best_match_score = similarities[best_match_index]
        
        # If the similarity is above the threshold, map it to the best match
        if best_match_score >= threshold:
            standardized_name = standardized_names[best_match_index]
            mapping_dict[standardized_name].append(name)
        else:
            # Add to unmapped names if no match found
            unmapped_names.append(name)
    
    return mapping_dict, unmapped_names

# Example usage:
model_path = 'cc.id.300.bin'  # Replace with your actual model path
fasttext_model = load_fasttext_model(model_path)

standardized_names = ["Hypertension", "Diabetes", "Cardiovascular Disease"]
names_to_map = ["High blood pressure", "Type 2 diabetes", "Heart disease", "Asthma"]

mapped_dict, unmapped_diseases = map_names_to_standardized_fasttext(fasttext_model, standardized_names, names_to_map, threshold=0.3)
print("Mapped Names:", mapped_dict)
print("Unmapped Names:", unmapped_diseases)


ModuleNotFoundError: No module named 'fasttext'