In [3]:
import json
import os

from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connect to Elasticsearch.
# Connection details are taken from the environment when set. The fallbacks are
# the values this notebook has always used for the local cluster, so it keeps
# running unchanged when no variables are exported.
# NOTE(review): the fallback password is still a secret committed to the
# notebook - rotate it and drop the default before sharing this file.
es = Elasticsearch(
    os.environ.get("ELASTICSEARCH_URL", "https://localhost:9200"),
    # ca_certs=".\certs\ca\ca.crt",
    basic_auth=(
        os.environ.get("ELASTIC_USER", "elastic"),
        os.environ.get("ELASTIC_PASSWORD", "hammasir"),
    ),
    # Certificate verification is disabled and the resulting warning silenced;
    # acceptable only against a local, self-signed development cluster.
    verify_certs=False,
    ssl_show_warn=False,
)
# Round-trip to the cluster so a bad URL or credentials fail here, not later.
es.info()

ObjectApiResponse({'name': 'es01', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'toAtBBXbQKm8i7sXCH722Q', 'version': {'number': '8.15.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179', 'build_date': '2024-08-05T10:05:34.233336849Z', 'build_snapshot': False, 'lucene_version': '9.11.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [None]:
# Decode the file as UTF-8 explicitly. Without `encoding`, open() falls back to
# the platform's locale encoding, which on Windows is usually not UTF-8 and
# would corrupt or reject non-ASCII text.
# NOTE(review): assumes the dataset was written as UTF-8 (the JSON default) - confirm.
with open("../data/processed/base_dataset.json", encoding="utf-8") as f:
    json_data = json.load(f)

In [31]:
import pandas as pd

# Load the processed dataset from CSV into a DataFrame; later cells clean it
# and index its rows into Elasticsearch.
data = pd.read_csv("../data/processed/base_dataset.csv")

In [32]:
import ast


def safe_literal_eval(val):
    """Parse a string holding a Python literal, tolerating malformed input.

    Args:
        val: Any value. Strings are parsed with ``ast.literal_eval``; values
            of every other type are handed back untouched.

    Returns:
        The parsed object for a string, ``val`` itself for a non-string, or
        ``None`` when the string is not a valid Python literal.
    """
    if not isinstance(val, str):
        return val  # already a Python object (or NaN/None): nothing to parse

    try:
        return ast.literal_eval(val)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed input,
        # e.g. TypeError for an unhashable dict key such as "{[1]: 2}".
        # One bad cell should yield None rather than abort a whole column.
        return None

In [33]:
# Turn each stringified "clinic" cell back into a Python object, one element at
# a time; cells that are not strings pass through safe_literal_eval unchanged.
data["clinic"] = data["clinic"].map(safe_literal_eval)
# data["insurances"] = data["insurances"].apply(safe_literal_eval)

In [34]:
import numpy as np

# Swap every NaN for None - presumably so missing values are sent to
# Elasticsearch as null; the search output further down shows e.g.
# 'experience': None.
# NOTE(review): passing an explicit None as the replacement value is
# pandas-version-sensitive (older releases reportedly ignored it and
# forward-filled instead) - confirm the pandas version this notebook runs on.
data = data.replace(np.nan, None)

In [35]:
# Explicit field mappings for the "doctors" index.
# The Persian free-text fields ("expertise", "title") name the custom
# "rebuilt_persian" analyzer that the index settings define. A text field with
# no "analyzer" falls back to the standard analyzer, which would leave that
# custom analyzer defined but never applied to any field.
mappings = {
    "properties": {
        "gender": {"type": "text"},
        "expertise": {"type": "text", "analyzer": "rebuilt_persian"},
        "title": {"type": "text", "analyzer": "rebuilt_persian"},
        "star": {"type": "float"},
        "rates_count": {"type": "integer"},
        "number_of_visits": {"type": "integer"},
        "view": {"type": "text"},
        "insurances": {"type": "text"},
        "experience": {"type": "integer"},
        "doctor_encounter": {"type": "float"},
        "explanation_of_issue": {"type": "float"},
        "quality_of_treatment": {"type": "float"},
        "comments_count": {"type": "integer"},
        "waiting_time": {"type": "float"},
        # Sub-fields of "clinic" are not declared here, so they are mapped
        # dynamically from the indexed documents.
        "clinic": {"type": "object"},
    }
}

In [36]:
# Drop any previous copy of the index so the cells below rebuild it from scratch.
# ignore_unavailable=True makes a missing index (first run, fresh cluster) a
# no-op instead of a 404 error that would stop a top-to-bottom run.
es.indices.delete(index="doctors", ignore_unavailable=True)

ObjectApiResponse({'acknowledged': True})

In [37]:
# Create the "doctors" index with the explicit mappings and a custom analysis
# chain registered under the analyzer name "rebuilt_persian".
es.indices.create(
    index="doctors",
    mappings=mappings,
    settings={
        "analysis": {
            # Rewrite U+200C (zero-width non-joiner) to a plain space before tokenizing.
            "char_filter": {
                "zero_width_spaces": {
                    "type": "mapping",
                    "mappings": ["\\u200C=>\\u0020"],
                },
            },
            "filter": {
                "persian_stop": {"type": "stop", "stopwords": "_persian_"},
                "persian_stemmer": {"type": "stemmer", "language": "persian"},
            },
            "analyzer": {
                "rebuilt_persian": {
                    "tokenizer": "standard",
                    "char_filter": ["zero_width_spaces"],
                    # Token filters run in the order listed.
                    "filter": [
                        "lowercase",
                        "decimal_digit",
                        "arabic_normalization",
                        "persian_normalization",
                        "persian_stop",
                        "persian_stemmer",
                    ],
                },
            },
        },
    },
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'doctors'})

In [38]:
def _doctor_actions(frame, index_name="doctors"):
    """Yield one bulk "index" action per row of ``frame``.

    Args:
        frame: DataFrame holding one doctor per row.
        index_name: Index the documents are written to.

    Yields:
        Dicts with ``_index``, ``_id`` (the row's index label as a string) and
        ``_source`` (the document body).
    """
    # Document field -> DataFrame column. Every field keeps its column name
    # except "expertise", which is read from "display_expertise".
    column_for_field = {
        "expertise": "display_expertise",
        "gender": "gender",
        "experience": "experience",
        "title": "title",
        "star": "star",
        "rates_count": "rates_count",
        "number_of_visits": "number_of_visits",
        "view": "view",
        "insurances": "insurances",
        "doctor_encounter": "doctor_encounter",
        "explanation_of_issue": "explanation_of_issue",
        "quality_of_treatment": "quality_of_treatment",
        "comments_count": "comments_count",
        "waiting_time": "waiting_time",
        "clinic": "clinic",
    }
    for row_label, row in frame.iterrows():
        yield {
            "_index": index_name,
            "_id": str(row_label),
            "_source": {
                field: row[column] for field, column in column_for_field.items()
            },
        }


# Send the documents in bulk batches rather than one HTTP request per row;
# helpers.bulk raises if any document is rejected.
indexed_count, index_errors = helpers.bulk(es, _doctor_actions(data))

In [39]:
# Refresh the index so the documents written above are visible to search,
# then display how many documents it holds.
es.indices.refresh(index="doctors")
es.cat.count(index="doctors", format="json")

ListApiResponse([{'epoch': '1725367511', 'timestamp': '12:45:11', 'count': '16296'}])

In [40]:
# Quick sanity check of the index: a query-string search (`q`) that names no
# field, so the text is not tied to one particular field.
es.search(index="doctors", q="احمداباد")

ObjectApiResponse({'took': 119, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 209, 'relation': 'eq'}, 'max_score': 6.481121, 'hits': [{'_index': 'doctors', '_id': '10923', '_score': 6.481121, '_source': {'expertise': 'متخصص زنان و زایمان, دکترای حرفه\u200cای پزشکی', 'gender': 'F', 'experience': 43.0, 'title': 'نفیسه ثقفی', 'star': 4.25, 'rates_count': 122.0, 'number_of_visits': 17197.0, 'view': '17K', 'insurances': '[]', 'doctor_encounter': 4.40625, 'explanation_of_issue': 3.939393939393939, 'quality_of_treatment': 4.336633663366337, 'comments_count': 40.0, 'waiting_time': 0.280701754385964, 'clinic': [{'city': 'مشهد', 'number': '8406017, 8402767', 'address': 'محل کار: احمداباد-بیمارستان قایم | مطب: احمداباد-پرستار1 - ساختمان 4', 'province_name': 'خراسان رضوی'}]}}, {'_index': 'doctors', '_id': '12385', '_score': 6.3974333, '_source': {'expertise': 'متخصص بیماری های داخلی, دکترای حرفه ای پزشکی', 'gender': 'M', 'exper

In [41]:
def search_doctors(search_params, index_name="doctors", client=None):
    """Search the doctors index using whichever criteria are supplied.

    Args:
        search_params: Mapping with any of the optional keys below. A key that
            is missing, ``None`` or an empty string is ignored.

            * ``"problem"``: free text matched against the ``specialty`` field.
            * ``"expertise"``: free text matched against the ``expertise`` field.
            * ``"city"``: filter on ``clinic.city``.
            * ``"gender"``: filter on ``gender``.
        index_name: Name of the index to query.
        client: Object exposing ``search(index=..., query=...)``. Defaults to
            the notebook-level ``es`` client.

    Returns:
        The list stored under ``response["hits"]["hits"]``.
    """

    def _given(key):
        # Treat None and "" the same as a missing key.
        value = search_params.get(key)
        return None if value is None or value == "" else value

    # Filters use `match`, not `term`: the target fields are analyzed text, so
    # the input has to go through the same analysis as the indexed tokens
    # (a raw "F" never equals the lower-cased token "f").
    filters = []
    city = _given("city")
    if city is not None:
        # operator "and": every word of a multi-word city name must be present.
        filters.append(
            {"match": {"clinic.city": {"query": city, "operator": "and"}}}
        )
    gender = _given("gender")
    if gender is not None:
        filters.append({"match": {"gender": gender}})

    # Each free-text input is matched against its own field only.
    # NOTE(review): no "specialty" field is declared in the notebook's mappings
    # or written by its indexing loop - confirm the target field for "problem".
    text_clauses = []
    for key, field in (("problem", "specialty"), ("expertise", "expertise")):
        text = _given(key)
        if text is not None:
            text_clauses.append({"match": {field: text}})

    must = []
    if len(text_clauses) == 1:
        must.append(text_clauses[0])
    elif text_clauses:
        # A document qualifies if either text matches; it is scored by the
        # best-matching clause.
        must.append({"dis_max": {"queries": text_clauses}})

    # With no criteria at all, both lists are empty and the query matches everything.
    query = {"bool": {"must": must, "filter": filters}}

    search_client = es if client is None else client
    response = search_client.search(index=index_name, query=query)
    return response["hits"]["hits"]

In [48]:
# Example usage: filter on city and gender, full-text match on expertise.
search_params = dict(city="مشهد", gender="f", expertise="مغز")
results = search_doctors(search_params)

# Print the stored document of every hit, one per line.
for result in results:
    print(result["_source"])

# Number of hits returned (shown as the cell's output).
len(results)

{'expertise': 'مغز و اعصاب', 'gender': 'F', 'experience': None, 'title': 'نیلوفر رمضانی', 'star': 4.6, 'rates_count': 55.0, 'number_of_visits': 9556.0, 'view': '9K', 'insurances': '[]', 'doctor_encounter': 4.6, 'explanation_of_issue': 4.7, 'quality_of_treatment': 4.5, 'comments_count': 38.0, 'waiting_time': 1.0, 'clinic': [{'city': 'مشهد', 'number': '09368127879', 'address': 'احمد اباد ۶- چهار راه سمت راست -روبه روی خیابان صبوری - ساختمان پزشکان ۳۳ طبقه اول', 'province_name': 'خراسان رضوی'}]}
{'expertise': 'متخصص جراحی مغز و اعصاب, متخصص جراحی مغز و اعصاب', 'gender': 'F', 'experience': None, 'title': 'مهدی آبیلی', 'star': 3.6, 'rates_count': 81.0, 'number_of_visits': 7678.0, 'view': '7K', 'insurances': '[]', 'doctor_encounter': 4.5, 'explanation_of_issue': 4.1, 'quality_of_treatment': 4.5, 'comments_count': 12.0, 'waiting_time': 0.0, 'clinic': [{'city': 'مشهد', 'number': None, 'address': 'جاده سنتو - مقابل سه راه فردوسی : مـرکـزآموزشـی، پژوهشـی و درمـانـی طـالـقـانـی', 'province_name':

10