In [None]:
import json
import os

from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connect to Elasticsearch.
# The endpoint and credentials can be overridden through environment variables
# so real secrets do not have to be committed with the notebook; the fallbacks
# keep the previous hard-coded values so existing local setups still run.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200")
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD", "hammasir")

es = Elasticsearch(
    ES_URL,
    # ca_certs=".\certs\ca\ca.crt",
    basic_auth=(ES_USER, ES_PASSWORD),
    # NOTE(review): TLS certificate verification is switched off and the
    # resulting warning is silenced -- presumably acceptable only for a local
    # development cluster; confirm before pointing this at a shared host.
    verify_certs=False,
    ssl_show_warn=False,
)
# Last expression of the cell: shows the cluster info and fails fast if the
# connection or the credentials are wrong.
es.info()

In [None]:
# Load the JSON export of the base dataset.
# The encoding is pinned to UTF-8 because open() otherwise falls back to the
# platform locale, which can fail or mis-decode non-ASCII text on systems whose
# default encoding is not UTF-8.
with open("../data/processed/base_dataset.json", encoding="utf-8") as f:
    json_data = json.load(f)

In [None]:
import pandas as pd

# Read the CSV version of the base dataset into a DataFrame; later cells clean
# this frame and index its rows.
data = pd.read_csv(filepath_or_buffer="../data/processed/base_dataset.csv")

In [None]:
import ast


def safe_literal_eval(val):
    """Parse a string holding a Python literal, tolerating bad input.

    Parameters
    ----------
    val : Any
        Value to convert. Only ``str`` values are parsed; anything else
        (numbers, ``None``, NaN, already-parsed objects) is returned unchanged.

    Returns
    -------
    Any
        The object produced by ``ast.literal_eval`` for a valid literal string,
        ``None`` when the string cannot be evaluated, or ``val`` itself when it
        is not a string.
    """
    if not isinstance(val, str):
        return val  # Nothing to parse, hand the value back untouched
    try:
        return ast.literal_eval(val)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # literal_eval is documented to raise any of these on malformed input
        # (e.g. TypeError for "{[1]: 2}", whose key is unhashable). A single bad
        # cell must not abort a whole column conversion, so report it as None.
        return None

In [None]:
# The "clinic" column is read from the CSV as text; turn every cell back into
# the Python object it represents (cells that cannot be parsed become None).
data["clinic"] = data["clinic"].map(safe_literal_eval)
# data["insurances"] = data["insurances"].map(safe_literal_eval)

In [None]:
import numpy as np

# Swap missing values (NaN) for None before the rows are turned into documents.
data = data.replace(to_replace=np.nan, value=None)

In [None]:
# Name of the custom analyzer that is registered in the index settings when the
# "doctors" index is created. It has to be referenced from the field mappings,
# otherwise text fields silently fall back to the default analyzer and the
# custom Persian analysis chain is never used.
PERSIAN_ANALYZER = "rebuilt_persian"

# Explicit field mappings for the "doctors" index.
mappings = {
    "properties": {
        # Exact-match field, queried with a `term` filter.
        "gender": {"type": "keyword"},
        # Free-text fields analysed with the custom Persian analyzer.
        "expertise": {"type": "text", "analyzer": PERSIAN_ANALYZER},
        "title": {"type": "text", "analyzer": PERSIAN_ANALYZER},
        "star": {"type": "float"},
        "rates_count": {"type": "integer"},
        "number_of_visits": {"type": "integer"},
        # NOTE(review): the content of "view" is not visible from this notebook,
        # so it keeps the default analyzer -- confirm whether it should also use
        # the Persian analyzer.
        "view": {"type": "text"},
        "insurances": {"type": "text", "analyzer": PERSIAN_ANALYZER},
        "experience": {"type": "integer"},
        "doctor_encounter": {"type": "float"},
        "explanation_of_issue": {"type": "float"},
        "quality_of_treatment": {"type": "float"},
        "comments_count": {"type": "integer"},
        "waiting_time": {"type": "float"},
        "clinic": {
            "type": "object",
            "properties": {
                # `clinic.city` is filtered with an exact `term` query, which
                # needs a non-analysed keyword field to match reliably. Other
                # clinic sub-fields are still mapped dynamically.
                "city": {"type": "keyword"},
            },
        },
    }
}

In [None]:
# Drop any previous copy of the index so it can be rebuilt from scratch.
# ignore_unavailable=True makes this a no-op when the index does not exist yet,
# so the notebook also runs top to bottom against a fresh cluster.
es.indices.delete(index="doctors", ignore_unavailable=True)

In [None]:
# Analysis settings for the "doctors" index: one character filter, three token
# filters and the "rebuilt_persian" analyzer that chains them together.
persian_analysis = {
    "char_filter": {
        # Replaces the zero-width non-joiner, "-" and "_" with a plain space
        # before tokenisation.
        "zero_width_spaces": {
            "type": "mapping",
            "mappings": ["\\u200C=>\\u0020", "-=>\\u0020", "_=>\\u0020"],
        }
    },
    "filter": {
        "persian_stop": {"type": "stop", "stopwords": "_persian_"},
        "persian_stemmer": {"type": "stemmer", "language": "persian"},
        # Strips a trailing "م" from a token.
        "remove_suffix_m": {
            "type": "pattern_replace",
            "pattern": "م$",
            "replacement": "",
        },
    },
    "analyzer": {
        "rebuilt_persian": {
            "tokenizer": "standard",
            "char_filter": ["zero_width_spaces"],
            # Token filters run in the listed order.
            "filter": [
                "lowercase",
                "decimal_digit",
                "arabic_normalization",
                "persian_normalization",
                "persian_stop",
                "remove_suffix_m",
                "persian_stemmer",
            ],
        }
    },
}

# Create the index with the field mappings and the analysis settings above.
es.indices.create(
    index="doctors",
    mappings=mappings,
    settings={"analysis": persian_analysis},
)

In [None]:
# Index field name -> DataFrame column it is read from. Every field uses the
# column of the same name except "expertise", which comes from
# "display_expertise".
FIELD_TO_COLUMN = {
    "expertise": "display_expertise",
    "gender": "gender",
    "experience": "experience",
    "title": "title",
    "star": "star",
    "rates_count": "rates_count",
    "number_of_visits": "number_of_visits",
    "view": "view",
    "insurances": "insurances",
    "doctor_encounter": "doctor_encounter",
    "explanation_of_issue": "explanation_of_issue",
    "quality_of_treatment": "quality_of_treatment",
    "comments_count": "comments_count",
    "waiting_time": "waiting_time",
    "clinic": "clinic",
}


def iter_doctor_actions(frame, index_name="doctors"):
    """Yield one bulk "index" action per row of ``frame``.

    The document id is the row's index label (as a string) and the document
    body holds the fields listed in ``FIELD_TO_COLUMN``.
    """
    for row_id, row in frame.iterrows():
        yield {
            "_index": index_name,
            "_id": str(row_id),
            "_source": {
                field: row[column] for field, column in FIELD_TO_COLUMN.items()
            },
        }


# Send the documents through the bulk helper instead of issuing one HTTP
# request per row; the ids and document contents are the same as with
# row-by-row indexing. bulk() raises if any document is rejected.
indexed_count, _ = helpers.bulk(es, iter_doctor_actions(data))

In [None]:
# Refresh the index so the documents indexed so far are visible to searches,
# then show the document count as the cell output.
es.indices.refresh(index="doctors")
doc_count = es.cat.count(index="doctors", format="json")
doc_count

In [None]:
# Quick free-text check against the index using the URI-style `q` parameter;
# the raw response is displayed as the cell output.
smoke_response = es.search(index="doctors", q="احمداباد")
smoke_response

In [None]:
def search_doctors(search_params, index_name="doctors"):
    """Search the doctors index from a dict of list-valued criteria.

    Parameters
    ----------
    search_params : dict
        Optional keys, each mapping to a list of strings:

        * ``"problem"`` / ``"expertise"`` -- every entry becomes a ``match``
          clause on the ``expertise`` field; a hit must satisfy at least one
          of them.
        * ``"city"`` -- the first entry is used as a ``term`` filter on
          ``clinic.city``.
        * ``"gender"`` -- the first entry is used as a ``term`` filter on
          ``gender``.

        Missing keys and empty lists are ignored.
    index_name : str, optional
        Index to query. Defaults to ``"doctors"``.

    Returns
    -------
    list
        The ``hits.hits`` list of the Elasticsearch response.
    """
    # Both "problem" and "expertise" terms are matched against the
    # "expertise" field.
    should_clauses = [
        {"match": {"expertise": term}}
        for field in ("problem", "expertise")
        for term in (search_params.get(field) or [])
    ]

    # Only the first value of each filter criterion is used; an empty list is
    # skipped instead of raising IndexError.
    filter_clauses = []
    for param, es_field in (("city", "clinic.city"), ("gender", "gender")):
        values = search_params.get(param)
        if values:
            filter_clauses.append({"term": {es_field: values[0]}})

    query = {
        "bool": {"must": [], "should": should_clauses, "filter": filter_clauses}
    }
    if should_clauses:
        # When a bool query also has filter clauses, `should` clauses are
        # optional by default and only influence scoring. Require at least one
        # match so the expertise terms restrict the results whether or not a
        # city/gender filter is present.
        query["bool"]["minimum_should_match"] = 1
    print(query)

    # `query=` follows the keyword-style API used by the rest of the notebook
    # (`document=`, `mappings=`, `settings=`) instead of the raw `body=` form.
    response = es.search(index=index_name, query=query)

    return response["hits"]["hits"]

In [None]:
# Example usage: one value per criterion, each wrapped in a list as
# search_doctors expects.
search_params = {"city": ["مشهد"], "gender": ["F"], "expertise": ["قلب"]}
results = search_doctors(search_params)

# Print the stored document of every hit.
for hit in results:
    print(hit["_source"])

# Cell output: number of hits returned.
len(results)

In [None]:
# Serialise the example bool query to a JSON string; ensure_ascii=False keeps
# the non-ASCII text as-is instead of \u escapes.
json.dumps(
    {
        "bool": {
            "must": [],
            "should": [{"match": {"expertise": "قلب"}}],
            "filter": [
                {"term": {"clinic.city": "مشهد"}},
                {"term": {"gender": "F"}},
            ],
        }
    },
    ensure_ascii=False,
)