In [71]:
from elasticsearch import Elasticsearch, RequestsHttpConnection, helpers
from requests_aws4auth import AWS4Auth
import boto3
import json
from datetime import datetime

In [60]:
# Constant
# Suffix appended to every alias name below to form the concrete index name
# (looks like a YYYYMMDD date stamp); change it to build a new set of indices.
index_suffix = "_20210623"

In [57]:
# Check Connection
# Builds a SigV4-signed Elasticsearch client for the AWS-hosted domain and
# prints the cluster info as a connectivity check.

host = 'search-dev-search-es-63-v2-dkl6ndeea7eens5fs2b2nlncbe.ap-northeast-2.es.amazonaws.com'
region = 'ap-northeast-2'
service = 'es'

session = boto3.Session()
credentials = session.get_credentials()
if credentials is None:
    # Without this guard a missing credential chain only shows up as an
    # AttributeError on `credentials.access_key` below.
    raise RuntimeError("No AWS credentials found for the default boto3 session")

# Sign with the `service` constant instead of repeating the 'es' literal.
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)
# Alternative with static keys:
# awsauth = AWS4Auth(access_key, secret_key, region, service)

es = Elasticsearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=30,
    max_retries=10,
    retry_on_timeout=True,
)

print(es.info())

{'name': 'WhY2Tkc', 'cluster_name': '175979101058:dev-search-es-63-v2', 'cluster_uuid': 'd-ksNi6XSbS-kjWr14qYhw', 'version': {'number': '6.3.1', 'build_flavor': 'oss', 'build_type': 'zip', 'build_hash': 'Unknown', 'build_date': '2020-09-14T07:50:43.115348Z', 'build_snapshot': False, 'lucene_version': '7.3.1', 'minimum_wire_compatibility_version': '5.6.0', 'minimum_index_compatibility_version': '5.0.0'}, 'tagline': 'You Know, for Search'}


In [58]:
        
# Index-creation body passed to es.indices.create() by make_index().
settings = {
    "settings": {
        "number_of_shards": 3,
        "number_of_replicas": 0,
        "index.search.slowlog.threshold.query.warn": "3s",
        "index": {
            "analysis": {
                # All three analyzers share the tokenizer and the base filters;
                # only korean_synonyms appends the synonym filter.
                "analyzer": {
                    analyzer_name: {
                        "type": "custom",
                        "tokenizer": "seunjeon",
                        "filter": ["lowercase", "stop"] + extra_filters,
                    }
                    for analyzer_name, extra_filters in (
                        ("korean_synonyms", ["synonym_filter"]),
                        ("korean_no_synonyms", []),
                        ("korean_searcher", []),
                    )
                },
                "tokenizer": {
                    "seunjeon": {
                        "type": "seunjeon_tokenizer",
                        "index_eojeol": False,
                        "decompound": True,
                        "user_dict_path": "analyzers/F58588551",
                    }
                },
                "filter": {
                    "synonym_filter": {
                        "type": "synonym",
                        "synonyms_path": "analyzers/F111096865",
                    }
                },
            }
        },
    }
}


def make_index(es, index_name):
    """Create ``index_name`` with the module-level ``settings`` body.

    An index that already exists under that name is deleted first, so the
    result is always a freshly created index. Progress is printed.
    """
    index_api = es.indices
    already_exists = index_api.exists(index=index_name)
    if already_exists:
        index_api.delete(index=index_name)
        print("delete index :", index_name)
    index_api.create(index=index_name, body=settings)
    print("create index :", index_name)


In [75]:
def indexing_dump(es, index_name, file_path, make_index_yn=True, mapping=None):
    """Optionally (re)create an index, apply a mapping, and bulk-load a JSON dump.

    Args:
        es: Elasticsearch client.
        index_name: Target index name.
        file_path: Path to a file that ``json.load`` can parse into the
            documents handed to ``helpers.bulk``; ``None`` skips loading.
        make_index_yn: When true, call ``make_index`` first (which deletes an
            existing index of the same name).
        mapping: Optional mapping body applied to the ``_doc`` type; ``None``
            skips the mapping step.

    Prints the put_mapping response and the ``helpers.bulk`` result.
    """
    if make_index_yn:
        make_index(es, index_name)

    if mapping is not None:
        resp = es.indices.put_mapping(index=index_name, body=mapping, doc_type="_doc")
        print(resp)

    if file_path is not None:
        # Decode explicitly as UTF-8 so non-ASCII text does not depend on the
        # platform default encoding; the `with` block closes the file.
        with open(file_path, encoding="utf-8") as json_file:
            json_docs = json.load(json_file)
        ok, err = helpers.bulk(es, json_docs, index=index_name, doc_type="_doc")
        print(ok, err)
            
def get_doc_from_line(line):
    """Convert one JSON line into an action dict for ``helpers.bulk``.

    The ``_op`` marker is replaced by ``_op_type`` ("delete" when ``_op`` is
    "D", otherwise "index"), and empty-string ``start_time`` / ``end_time``
    values become ``None``. A missing ``_op``, ``start_time`` or ``end_time``
    key raises ``KeyError``.
    """
    action = json.loads(line)
    action["_op_type"] = "delete" if action["_op"] == "D" else "index"
    del action["_op"]

    for time_field in ("start_time", "end_time"):
        if action[time_field] == "":
            action[time_field] = None
    return action

def indexing_dump_jsonline(es, index_name, file_path, make_index_yn=True, mapping=None):
    """Bulk-load a JSON-lines dump into ``index_name`` in batches.

    Args:
        es: Elasticsearch client.
        index_name: Target index name.
        file_path: Path to a file with one JSON document per line; each line
            is converted with ``get_doc_from_line``.
        make_index_yn: When true, call ``make_index`` first (which deletes an
            existing index of the same name).
        mapping: Optional mapping body applied to the ``_doc`` type; ``None``
            skips the mapping step.

    Prints start/end timestamps and the ``helpers.bulk`` result per batch.
    """
    print("indexing start: {}".format(datetime.today()))
    if make_index_yn:
        make_index(es, index_name)

    if mapping is not None:
        resp = es.indices.put_mapping(index=index_name, body=mapping, doc_type="_doc")
        print(resp)

    # `with` guarantees the handle is closed even if a batch fails; UTF-8 is
    # set explicitly so decoding does not depend on the platform default.
    with open(file_path, encoding="utf-8") as f:
        while True:
            lines = f.readlines(10 * 1024 * 1024)  # read roughly 10 MB of lines per batch
            if not lines:
                break
            # Blank lines (e.g. a trailing newline) are not JSON; skip them
            # instead of letting json.loads abort the whole load.
            json_docs = [get_doc_from_line(line) for line in lines if line.strip()]
            if not json_docs:
                continue
            ok, err = helpers.bulk(es, json_docs, index=index_name, doc_type="_doc")
            print(ok, err)
    print("indexing end: {}".format(datetime.today()))
            
def switch_alais(es, alais_name, new_index_name):
    """Repoint the alias ``alais_name`` at ``new_index_name``.

    Every index currently behind the alias gets a ``remove`` action, followed
    by one ``add`` action for ``new_index_name``; all actions are sent in a
    single ``update_aliases`` request and the response is printed.

    Args:
        es: Elasticsearch client.
        alais_name: Alias to move.
        new_index_name: Index the alias should point at afterwards.
    """
    current_indices = []
    if es.indices.exists_alias(name=alais_name):
        # Pass the alias as `name=`, matching exists_alias above: the first
        # positional parameter of get_alias() is `index`, not `name`.
        current_indices = list(es.indices.get_alias(name=alais_name))

    actions = [
        {"remove": {"alias": alais_name, "index": index}}
        for index in current_indices
    ]
    actions.append({"add": {"alias": alais_name, "index": new_index_name}})

    # The action list always holds at least the "add", so no emptiness check.
    result = es.indices.update_aliases(body={"actions": actions})
    print(result)

In [5]:
# indexing dump - alliance, benefit, place, coupon
# Each alias is paired with the concrete index it will be pointed at, named
# "<alias><index_suffix>".

alias_alliance_index_name = 'ocb_keyword_alliance52_v2'
alliance_index_name = alias_alliance_index_name + index_suffix

alias_benefit_index_name = 'ocb_keyword_benefit52_v2'
benefit_index_name = alias_benefit_index_name + index_suffix

alias_place_index_name = 'ocb_keyword_place52_v2'
place_index_name = alias_place_index_name + index_suffix

alias_coupon_index_name = 'ocb_keyword_coupon52_v2'
coupon_index_name = alias_coupon_index_name + index_suffix

# Alliance index: (re)created with its mapping only; no dump file is loaded.
mapping_alliance = {
    "properties": {
        "alliance_nm": {
            "type": "text",
            # Two sub-fields that differ only in the index-time analyzer.
            "fields": {
                subfield: {"type": "text", "analyzer": index_analyzer, "search_analyzer": "korean_searcher"}
                for subfield, index_analyzer in (
                    ("syn", "korean_synonyms"),
                    ("no_syn", "korean_no_synonyms"),
                )
            },
            "analyzer": "korean_synonyms",
            "search_analyzer": "korean_searcher",
        }
    }
}
indexing_dump(es, alliance_index_name, None, mapping=mapping_alliance)

# Benefit index: "text" and "keyword" share one synonym-aware definition;
# "biz_nm" is indexed without synonyms.
mapping_benefit = {
    "properties": {
        **{
            field_name: {
                "type": "text",
                "fields": {
                    "syn": {"type": "text", "analyzer": "korean_synonyms", "search_analyzer": "korean_searcher"},
                    "no_syn": {"type": "text", "analyzer": "korean_no_synonyms", "search_analyzer": "korean_searcher"},
                },
                "analyzer": "korean_synonyms",
                "search_analyzer": "korean_searcher",
            }
            for field_name in ("text", "keyword")
        },
        "biz_nm": {
            "type": "text",
            "analyzer": "korean_no_synonyms",
            "search_analyzer": "korean_searcher",
        },
    }
}
# The first dump (re)creates the index and applies the mapping; the second is
# appended to the same index.
indexing_dump(es, benefit_index_name, "search_data/ocb/dev/event.json", mapping=mapping_benefit)
indexing_dump(es, benefit_index_name, "search_data/ocb/dev/shopping.json", make_index_yn=False)

# Place index: synonym-aware business name plus a geo_point location field.
mapping_place = {
    "properties": {
        "biz_nm": {
            "type": "text",
            "fields": {
                subfield: {"type": "text", "analyzer": index_analyzer, "search_analyzer": "korean_searcher"}
                for subfield, index_analyzer in (
                    ("syn", "korean_synonyms"),
                    ("no_syn", "korean_no_synonyms"),
                )
            },
            "analyzer": "korean_synonyms",
            "search_analyzer": "korean_searcher",
        },
        "latlon": {"type": "geo_point"},
    }
}
# The first part (re)creates the index and applies the mapping; the remaining
# parts are appended to it.
indexing_dump(es, place_index_name, "search_data/ocb/dev/place_1.json", mapping=mapping_place)
for place_part_path in (
    "search_data/ocb/dev/place_2.json",
    "search_data/ocb/dev/place_3.json",
    "search_data/ocb/dev/place_4.json",
    "search_data/ocb/dev/place_5.json",
    "search_data/ocb/dev/place_6.json",
):
    indexing_dump(es, place_index_name, place_part_path, make_index_yn=False)

# Coupon, gifticon and shopping search data are indexed together as coupon52.
# NOTE(review): only the coupon and gifticon dumps are loaded here;
# shopping.json is loaded into the benefit index — confirm which is intended.
mapping_coupon = {
    "properties": {
        "biz_nm": {
            "type": "text",
            "fields": {
                subfield: {"type": "text", "analyzer": index_analyzer, "search_analyzer": "korean_searcher"}
                for subfield, index_analyzer in (
                    ("syn", "korean_synonyms"),
                    ("no_syn", "korean_no_synonyms"),
                )
            },
            "analyzer": "korean_synonyms",
            "search_analyzer": "korean_searcher",
        },
        "text": {
            "type": "text",
            "analyzer": "korean_no_synonyms",
            "search_analyzer": "korean_searcher",
        },
    }
}
# The first dump (re)creates the index and applies the mapping; the second is
# appended to the same index.
indexing_dump(es, coupon_index_name, "search_data/ocb/dev/tc_intg_coupon.json", mapping=mapping_coupon)
indexing_dump(es, coupon_index_name, "search_data/ocb/dev/gifticon.json", make_index_yn=False)


create index : ocb_keyword_alliance52_1115
{'acknowledged': True}
create index : ocb_keyword_benefit52_1115
{'acknowledged': True}
176 []
39 []
create index : ocb_keyword_place52_1115
{'acknowledged': True}
10000 []
10000 []
10000 []
10000 []
10000 []
1885 []
create index : ocb_keyword_coupon52_1115
{'acknowledged': True}
134 []
22540 []


In [6]:
# Point each alias at the index that was just built for it.
for alias_to_move, target_index in (
    (alias_alliance_index_name, alliance_index_name),
    (alias_benefit_index_name, benefit_index_name),
    (alias_place_index_name, place_index_name),
    (alias_coupon_index_name, coupon_index_name),
):
    switch_alais(es, alias_to_move, target_index)


{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}
{'acknowledged': True}


In [17]:
# indexing dump - ohsara_ice

alias_ohsara_index_name = 'ocb_keyword_ohsara_ice52_v2'
ohsara_index_name = alias_ohsara_index_name + index_suffix

# All four text fields share one definition: indexed without synonyms, with a
# synonym-aware "syn" sub-field.
mapping_ohsara = {
    "properties": {
        field_name: {
            "type": "text",
            "fields": {
                "syn": {"type": "text", "analyzer": "korean_synonyms", "search_analyzer": "korean_searcher"}
            },
            "analyzer": "korean_no_synonyms",
            "search_analyzer": "korean_searcher",
        }
        for field_name in ("name", "tag", "maker", "brand")
    }
}

indexing_dump_jsonline(es, ohsara_index_name, "search_data/ocb/dev/ohsara.json", mapping=mapping_ohsara)

delete index : ocb_keyword_ohsara_ice52_v2_20210518
create index : ocb_keyword_ohsara_ice52_v2_20210518
{'acknowledged': True}
1040 []
{'acknowledged': True}


In [None]:
# Point the ohsara alias at the ohsara index named with the current suffix.
switch_alais(es, alias_ohsara_index_name, ohsara_index_name)

In [77]:
# indexing dump - homeshopping

alias_homeshopping_index_name = 'ocb_keyword_homeshopping52_v2'
homeshopping_index_name = alias_homeshopping_index_name + index_suffix

# Both text fields share one definition: indexed without synonyms, with a
# synonym-aware "syn" sub-field.
mapping_homeshopping = {
    "properties": {
        field_name: {
            "type": "text",
            "fields": {
                "syn": {"type": "text", "analyzer": "korean_synonyms", "search_analyzer": "korean_searcher"}
            },
            "analyzer": "korean_no_synonyms",
            "search_analyzer": "korean_searcher",
        }
        for field_name in ("biz_nm", "text")
    }
}

# The first part (re)creates the index and applies the mapping; the remaining
# parts are appended to it.
indexing_dump_jsonline(es, homeshopping_index_name, "search_data/ocb/dev/homeShopping_1.json", mapping=mapping_homeshopping)
for homeshopping_part_path in (
    "search_data/ocb/dev/homeShopping_2.json",
    "search_data/ocb/dev/homeShopping_3.json",
    "search_data/ocb/dev/homeShopping_4.json",
    "search_data/ocb/dev/homeShopping_5.json",
):
    indexing_dump_jsonline(es, homeshopping_index_name, homeshopping_part_path, make_index_yn=False)

indexing start: 2021-06-23 16:34:48.149450
delete index : ocb_keyword_homeshopping52_v2_20210623
create index : ocb_keyword_homeshopping52_v2_20210623
{'acknowledged': True}
26314 []
26092 []
26267 []
26902 []
30868 []
30852 []
30859 []
30684 []
30662 []
30656 []
30661 []
30668 []
30669 []
30677 []
30675 []
30672 []
30669 []
30664 []
30677 []
30670 []
30651 []
30680 []
30654 []
30681 []
30659 []
30665 []
30676 []
30669 []
30663 []
30680 []
30667 []
30661 []
30669 []
4467 []
indexing end: 2021-06-23 16:40:19.239705
indexing start: 2021-06-23 16:40:19.247828
30668 []
30657 []
30665 []
30534 []
30507 []
30496 []
30488 []
30485 []
30485 []
30486 []
30492 []
30486 []
30496 []
30494 []
30490 []
30498 []
30489 []
30499 []
30488 []
30492 []
30495 []
30495 []
30491 []
30494 []
30497 []
30492 []
30507 []
30501 []
30488 []
30487 []
30496 []
30496 []
23667 []
indexing end: 2021-06-23 16:45:41.638852
indexing start: 2021-06-23 16:45:41.660510
30502 []
30492 []
30503 []
30485 []
30491 []
30485 []
30

In [78]:
# Point the homeshopping alias at the homeshopping index loaded above.
switch_alais(es, alias_homeshopping_index_name, homeshopping_index_name)

{'acknowledged': True}


In [19]:
def query_index(index_name, query):
    """Print ``index_name``, then the ``_source`` of each hit for ``query``.

    ``query`` is a search-body dict (e.g. ``{'query': {...}}``); it is merged
    over the defaults ``from=0`` / ``size=10``, so its keys win on conflict.
    The search runs on the module-level ``es`` client.
    """
    print(index_name)
    search_body = {'from': 0, 'size': 10}
    search_body.update(query)
    response = es.search(index=index_name, body=search_body)
    for hit in response['hits']['hits']:
        print(hit['_source'])

In [None]:
# query: coupons whose biz_nm must match the keyword
keyword = '기프트존'

coupon_query = {
    'query': {'bool': {'must': [{'match': {'biz_nm': keyword}}]}}
}
query_index(coupon_index_name, coupon_query)


In [None]:
# query: coupons where biz_nm or text should match the keyword
keyword = '안경마트'

coupon_query = {
    'query': {
        'bool': {
            'should': [
                {'match': {'biz_nm': keyword}},
                {'match': {'text': keyword}},
            ]
        }
    }
}
query_index(coupon_index_name, coupon_query)


In [None]:
# query: biz_nm must match the keyword; the same match is repeated as a
# should clause
keyword = '안경마트'

coupon_query = {
    'query': {
        'bool': {
            'must': [{'match': {'biz_nm': keyword}}],
            'should': [{'match': {'biz_nm': keyword}}],
        }
    }
}
query_index(coupon_index_name, coupon_query)


In [None]:
# query: biz_nm must match the keyword; a phrase match on text is an optional
# should clause
keyword = '안경마트'

coupon_query = {
    'query': {
        'bool': {
            'must': [{'match': {'biz_nm': keyword}}],
            'should': [{'match_phrase': {'text': keyword}}],
        }
    }
}
query_index(coupon_index_name, coupon_query)


In [None]:
# query: run the same keyword against the coupon, alliance, benefit (event)
# and place indices, each with a bool/should over two fields.
keyword = '안경'

coupon_query = {
  'query': {
    'bool': {
      'should': [
        {'match': {'biz_nm': keyword}},
        {'match': {'text': keyword}}
      ]
    }
  }
}
query_index(coupon_index_name, coupon_query)

alliance_query = {
  'query': {
    'bool': {
      'should': [
        {'match': {'alliance_nm': keyword}},
        {'match': {'desc_s': keyword}}
      ]
    }
  }
}
query_index(alliance_index_name, alliance_query)

# event (benefit)
event_query = {
  'query': {
    'bool': {
      'should': [
        {'match': {'text': keyword}},
        {'match': {'keyword': keyword}}
      ]
    }
  }
}
# Event documents are loaded into the benefit index; `event_index_name` was
# never defined, so the original call raised NameError.
query_index(benefit_index_name, event_query)

place_query = {
  'query': {
    'bool': {
      'should': [
        # NOTE(review): the original matched the keyword against `latlon`,
        # which mapping_place declares as geo_point; replaced with the
        # analysed `biz_nm` text field — confirm this is the intended field.
        {'match': {'biz_nm': keyword}},
        # The original was missing the closing brace of this clause, which
        # made the whole cell a SyntaxError.
        {'match': {'tc_cate_nm': keyword}}
      ]
    }
  }
}
query_index(place_index_name, place_query)


In [35]:
# Places within 12km of the given point; a tc_cate_nm match is an optional
# should clause next to the geo_distance filter.
place_query = {
    'query': {
        'bool': {
            'should': [{'match': {'tc_cate_nm': '뚜레쥬르'}}],
            "filter": {
                "geo_distance": {
                    "distance": "12km",
                    'latlon': '35.18979406215565,126.81872285499742',
                }
            },
        }
    }
}
query_index(place_index_name, place_query)


ocb_keyword_place52
{'is_category': '300201', 'max_save_rate': '0.3', 'code': 'STORE_11801639', 'rvw_cnt': None, 'channel': 'ocb5', 'gix_xy': '4565025.0,1266110.0', 'mid': '11801639', 'biz_nm': '스피드메이트 태양점', 'latlon': '35.17274028552403,126.80419688237764', 'biz_no': None, 'is_ocb': 'N', 'alliance_cd': 'O11004', 'point_disc_rate': '0', 'rvw_ave_pnt': '0', 'max_disc_rate': '0', 'ocb_category_main': '130', 'biz_addr': '광주 광산구 산정동 918-10', 'is_category_main': '300', 'cate_tong_cd': None, 'tc_cate_nm': '주유/자동차-렌터/판매/수리', 'logo_img': 'http://www.okcashbag.com/upload/alliance/MCHT_WEB_IMG2_sadmin_20130922211801_69030051.png', 'ocb_category': '130120', 'biz_tel_no': '062-962-4466', 'is_tong': 'N', 'category': 'STORE', 'cid': 'O'}
{'is_category': '200102', 'max_save_rate': '5', 'code': 'STORE_11801440', 'rvw_cnt': '4', 'channel': 'ocb5', 'gix_xy': '4568613.0,1265524.0', 'mid': '11801440', 'biz_nm': '미니스톱유동점', 'latlon': '35.15646613261263,126.9038545225341', 'biz_no': None, 'is_ocb': 'N', 'alli

In [22]:
# query - ohsara_ice
# cross_fields search over name/tag and their synonym sub-fields; the plain
# fields carry boost 2, the ".syn" sub-fields boost 1.
keyword = '바지'

ohsara_query = {
    "query": {
        "multi_match": {
            "query": keyword,
            "type": "cross_fields",
            "fields": ["name^2", "name.syn^1", "tag^2", "tag.syn^1"],
            "operator": "and",
        }
    }
}
query_index(ohsara_index_name, ohsara_query)


ocb_keyword_ohsara_ice52_v2_20210518
{'img': 'https://dev-s3.ohsara.okcashbag.com/upload/item/G2000029347/20210514101203L.png', 'amount': 1015, 'code': 'ohsaraV2_G2000028465', 'salePrice': 16200, 'adult_flag': False, 'maker': None, 'originPrice': 16200, 'rate': 12.0, 'name': '*최저가* [맥심] 화이트골드 모카골드 커피믹스 180T [카누/아메리카노/원두커피/믹스커피/디카페인/라떼]', 'regdate': '2021-05-13T10:17:36', 'tag': None, 'category': '검수전', 'brand': '맥심'}
{'img': 'https://dev-s3.ohsara.okcashbag.com/upload/item/G2000029347/20210514101203L.png', 'amount': 364, 'code': 'ohsaraV2_G2000028467', 'salePrice': 31100, 'adult_flag': False, 'maker': None, 'originPrice': 31100, 'rate': 6.0, 'name': '맥심 모카골드 화이트골드 커피믹스 360T [심플라떼/카누/디카페인/믹스커피]', 'regdate': '2021-05-13T10:17:36', 'tag': None, 'category': '검수전', 'brand': '맥심'}
{'img': 'https://dev-s3.ohsara.okcashbag.com/upload/item/G2000029347/20210514101203L.png', 'amount': 168, 'code': 'ohsaraV2_G2000028466', 'salePrice': 30900, 'adult_flag': False, 'maker': None, 'originPrice': 30900

In [25]:
# query - homeshopping
# cross_fields search over biz_nm/text and their synonym sub-fields; the plain
# fields carry boost 2, the ".syn" sub-fields boost 1.
keyword = '신세계'

homeshopping_query = {
    "query": {
        "multi_match": {
            "query": keyword,
            "type": "cross_fields",
            "fields": ["biz_nm^2", "biz_nm.syn^1", "text^2", "text.syn^1"],
            "operator": "and",
        }
    }
}
query_index(homeshopping_index_name, homeshopping_query)


ocb_keyword_homeshopping_new52_v2_20210518
{'biz_nm': '신세계TV쇼핑', 'img': 'http://img.shinsegaetvshopping.com/goods/223/20004223_g_20210318154449.jpg', 'code': 'homeShopping_video_1041233', 'rate': '0', 'rank': 0, 'text': '[뉴트리] 질건강 유산균 리스펙타 지노마스터 9개월분(9병)(A)'}
