# How to export/import an OpenSearch index

## Either use opensearchpy directly

In [None]:
import json

from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk, scan


client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}], http_compress=True)

# --- Export ------------------------------------------------------------------
# A plain `client.search` returns at most `size` hits, so an index holding more
# than 10,000 documents would be silently truncated -- and then deleted below.
# `scan` pages through every matching document with the scroll API instead;
# its `size` argument is only the page size per request.
query = {"query": {"match_all": {}}}
hits = list(scan(client, query=query, index="ind_name", size=1000))

# Keep the {"hits": {"hits": [...]}} layout of a search response so the file
# stays readable by code that expects a dumped search result.
search_result = {"hits": {"hits": hits}}

with open("ind_name.json", "w", encoding="utf-8") as file:
    json.dump(search_result, file)

# --- Delete ------------------------------------------------------------------
# NOTE(review): deleting the index also discards its mappings and settings.
# The bulk request below presumably re-creates the index with dynamically
# inferred mappings -- export the mappings separately if they matter.
client.indices.delete(index="ind_name")

# --- Import ------------------------------------------------------------------
with open("ind_name.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Prepare bulk indexing data. Carrying `_id` over keeps the original document
# ids instead of letting OpenSearch generate new ones on re-import.
actions = [
    {
        "_id": doc["_id"],
        "_source": doc["_source"],
    }
    for doc in data["hits"]["hits"]
]

bulk(client, actions, index="ind_name")

## Or use awswrangler

In [None]:
# Export and import an index in OpenSearch using AWS Wrangler (python 3.12.3)
# uv add awswrangler[opensearch] ipykernel awscli
# Run "aws configure" beforehand; awswrangler needs it to work.
import awswrangler as wr

# Connect to the OpenSearch node on localhost:9200.
oc = wr.opensearch.connect(host="localhost", port=9200)

# --- Export ------------------------------------------------------------------
# Without scrolling, one search request returns at most `size` hits, so an
# index with more than 10,000 documents would be truncated and the remainder
# lost when the index is deleted below. `is_scroll=True` makes awswrangler
# page through the whole index; `size` then only sets the page size.
results = wr.opensearch.search(
    oc,
    index="ind_name",
    is_scroll=True,
    size=1000,
)
# NOTE(review): a CSV round-trip may not preserve every field type (e.g.
# dates, arrays) -- confirm it is adequate for this index's documents.
results.to_csv("ind_name.csv", index=False)

# --- Delete ------------------------------------------------------------------
wr.opensearch.delete_index(
    oc,
    index="ind_name",
)

# --- Import ------------------------------------------------------------------
wr.opensearch.index_csv(
    oc,
    path="ind_name.csv",
    index="ind_name",
)

## Or use opensearch-py-ml

In [None]:
import opensearch_py_ml as oml
import pandas as pd
from opensearchpy import OpenSearch

client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])

# --- Export ------------------------------------------------------------------
# Materialize the index as a pandas DataFrame and persist it as Parquet.
odf = oml.DataFrame(client, "ind_name")
odf.to_pandas().to_parquet("ind_name.parquet")

# --- Import ------------------------------------------------------------------
# Index the DataFrame read back from the Parquet file. Previously `pdf` was
# loaded but unused and the data was fetched from the cluster a second time,
# so the exported file was never what actually got imported.
pdf = pd.read_parquet("ind_name.parquet")
# NOTE(review): the target index is "ind_named", not "ind_name" -- confirm
# that writing to a differently named index is intentional.
oml.etl.pandas_to_opensearch(
    pdf,
    client,
    "ind_named",
    os_dropna=True,
    os_verify_mapping_compatibility=False,
    os_if_exists="append",
)