In [None]:
pip install elasticsearch

In [4]:
import json
import os
from getpass import getpass

import pandas as pd

from elasticsearch import Elasticsearch
from elasticsearch import helpers



In [43]:
# Load the table that will be indexed; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='combined.csv')

In [42]:
# Index definition (settings + mappings) passed to Elasticsearch when the index is
# created. The repetitive analyzer and field entries are generated from small
# builders so each variation is stated exactly once.


def _classic_analyzer(*filter_names):
    """Return a custom analyzer config: classic tokenizer plus the given token filters, in order."""
    return {
        "type": "custom",
        "tokenizer": "classic",
        "filter": list(filter_names),
    }


def _front_edge_ngram(min_gram):
    """Return an edge_ngram token filter config with the given min_gram, max_gram 20 and side 'front'."""
    return {
        "type": "edge_ngram",
        "min_gram": min_gram,
        "max_gram": 20,
        "side": "front",
    }


def _analyzed_text_field(fielddata=False):
    """Return a text field config using tamil_analyzer_syn_sw_st with a keyword sub-field.

    When fielddata is true, a "fielddata": True entry is added to the field.
    """
    field = {"type": "text", "analyzer": "tamil_analyzer_syn_sw_st"}
    if fielddata:
        field["fielddata"] = True
    field["fields"] = {"keyword": {"type": "keyword", "ignore_above": 256}}
    return field


# Analyzer name -> ordered token-filter chain (the order of filters is significant).
_ANALYZER_FILTERS = {
    "tamil_ngram_analyzer": ("ngram_filter",),
    "tamil_ngram_analyzer_1": ("ngram_filter_1",),
    "tamil_ngram_analyzer_2": ("ngram_filter_2",),
    "tamil_analyzer_sw": ("custom_stopword",),
    "tamil_analyzer_st": ("custom_stemmer",),
    "tamil_analyzer_st_sw": ("custom_stemmer", "custom_stopword"),
    "tamil_analyzer_sw_st": ("custom_stopword", "custom_stemmer"),
    "tamil_analyzer_syn": ("custom_synonym",),
    "tamil_analyzer_syn_sw": ("custom_synonym", "custom_stopword"),
    "tamil_analyzer_syn_st": ("custom_synonym", "custom_stemmer"),
    "tamil_analyzer_syn_sw_st": ("custom_synonym", "custom_stopword", "custom_stemmer"),
}

# Columns that share the plain analyzed-text mapping (no fielddata).
_TEXT_COLUMNS = (
    "Album",
    "Composer",
    "Interpretation",
    "Lyricist",
    "Metaphor",
    "Song Name",
    "Source Domain",
    "Star",
)

mapping = {
    "settings": {
        "index": {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        },
        "analysis": {
            "analyzer": {
                analyzer_name: _classic_analyzer(*chain)
                for analyzer_name, chain in _ANALYZER_FILTERS.items()
            },
            "filter": {
                "ngram_filter": _front_edge_ngram(2),
                "ngram_filter_1": _front_edge_ngram(1),
                "ngram_filter_2": _front_edge_ngram(4),
                # The *_path values are file paths handed to Elasticsearch as-is.
                "custom_stopword": {
                    "type": "stop",
                    "stopwords_path": "analyze/stopwords.txt",
                },
                "custom_stemmer": {
                    "type": "stemmer_override",
                    "rules_path": "analyze/stem.txt",
                },
                "custom_synonym": {
                    "type": "synonym",
                    "synonyms_path": "analyze/synonyms.txt",
                },
            },
        },
    },
    "mappings": {
        "properties": {
            **{column: _analyzed_text_field() for column in _TEXT_COLUMNS},
            # Target Domain is the only text field with fielddata enabled.
            "Target Domain": _analyzed_text_field(fielddata=True),
            "Year": {"type": "float"},
        },
    },
}


In [45]:
from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no secret is stored in
# the notebook; the password falls back to an interactive prompt when ES_PASSWORD is
# not set.
# NOTE(review): a password used to be hardcoded here - treat it as compromised and
# rotate it.
ES_URL = os.environ.get("ES_URL", "https://localhost:9200")
ES_USER = os.environ.get("ES_USER", "elastic")
ES_PASSWORD = os.environ.get("ES_PASSWORD") or getpass(f"Elasticsearch password for {ES_USER}: ")

# verify_certs=False disables TLS certificate validation. Presumably this targets a
# local node with a self-signed certificate - confirm before pointing ES_URL at any
# other host.
es = Elasticsearch(ES_URL, verify_certs=False,
                   http_auth=(ES_USER, ES_PASSWORD))

# convert pandas dataframe to json

# then do bulk upload with mapping


def upload_to_elastic(df, index_name='songs'):
    """Create the index if it is missing and bulk-index every row of ``df`` into it.

    Uses the module-level ``es`` client and ``mapping`` index definition.

    Args:
        df: pandas DataFrame; each row becomes one document.
        index_name: name of the target index (defaults to 'songs').

    Prints the value returned by ``helpers.bulk``.
    """
    # Only create the index when it does not exist yet, so re-running the cell does
    # not fail on an already-existing index.
    if not es.indices.exists(index=index_name):
        es.indices.create(index=index_name, body=mapping)

    # Serialise through JSON to turn the frame into a list of plain dicts
    # (one per row) that the bulk helper can send as documents.
    records = json.loads(df.to_json(orient='records'))

    # NOTE(review): the documents carry no explicit _id, so calling this again
    # presumably indexes the same rows a second time - confirm whether
    # deterministic ids are wanted.
    res = helpers.bulk(es, records, index=index_name)

    print(res)


# Run the upload: sets up the 'songs' index and bulk-indexes every row of df.
upload_to_elastic(df)


  after removing the cwd from sys.path.
  del sys.path[0]


(376, [])


