In [1]:
import pandas

In [2]:
# Load transcripts.csv into a DataFrame; `df` is not referenced again in the cells shown here.
df = pandas.read_csv('transcripts.csv')

In [4]:
# Load ted_main.csv; `ted` is the frame that gets exported to Elasticsearch bulk format below.
ted = pandas.read_csv('ted_main.csv')

In [5]:
# Show the dtype pandas inferred for each column of `ted`.
ted.dtypes

comments               int64
description           object
duration               int64
event                 object
film_date              int64
languages              int64
main_speaker          object
name                  object
num_speaker            int64
published_date         int64
ratings               object
related_talks         object
speaker_occupation    object
tags                  object
title                 object
url                   object
views                  int64
dtype: object

## 1. Create a JSON file with index metadata for the bulk API

In [11]:
import json

In [14]:
# One bulk "index" action per row of `ted`; the row position is used as the document _id.
meta_field_list = [
    {"index": {"_index": "ted", "_type": "_doc", "_id": row_number}}
    for row_number in range(len(ted))
]

In [15]:
"""create json file in specific format for bulk operation"""
## Bulk format: every document takes two newline-terminated lines
## action_and_meta_data\n
## optional_source\n

with open('ted.json', 'w') as outfile:
    # The orient must be spelled 'records': the abbreviated 'record' alias was
    # deprecated and is rejected by pandas >= 2.0.
    for (idx, line) in zip(meta_field_list, ted.to_dict('records')):
        # Missing cells come back as float NaN, which json.dump would write as the
        # bare token NaN (not valid JSON). Write them as null instead.
        # Assumes every cell is a scalar, as produced by read_csv.
        source = {
            key: (None if pandas.isna(value) else value)
            for key, value in line.items()
        }
        json.dump(idx, outfile)
        outfile.write('\n')
        json.dump(source, outfile)
        outfile.write('\n')

## 2. Import the documents into Elasticsearch

```
curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/ted/_bulk --data-binary @ted.json
```

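### Create mappings

Create the index with these explicit mappings before running the bulk import above; the type of an existing field cannot be changed once documents have been indexed.

```
PUT ted
{
  "mappings": {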
    
    "properties": {
      "comments": {
        "type": "long"
      },
      "description": {
        "type": "text"
      },
      "duration": {
        "type": "long"
      },
      "event": {
        "type": "keyword"
      },
      "film_date": {
        "type": "date",
        "format": "epoch_second"
      },
      "languages": {
        "type": "long"
      },
      "main_speaker": {
        "type": "text"
      },
      "name": {
        "type": "text"
      },
      "num_speaker": {
        "type": "long"
      },
      "published_date": {
        "type": "date",
        "format": "epoch_second"
      },
      "ratings": {
        "type": "text"
      },
      "related_talks": {
        "type": "text"
      },
      "speaker_occupation": {
        "type": "text"
      },
      "tags": {
        "type": "text"
      },
      "title": {
        "type": "text"
      },
      "url": {
        "type": "text"
      },
      "views": {
        "type": "long"
      }
    }
  }
}
```

## 3. Perform search queries

__Event Groups__

```
GET ted/_doc/_search?size=0
{
    "aggs" : {
        "events" : {
            "terms" : { "field" : "event" } 
        }
    }
}
```

__Response:__
```
{ ...
"hits" : {
    "total" : {
      "value" : 2544,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "events" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 1805,
      "buckets" : [
        {
          "key" : "TED2014",
          "doc_count" : 84
        },
        {
          "key" : "TED2009",
          "doc_count" : 83
        },
        {
          "key" : "TED2013",
          "doc_count" : 77
        },
        {
          "key" : "TED2016",
          "doc_count" : 77
        },
        {
          "key" : "TED2015",
          "doc_count" : 75
        },
        {
          "key" : "TED2011",
          "doc_count" : 70
        },
        {
          "key" : "TEDGlobal 2012",
          "doc_count" : 70
        },
        {
          "key" : "TED2007",
          "doc_count" : 68
        },
        {
          "key" : "TED2010",
          "doc_count" : 68
        },
        {
          "key" : "TED2017",
          "doc_count" : 67
        }
      ]
    }
  }
}
```

___________
__Query Examples:__

```
GET ted/_doc/_search
{
  "query":{
    "match":{
      "speaker_occupation":"economist"
    }
  }
}

GET ted/_doc/_search
{
  "query":{
    "match":{
      "tags":"business"
    }
  }
}

GET ted/_doc/_search
{
  "query": {
    "more_like_this": {
      "fields": ["description"],
      "like": "Sir Ken Robinson makes an entertaining and profoundly moving case for creating an education system that nurtures (rather than undermines) creativity.",
      "min_term_freq": 1,
      "max_query_terms": 50,
      "min_doc_freq": 4
    }
  }
}

GET ted/_doc/_search
{
  "query":{
    "match":{
      "main_speaker":"Ken Robinson"
    }
  }
}
```