In [38]:
from elasticsearch_dsl import connections

# Register the default Elasticsearch connection and keep a handle on the
# low-level client for direct API calls later in the notebook.
client = connections.create_connection(
    hosts=["elasticsearch"],
    timeout=20,
)

In [79]:
from elasticsearch_dsl import Date, Document, Integer, Keyword, Text

INDEX_NAME = "movies"

class Movie(Document):
    """Elasticsearch document describing a single movie.

    Instances are stored in the index named by ``INDEX_NAME``.
    """

    title = Text()
    overview = Text()
    genre = Keyword()
    release_date = Date()
    # NOTE(review): Elasticsearch's `integer` type is a signed 32-bit value, so a
    # revenue above 2,147,483,647 would be rejected at index time -- consider
    # switching to `Long` if the dataset contains such values.
    revenue = Integer()
    production_companies = Keyword()

    class Index:
        name = INDEX_NAME

    def get_display_name(self):
        """Return ``"<title> (<year>)"``, or just the title without a release date.

        ``release_date`` is not always a date object: it may be unset, or it may
        still be the plain string handed to the constructor (for example
        ``"2018-11-02"``). Both cases are handled so that ``__repr__`` never
        raises.
        """
        release_date = getattr(self, 'release_date', None)
        if not release_date:
            return '{title}'.format(title=self.title)

        if hasattr(release_date, 'strftime'):
            year = release_date.strftime('%Y')
        else:
            # Raw value that has not been converted to a date yet; assumes an
            # ISO-style "YYYY-MM-DD" string, whose first four characters are the year.
            year = str(release_date)[:4]
        return '{title} ({year})'.format(title=self.title, year=year)

    def __repr__(self):
        """Return a short representation such as ``<Movie: Example Movie (2018)>``."""
        return '<Movie: {}>'.format(self.get_display_name())

In [80]:
# Create the mapping for the Movie document in Elasticsearch
Movie.init()

# Confirm the mapping exists. The index is passed by keyword because newer
# elasticsearch-py releases no longer accept positional API arguments.
client.indices.get_mapping(index=INDEX_NAME)

{'movies': {'mappings': {'doc': {'properties': {'genre': {'type': 'keyword'},
     'overview': {'type': 'text'},
     'production_companies': {'type': 'keyword'},
     'release_date': {'type': 'date'},
     'revenue': {'type': 'integer'},
     'title': {'type': 'text'}}}}}}

In [93]:
# Build a sample movie with a fixed document id and index it
example_fields = {
    "title": "Example Movie",
    "overview": "This movie is about cats and dogs",
    "genre": ["Comedy", "Comedy"],
    "release_date": "2018-11-02",
    "revenue": 1000000,
    "production_companies": "Pixar",
}
movie = Movie(meta={"id": 1}, **example_fields)
movie.save()

False

In [133]:
from elasticsearch_dsl import Search

# Delete every movie currently in the index, one by one.
# Iterating a Search object directly only yields the first page of hits
# (10 by default), so scan() is used to walk through all matching documents
# and a single run of this cell empties the index.
movies = Movie.search()
for movie in movies.scan():
    print(movie)
    movie.delete()


In [134]:
import csv
import ast

MAX_ROWS = 10


def _extract_names(raw):
    """Return the ``name`` of every entry in a stringified list of dicts.

    ``raw`` is a CSV cell holding a Python-literal list, e.g.
    ``"[{'name': 'Comedy'}]"``. A blank cell yields an empty list instead of
    making ``ast.literal_eval`` raise.
    """
    if not raw or not raw.strip():
        return []
    return [entry["name"] for entry in ast.literal_eval(raw)]


# newline="" is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.
# NOTE(review): assumes the file is UTF-8 encoded -- confirm against the dataset.
with open("input/movies.csv", newline="", encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    for idx, row in enumerate(reader):
        if idx >= MAX_ROWS:
            break

        movie = Movie(
            # Use the IMDb id as the document _id rather than as a regular field,
            # so re-running this cell overwrites documents instead of adding
            # duplicates.
            meta={"id": row["imdb_id"]},
            title=row["title"],
            overview=row["overview"],
            genre=_extract_names(row["genres"]),
            # Blank cells become None so they are left out of the document
            # instead of being sent as "" to the date / integer fields.
            release_date=row["release_date"] or None,
            revenue=row["revenue"] or None,
            production_companies=_extract_names(row["production_companies"]),
        )
        movie.save()