In [None]:
from pymongo import MongoClient
from pprint import pprint

In [None]:
# Load the connection string from a local text file, which keeps it out of
# the notebook source. The context manager closes the file handle as soon as
# the read finishes instead of leaving that to garbage collection.
with open("atlas_URI.txt") as uri_file:
    atlas_URI = uri_file.read().strip()  # drop the trailing newline / stray whitespace

In [None]:
# Build the client from the URI, then take handles on the `mflix` database
# and its `movies_initial` collection (subscript access is equivalent to
# attribute access in PyMongo).
client = MongoClient(atlas_URI)
db = client["mflix"]
movies_initial_col = db["movies_initial"]

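#### The two pipelines below do the same thing: `$sortByCount` is shorthand for a `$group` that counts documents followed by a descending `$sort` on that count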

In [None]:
# Long form: count the documents for each distinct `language` value,
# then order the groups from the highest count to the lowest.
count_per_language = {'$group': {'_id': "$language", 'count': {'$sum': 1}}}
highest_count_first = {'$sort': {'count': -1}}
pipeline = [count_per_language, highest_count_first]

# Short form: a single `$sortByCount` stage. This assignment replaces the
# long-form pipeline above, so `pipeline` holds the short form afterwards.
pipeline = [{'$sortByCount': "$language"}]

#### `$facet` runs several sub-pipelines over the same input documents within a single stage

In [None]:
# Facet 1: pass through only the first 20 documents produced by `$sortByCount`.
top_combinations = [{'$limit': 20}]

# Facet 2: skip those first 20, then spread the remainder across 5
# automatically sized buckets of `count`; each bucket reports how many
# language combinations fall into it.
remaining_combinations = [
    {'$skip': 20},
    {'$bucketAuto': {
        'groupBy': "$count",
        'buckets': 5,
        'output': {'language combinations': {'$sum': 1}},
    }},
]

pipeline = [
    {'$sortByCount': "$language"},
    {'$facet': {
        'top language combinations': top_combinations,
        'unusual combinations shared by': remaining_combinations,
    }},
]

#### A pipeline consisting only of a `$match` stage is equivalent to calling `collection.find()` with the same filter

In [None]:
# Single-stage pipeline: keep only the documents whose `language` field is
# exactly the string 'Korean, English'.
pipeline = [{'$match': {'language': 'Korean, English'}}]

# The same filter expressed as a plain query; the resulting list is the
# cell's displayed output.
korean_english_query = {'language': 'Korean, English'}
list(movies_initial_col.find(korean_english_query))

#### `$addFields`, `$project` and `$out`: reshape each document and write the result to a new collection

In [None]:
def _csv_to_array(field_name):
    """Build a `$split` expression that breaks the named field on comma-space."""
    return {'$split': ["$" + field_name, ", "]}


def _date_or_blank(field_ref, timezone=None):
    """Build a `$cond` that parses `field_ref` with `$dateFromString`.

    When the field equals the empty string the expression yields "" instead
    of attempting to parse it. If `timezone` is given it is added to the
    `$dateFromString` document after `dateString`.
    """
    parse_spec = {'dateString': field_ref}
    if timezone is not None:
        parse_spec['timezone'] = timezone
    return {
        '$cond': {
            'if': {'$ne': [field_ref, ""]},
            'then': {'$dateFromString': parse_spec},
            'else': "",
        }
    }


# Overwrite `lastupdated` with the text before its first "." so the later
# `$dateFromString` sees the value without that suffix.
trim_lastupdated = {
    '$addFields': {
        'lastupdated': {'$arrayElemAt': [{'$split': ["$lastupdated", "."]}, 0]},
    }
}

# Reshape each document: pass some fields through unchanged (value 1),
# rename others, turn comma-separated strings into arrays, nest the IMDb
# fields under `imdb`, and convert the two date strings.
reshape_movie = {
    '$project': {
        'title': 1,
        'year': 1,
        'directors': _csv_to_array("director"),
        'actors': _csv_to_array("cast"),
        'writers': _csv_to_array("writer"),
        'genres': _csv_to_array("genre"),
        'languages': _csv_to_array("language"),
        'countries': _csv_to_array("country"),
        'plot': 1,
        'fullPlot': "$fullplot",
        'rated': "$rating",
        'released': _date_or_blank("$released"),
        'runtime': 1,
        'poster': 1,
        'imdb': {
            'id': "$imdbID",
            'rating': "$imdbRating",
            'votes': "$imdbVotes",
        },
        'metacritic': 1,
        'awards': 1,
        'type': 1,
        'lastUpdated': _date_or_blank("$lastupdated", timezone="America/New_York"),
    }
}

pipeline = [
    {'$limit': 100},                 # only process the first 100 input documents
    trim_lastupdated,
    reshape_movie,
    {'$out': "movies_scratch"},      # write the reshaped documents to this collection
]

In [None]:
# Run the current `pipeline` through the `movies_initial_col` handle defined
# earlier in this notebook rather than re-deriving the collection from
# `client`, so every query here goes through the same handle.
# NOTE(review): when the pipeline ends in an `$out` stage the documents are
# written to the target collection and the returned cursor is expected to be
# empty, so this would print `[]` — confirm, and query the output collection
# to inspect the reshaped documents.
pprint(list(movies_initial_col.aggregate(pipeline)))