# Movies

Import the data first:

```shell
mongoimport --type csv -d movies_db -c movies --headerline movies.csv
```

In [21]:
from pymongo import MongoClient

In [22]:
# Create a MongoClient instance that connects on port 27017.
# No host is passed, so pymongo's default host applies (presumably localhost — confirm for your setup).
mongo = MongoClient(port=27017)

In [23]:
# Bind the 'movies_db' database (the target of the mongoimport command above) to a variable
db = mongo['movies_db']

In [24]:
# List the collections in the database to confirm the import created 'movies'
print(db.list_collection_names())

['movies']


In [25]:
# Review one document from the 'movies' collection to inspect its fields
print(db.movies.find_one())

{'_id': ObjectId('6670c73360f5cffce769f013'), 'imdbID': 'tt9114286', 'Title': 'Black Panther: Wakanda Forever', 'Year': 2022, 'Runtime': '161 min', 'Genre': 'Action, Adventure, Drama', 'Awards': 'Won 1 Oscar. 50 wins & 172 nominations total', 'BoxOffice': '$453,829,060 ', 'Ratings': "[{'Source': 'Internet Movie Database', 'Value': '6.7/10'}, {'Source': 'Rotten Tomatoes', 'Value': '83%'}, {'Source': 'Metacritic', 'Value': '67/100'}]", 'Language': 'English, Xhosa, Maya, Spanish, Haitian, French', 'Director': 'Ryan Coogler', 'Actors': "Letitia Wright, Lupita Nyong'o, Danai Gurira"}


In [39]:
# Bind the source 'movies' collection to a variable
movies_list = db['movies']

In [40]:
# Bind the destination 'movies_ratings' collection, which will receive a copy of the movie documents
movies_list_ratings = db['movies_ratings']

In [41]:
# Count every document in the source collection (the empty filter {} matches all).
# document_count is read again by the duplication cell that follows.
document_count = movies_list.count_documents({})
print(f'Total documents in source collection: {document_count}')

Total documents in source collection: 995


In [42]:
# Duplicate every movie from the source collection into 'movies_ratings'.
# Two guards protect the copy:
#   * the source must contain documents (document_count comes from the previous cell);
#   * the destination must still be empty, so re-running this cell does not
#     append a second full copy of every movie.
if document_count <= 0:
    print('Source collection is empty. No documents to duplicate.')
elif movies_list_ratings.count_documents({}) > 0:
    print(f'{movies_list_ratings.name} already contains documents. Skipping duplication.')
else:
    # Exclude '_id' through the projection so the copies receive fresh ids on insert;
    # this replaces deleting the key from each fetched document by hand.
    documents = list(movies_list.find({}, {'_id': 0}))

    # The source could have been emptied since document_count was computed,
    # and insert_many rejects an empty list, so check again before inserting.
    if documents:
        movies_list_ratings.insert_many(documents)
        print(f'Duplicated {len(documents)} documents from {movies_list.name} to {movies_list_ratings.name}.')
    else:
        print('No documents found to insert.')

Duplicated 995 documents from movies to movies_ratings.


In [47]:
# Remove ratings data from movies_list

In [48]:
# Drop the 'Ratings' field from every document in the 'movies' collection.
# The empty filter matches all documents; $unset ignores the value given for the field.
movies_list.update_many(
    filter={},
    update={"$unset": {"Ratings": ""}},
)

print("Removed the 'ratings' field from all documents in the 'movies' collection.")

Removed the 'ratings' field from all documents in the 'movies' collection.


In [50]:
# Strip the descriptive movie fields from every document in 'movies_ratings'.
# dict.fromkeys maps each listed field name to "" to form the $unset specification.
movies_list_ratings.update_many(
    {},
    {
        "$unset": dict.fromkeys(
            ["Title", "Year", "Runtime", "Genre", "Awards", "BoxOffice", "Language", "Director", "Actors"],
            "",
        )
    },
)

print("Removed the 'Title, Year, Runtime, Genre, Awards, Box Office, Language, Director, Actors' field from all documents in the 'movies_ratings' collection.")

Removed the 'Title, Year, Runtime, Genre, Awards, Box Office, Language, Director, Actors' field from all documents in the 'movies_ratings' collection.


In [None]:
# Split the Ratings column by source, creating one field per rating source

In [52]:
import ast 

In [53]:
# Promote each rating source to its own field on the movie document.
# 'Ratings' is stored as the string form of a list of {'Source': ..., 'Value': ...}
# dicts (see the find_one() output earlier), so it is parsed with ast.literal_eval.
# Only 'Ratings' (plus the default '_id') is fetched because nothing else is read here.
for movie in movies_list_ratings.find({}, {'Ratings': 1}):
    # Treat a missing, None, or empty 'Ratings' value as "no ratings" rather than
    # letting literal_eval fail on an empty string. Malformed non-empty values
    # still raise, so bad data is not silently discarded.
    ratings = ast.literal_eval(movie.get('Ratings') or '[]')

    # Build one field per source, e.g. {'Rotten Tomatoes': '83%'}.
    # NOTE(review): source names are used verbatim as field names; a name containing
    # '.' or starting with '$' would need sanitising — none appear in the sample document.
    source_fields = {rating['Source']: rating['Value'] for rating in ratings}

    # Write all sources for this movie in a single update instead of one
    # round-trip per rating; skip movies that have nothing to set.
    if source_fields:
        movies_list_ratings.update_one(
            {'_id': movie['_id']},
            {'$set': source_fields}
        )

In [56]:
# Drop the original 'Ratings' string field from every document in 'movies_ratings'.
# The call is left as the cell's final expression so its UpdateResult is displayed.
movies_list_ratings.update_many(
    filter={},
    update={"$unset": {"Ratings": ""}},
)

UpdateResult({'n': 995, 'nModified': 995, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)