In [6]:
from pymongo import MongoClient, ReadPreference
import pandas as pd
import time

# MongoDB setup
client = MongoClient('mongodb://localhost:27017')  
db = client['MongoDB']  
collection = 'NetflixUserbase'



In [7]:
# Load the dataset
data_path = "Netflix Userbase.csv"
df = pd.read_csv(data_path)

# Function to check and recreate the collection
def check_and_create_collection():
    try:
        # Check if the collection exists
        existing_collections = db.list_collection_names()
        print("Existing collections:", existing_collections)
        
        if collection in existing_collections:
            print("Collection already exists. Dropping the collection...")
            db[collection].drop()  # Drop the existing collection
            print("Collection dropped.")

        # Create a new collection (MongoDB creates it automatically when we insert data)
        print("Collection recreated successfully!")
    except Exception as e:
        print("Error occurred while recreating the collection:", e)

# Function to load data into MongoDB
def load_data_to_mongodb():
    collection = db['NetflixUserbase']  
    
    # Prepare data for insertion
    records = df.to_dict(orient='records')
    
    try:
        # Insert all records at once
        collection.insert_many(records)
        print("Data successfully loaded into MongoDB.")
    except Exception as e:
        print(f"Error inserting data: {e}")




In [8]:
# Function to test read consistency
def test_read_consistency(user_id):
    collection = db['NetflixUserbase']  
    
    # Strongly Consistent Read (default for MongoDB)
    start_time = time.time()
    strongly_consistent_read = collection.find_one({'UserID': str(user_id)})
    latency_strong = time.time() - start_time
    
    # Eventual Consistent Read (from secondary node, requires replica set)
    start_time = time.time()
    eventual_consistent_read = collection.with_options(read_preference=ReadPreference.SECONDARY_PREFERRED).find_one({'UserID': str(user_id)})
    latency_eventual = time.time() - start_time
    
    # Print results
    print("Strongly Consistent Read Result:", strongly_consistent_read)
    print("Strongly Consistent Read Latency:", latency_strong)
    print("Eventually Consistent Read Result:", eventual_consistent_read)
    print("Eventually Consistent Read Latency:", latency_eventual)



In [9]:
# Step 1: Check and recreate the collection
check_and_create_collection()

# Step 2: Load data into MongoDB
load_data_to_mongodb()

# Step 3: Test read consistency
test_read_consistency("3")  # Replace "3" with an appropriate UserID from your dataset

Existing collections: ['NetflixUserbase']
Collection already exists. Dropping the collection...
Collection dropped.
Collection recreated successfully!
Data successfully loaded into MongoDB.
Strongly Consistent Read Result: None
Strongly Consistent Read Latency: 0.01153874397277832
Eventually Consistent Read Result: None
Eventually Consistent Read Latency: 0.004715919494628906


In [None]:
from pymongo import MongoClient
import random
import time

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017')  
collection = db['NetflixUserbase']

# Generate and insert 100 records in batches
def insert_batch():
    records = []
    for i in range(100):  # Create 100 records
        new_record = {
            'UserID': i + 1,  
            'SubscriptionType': random.choice(['Basic', 'Standard', 'Premium']),
            'MonthlyRevenue': random.uniform(5.99, 14.99),
            'JoinDate': '2024-11-29',
            'LastPaymentDate': '2024-11-29',
            'Country': random.choice(['USA', 'Canada', 'UK', 'India']),
            'Age': random.randint(18, 60),
            'Gender': random.choice(['Male', 'Female', 'Other']),
            'Device': random.choice(['Phone', 'Tablet', 'PC']),
            'PlanDuration': random.randint(1, 12)
        }
        records.append(new_record)

    # Record the start time for insertion
    start_time = time.time()
    
    # Insert batch into MongoDB
    result = collection.insert_many(records)
    
    # Calculate the time taken
    insertion_time = time.time() - start_time
    
    # Print the number of records inserted and the time taken for insertion
    print(f"Successfully inserted {len(records)} records into MongoDB.")
    print(f"Time taken for bulk insertion: {insertion_time:.4f} seconds.")

# Insert 100 records
insert_batch()

# Query to retrieve records sorted by UserID (ascending order)
sorted_records = collection.find().sort('UserID', 1)  # 1 for ascending order

# Print the sorted records
for record in sorted_records:
    print(record)


Successfully inserted 100 records into MongoDB.
Time taken for bulk insertion: 0.0057 seconds.
{'_id': ObjectId('674e998ca3607d20ad85c9ca'), 'User ID': 1, 'Subscription Type': 'Basic', 'Monthly Revenue': 10, 'Join Date': '15-01-22', 'Last Payment Date': '10-06-23', 'Country': 'United States', 'Age': 28, 'Gender': 'Male', 'Device': 'Smartphone', 'Plan Duration': '1 Month'}
{'_id': ObjectId('674e998ca3607d20ad85c9cb'), 'User ID': 2, 'Subscription Type': 'Premium', 'Monthly Revenue': 15, 'Join Date': '05-09-21', 'Last Payment Date': '22-06-23', 'Country': 'Canada', 'Age': 35, 'Gender': 'Female', 'Device': 'Tablet', 'Plan Duration': '1 Month'}
{'_id': ObjectId('674e998ca3607d20ad85c9cc'), 'User ID': 3, 'Subscription Type': 'Standard', 'Monthly Revenue': 12, 'Join Date': '28-02-23', 'Last Payment Date': '27-06-23', 'Country': 'United Kingdom', 'Age': 42, 'Gender': 'Male', 'Device': 'Smart TV', 'Plan Duration': '1 Month'}
{'_id': ObjectId('674e998ca3607d20ad85c9cd'), 'User ID': 4, 'Subscript