### NOSQL Assignment

a. Write a Python program that connects to a MongoDB database and inserts a new document into a collection named "students". The document should include fields such as "name", "age", and "grade". Print a success message after the insertion.
   b. Implement a Python function that connects to a Cassandra database and inserts a new record into a table named "products". The record should contain fields like "id", "name", and "price". Handle any potential errors that may occur during the insertion.


In [None]:
# a. Write a Python program that connects to a MongoDB database and inserts a new document into a collection named "students". The document should include fields such as "name", "age", and "grade". Print a success message after the insertion.


import pymongo

def insert_student_record():
    """Insert one sample student document into the "students" collection.

    The document carries the "name", "age" and "grade" fields. A success or
    failure message is printed depending on whether the server acknowledged
    the write. Any error raised while connecting or inserting is printed
    instead of propagating, and the client is closed whenever it was created.
    """
    # Replace 'your_mongodb_uri' with your MongoDB connection URI
    # For example: mongodb://username:password@your_mongodb_host:your_mongodb_port/your_database_name
    mongodb_uri = 'your_mongodb_uri'

    # Sample student document to be inserted
    student_data = {
        'name': 'John Doe',
        'age': 25,
        'grade': 'A',
    }

    # Bound before the try block so the cleanup below is safe even when the
    # connection attempt itself fails (e.g. an invalid URI).
    client = None
    try:
        # Connect to MongoDB
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your MongoDB database
        collection = client['your_database_name']['students']

        # Insert the document into the collection
        result = collection.insert_one(student_data)

        # Print success message after insertion
        if result.acknowledged:
            print("Successfully inserted the student record.")
        else:
            print("Failed to insert the student record.")
    except Exception as e:
        print(f"Error occurred while inserting the student record: {e}")
    finally:
        # Close the connection to MongoDB, if one was opened
        if client is not None:
            client.close()

# Run the MongoDB insertion example only when executed as the main module
if __name__ == "__main__":
    insert_student_record()


#    b. Implement a Python function that connects to a Cassandra database and inserts a new record into a table named "products". The record should contain fields like "id", "name", and "price". Handle any potential errors that may occur during the insertion.

from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement

def insert_product_record(product_data):
    """Insert one record into the Cassandra "products" table.

    Args:
        product_data: Sequence of exactly three values, ordered as
            ``(id, name, price)``.

    Every failure (malformed input, connection problems, query errors) is
    printed rather than raised. The session and cluster are shut down only
    if they were actually created.
    """
    # Bound up front so the cleanup in `finally` never touches an undefined
    # name when the failure happens before the connection is established.
    cluster = None
    session = None
    try:
        # Validate the shape of the input before opening any connection
        product_id, product_name, product_price = product_data

        # Replace 'your_cassandra_username', 'your_cassandra_password' and 'your_cassandra_host'
        # with your Cassandra connection details
        auth_provider = PlainTextAuthProvider(username='your_cassandra_username', password='your_cassandra_password')
        # The port must be an integer; 9042 is Cassandra's default native-protocol port
        cluster = Cluster(['your_cassandra_host'], port=9042, auth_provider=auth_provider)
        session = cluster.connect()

        # Replace 'your_keyspace_name' with the name of your Cassandra keyspace
        keyspace_name = 'your_keyspace_name'
        session.set_keyspace(keyspace_name)

        # Parameterized query: the driver binds the values to the %s markers
        insert_query = """
            INSERT INTO products (id, name, price)
            VALUES (%s, %s, %s)
        """

        # Execute the query with the provided product data
        statement = SimpleStatement(insert_query)
        session.execute(statement, (product_id, product_name, product_price))

        print("Successfully inserted the product record.")
    except Exception as e:
        print(f"Error occurred while inserting the product record: {e}")
    finally:
        # Close the Cassandra session and cluster, if they were opened
        if session is not None:
            session.shutdown()
        if cluster is not None:
            cluster.shutdown()

# Run the Cassandra insertion example only when executed as the main module
if __name__ == "__main__":
    # Sample product data to be inserted, ordered as (id, name, price)
    product_data = ('1001', 'Sample Product', 9.99)
    insert_product_record(product_data)


2. Document-oriented NoSQL Databases:
   a. Given a MongoDB collection named "books", write a Python function that fetches all the books published in the last year and prints their titles and authors.
   b. Design a schema for a document-oriented NoSQL database to store customer information for an e-commerce platform. Write a Python program to insert a new customer document into the database and handle any necessary validations.


In [None]:
#    a. Given a MongoDB collection named "books", write a Python function that fetches all the books published in the last year and prints their titles and authors.

import pymongo
from datetime import datetime, timedelta

def fetch_books_published_last_year():
    """Print the title and authors of every book published in the last 365 days.

    Queries the "books" collection for documents whose 'publication_date'
    lies between one year ago and now. Errors are printed rather than raised,
    and the client is closed whenever it was created.
    """
    # Local import: only this function needs the UTC tzinfo object
    from datetime import timezone

    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        # For example: mongodb://username:password@your_mongodb_host:your_mongodb_port/your_database_name
        mongodb_uri = 'your_mongodb_uri'

        # Connect to MongoDB
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your MongoDB database
        collection = client['your_database_name']['books']

        # Timezone-aware "now" in UTC (datetime.utcnow() is deprecated)
        current_date = datetime.now(timezone.utc)

        # Calculate the date one year ago from the current date
        one_year_ago = current_date - timedelta(days=365)

        # Define the query to find books published in the last year
        query = {
            'publication_date': {'$gte': one_year_ago, '$lte': current_date}
        }

        # Print the titles and authors of the books that match the query
        for book in collection.find(query):
            title = book.get('title', 'N/A')
            authors = book.get('authors', [])
            # A single author stored as a plain string must not be joined
            # character by character.
            if isinstance(authors, str):
                authors = [authors]
            authors = ', '.join(str(author) for author in authors)
            print(f"Title: {title}, Authors: {authors}")

    except Exception as e:
        print(f"Error occurred while fetching books: {e}")
    finally:
        # Close the connection to MongoDB, if one was opened
        if client is not None:
            client.close()

# Run the book query example only when executed as the main module
if __name__ == "__main__":
    fetch_books_published_last_year()

    
# b. Design a schema for a document-oriented NoSQL database to store customer information for an e-commerce platform. Write a Python program to insert a new customer document into the database and handle any necessary validations.

import re

# Schema for documents in the "customers" collection.
# Each leaf is the Python type (or tuple of types) the field must have; a
# nested dict describes an embedded document and a one-element list describes
# an array whose items all follow that element's schema.
# "_id" is not listed: MongoDB generates an ObjectId for it on insert.
CUSTOMER_SCHEMA = {
    "customer_id": str,
    "name": str,
    "email": str,
    "phone": str,
    "address": {
        "street": str,
        "city": str,
        "state": str,
        "zip": str,
        "country": str,
    },
    "orders": [
        {
            "order_id": str,
            "order_date": datetime,
            "total_amount": (int, float),
            "status": str,
        }
    ],
}

# Deliberately simple shape check: something@something.something
_EMAIL_PATTERN = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")


def _check_against_schema(value, spec, path, errors):
    """Append to *errors* every way *value* deviates from *spec*."""
    if isinstance(spec, dict):
        if not isinstance(value, dict):
            errors.append(f"{path}: expected an embedded document")
            return
        for key, sub_spec in spec.items():
            child_path = f"{path}.{key}" if path else key
            if key not in value:
                errors.append(f"{child_path}: missing required field")
            else:
                _check_against_schema(value[key], sub_spec, child_path, errors)
    elif isinstance(spec, list):
        if not isinstance(value, list):
            errors.append(f"{path}: expected a list")
            return
        for index, item in enumerate(value):
            _check_against_schema(item, spec[0], f"{path}[{index}]", errors)
    elif isinstance(value, bool) or not isinstance(value, spec):
        # bool is a subclass of int, so it is rejected explicitly
        expected = spec if isinstance(spec, tuple) else (spec,)
        type_names = " or ".join(t.__name__ for t in expected)
        errors.append(f"{path}: expected {type_names}")
    elif isinstance(value, str) and not value.strip():
        errors.append(f"{path}: must not be empty")


def validate_customer(customer):
    """Return a list of validation error messages (empty when valid).

    "orders" is optional for a new customer and treated as an empty list
    when absent; every other field in CUSTOMER_SCHEMA is required.
    """
    if not isinstance(customer, dict):
        return ["customer: expected a document (dict)"]

    errors = []
    _check_against_schema({"orders": [], **customer}, CUSTOMER_SCHEMA, "", errors)

    email = customer.get("email")
    if isinstance(email, str) and email.strip() and not _EMAIL_PATTERN.match(email):
        errors.append("email: not a valid email address")
    return errors


def insert_customer(customer):
    """Validate *customer* and insert it into the "customers" collection.

    Returns the inserted document's id, or None when validation fails or
    the database operation raises (the problem is printed in both cases).
    """
    errors = validate_customer(customer)
    if errors:
        print("Customer document is invalid:")
        for message in errors:
            print(f"  - {message}")
        return None

    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' and 'your_database_name' with your own values
        client = pymongo.MongoClient('your_mongodb_uri')
        collection = client['your_database_name']['customers']

        # Store an explicit empty order list for customers without orders
        document = {"orders": [], **customer}
        result = collection.insert_one(document)

        print("Successfully inserted the customer record.")
        return result.inserted_id
    except Exception as e:
        print(f"Error occurred while inserting the customer record: {e}")
        return None
    finally:
        if client is not None:
            client.close()


3. High Availability and Fault Tolerance:
   a. Explain the concept of replica sets in MongoDB. Write a Python program that connects to a MongoDB replica set and retrieves the status of the primary and secondary nodes.
   b. Describe how Cassandra ensures high availability and fault tolerance in a distributed database system. Write a Python program that connects to a Cassandra cluster and fetches the status of the nodes.


In [None]:
# a) a. Explain the concept of replica sets in MongoDB. Write a Python program that connects to a MongoDB replica set and retrieves the status of the primary and secondary nodes.

# Concept of Replica Sets in MongoDB:

# A replica set in MongoDB is a group of MongoDB instances that maintain the same data set to provide redundancy and high availability. The primary purpose of a replica set is to ensure data availability and fault tolerance. It consists of multiple MongoDB nodes, where one node acts as the primary node, and the others are secondary nodes.

# Primary Node:
# The primary node is the main node responsible for handling all write operations and responding to client requests. It is the only node that accepts write operations, making it the most up-to-date copy of the data. The primary node also replicates data to the secondary nodes.

# Secondary Nodes:
# Secondary nodes replicate data from the primary node and can serve read operations. They provide read scaling and data redundancy. If the primary node fails, one of the secondary nodes will be automatically elected as the new primary node to maintain the availability of the database.

# Election of Primary:
# Replica sets use an election mechanism to select a new primary node in case the current primary node becomes unavailable. Members exchange heartbeats, and if the secondaries detect that the primary is unreachable or too slow to respond, they hold an election to choose a new primary node.

# Advantages of Replica Sets:

# High Availability: If a primary node fails, one of the secondary nodes is automatically elected as the new primary, ensuring continuous operation of the database.
# Read Scalability: Multiple secondary nodes can handle read operations, distributing the read load across the replica set.
# Data Redundancy: Data is replicated across multiple nodes, providing data redundancy and minimizing the risk of data loss.
# Automatic Failover: Replica sets automatically handle failover, reducing the need for manual intervention during node failures.

import pymongo

def get_replica_set_status():
    """Print the names of the primary and secondary members of a replica set.

    Runs the 'replSetGetStatus' admin command and reports each member whose
    state is PRIMARY or SECONDARY; members in any other state are skipped.
    Errors are printed rather than raised, and the client is closed whenever
    it was created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI for the replica set
        mongodb_uri = 'your_mongodb_uri'

        # Connect to MongoDB
        client = pymongo.MongoClient(mongodb_uri)

        # Get the replica set status
        repl_status = client.admin.command('replSetGetStatus')

        # Labels for the two member states this report covers
        labels = {'PRIMARY': 'Primary Node', 'SECONDARY': 'Secondary Node'}

        # Print the status of the primary and secondary nodes
        for member in repl_status.get('members', []):
            label = labels.get(member.get('stateStr'))
            if label is not None:
                print(f"{label}: {member.get('name')}")

    except Exception as e:
        print(f"Error occurred while retrieving replica set status: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

# Run the replica set status example only when executed as the main module
if __name__ == "__main__":
    get_replica_set_status()

    
# b. Describe how Cassandra ensures high availability and fault tolerance in a distributed database system. Write a Python program that connects to a Cassandra cluster and fetches the status of the nodes.

# High Availability and Fault Tolerance in Cassandra:

# Cassandra is a distributed NoSQL database that provides high availability and fault tolerance by employing various strategies:

# 1. Distributed Architecture: Cassandra uses a decentralized peer-to-peer architecture in which all nodes in the cluster are treated as equals. There is no single point of failure, and data is distributed across multiple nodes using consistent hashing.

# 2. Replication: Cassandra replicates data across multiple nodes to ensure fault tolerance. Each data item is replicated on multiple nodes, typically in different data centers, based on the configured replication factor.

# 3. Data Partitioning: Cassandra uses consistent hashing to partition data across nodes. Each node is responsible for a specific range of data, and new nodes can join or leave the cluster without affecting the availability of the database.

# 4. Gossip Protocol: Cassandra uses the gossip protocol for node discovery and cluster management. Nodes in the cluster periodically exchange information about the status of other nodes, ensuring that they are aware of the cluster's topology and can handle failures.

# 5. Read and Write Consistency Levels: Cassandra allows the specification of read and write consistency levels on a per-query basis. This allows fine-grained control over the trade-off between consistency and availability. For example, you can choose to read data from a single node for low-latency, or from multiple nodes for higher consistency.

# 6. Hinted Handoff: If a node is temporarily unavailable, Cassandra can store hints about pending writes to that node. Once the node becomes available again, it can receive the missed writes from other nodes.

# 7. Node Repair: Cassandra reconciles inconsistencies between replicas through read repair (performed while serving reads) and anti-entropy repair (run with `nodetool repair`). This ensures that all replicas eventually converge to the same data.

# Python Program to Fetch Cassandra Nodes Status:

# To fetch the status of the nodes in a Cassandra cluster, you'll need to use the cassandra-driver library, which provides Python bindings for Cassandra. Make sure you have cassandra-driver installed before proceeding.

from cassandra.cluster import Cluster

def fetch_cassandra_nodes_status():
    """Print the address and up/down status of every node in the cluster.

    The node list comes from the driver's cluster metadata, which covers all
    hosts the driver discovered. (The `system.local` table only describes the
    single node a query lands on and has no status column.) Errors are
    printed rather than raised, and the cluster is shut down whenever it was
    created.
    """
    # Bound up front so the cleanup in `finally` never touches an undefined
    # name when the connection attempt itself fails.
    cluster = None
    session = None
    try:
        # Replace 'your_cassandra_contact_points' with the IP addresses or hostnames of your Cassandra nodes
        contact_points = ['your_cassandra_contact_point1', 'your_cassandra_contact_point2']

        # Connect to the Cassandra cluster; connecting populates cluster.metadata
        cluster = Cluster(contact_points=contact_points)
        session = cluster.connect()

        # Print the status of each node known to the driver
        for host in cluster.metadata.all_hosts():
            # is_up is None while the driver does not yet know the node's state
            if host.is_up is None:
                status = 'UNKNOWN'
            elif host.is_up:
                status = 'UP'
            else:
                status = 'DOWN'
            print(f"Node: {host.address} | Status: {status}")

    except Exception as e:
        print(f"Error occurred while fetching Cassandra nodes status: {e}")
    finally:
        # Close the Cassandra session and cluster, if they were opened
        if session is not None:
            session.shutdown()
        if cluster is not None:
            cluster.shutdown()

# Run the Cassandra node status example only when executed as the main module
if __name__ == "__main__":
    fetch_cassandra_nodes_status()

4. Sharding in MongoDB:
   a. Explain the concept of sharding in MongoDB and how it improves performance and scalability. Write a Python program that sets up sharding for a MongoDB cluster and inserts multiple documents into a sharded collection.
   b. Design a sharding strategy for a social media application where user data needs to be distributed across multiple shards. Write a Python program to demonstrate how data is distributed and retrieved from the sharded cluster.


In [None]:
#    a. Explain the concept of sharding in MongoDB and how it improves performance and scalability. Write a Python program that sets up sharding for a MongoDB cluster and inserts multiple documents into a sharded collection.

# Concept of Sharding in MongoDB:

# Sharding in MongoDB is a technique used to horizontally partition data across multiple servers or shards. It allows MongoDB to distribute data across different machines in a cluster, providing better performance and scalability for large datasets and high-volume workloads.

# In a sharded cluster, data is divided into chunks, and the chunks are distributed across the shards (each shard typically holds many chunks). The shard key is a field or set of fields that determines how data is partitioned among the shards. MongoDB uses the shard key to route queries to the appropriate shard, ensuring that data retrieval is efficient.

# Advantages of Sharding:

# Scalability: Sharding enables horizontal scaling, allowing you to add more shards to the cluster as the data and workload increase. This improves the cluster's capacity to handle large amounts of data and concurrent operations.

# Load Distribution: By distributing data across multiple shards, the load is evenly distributed, preventing hotspots and bottlenecks on individual servers.

# Performance: Sharding improves read and write performance by distributing data and query processing across multiple shards. This allows for parallel processing and reduced query response times.

# Data Isolation: Sharding allows you to isolate specific datasets to dedicated shards, making it easier to manage and optimize data storage and performance.

import pymongo

def setup_sharded_collection():
    """Enable sharding, shard a collection on a hashed key, and insert documents.

    The URI is expected to point at the cluster's mongos router. Errors are
    printed rather than raised, and the client is closed whenever it was
    created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        mongodb_uri = 'your_mongodb_uri'

        # Connect to the MongoDB cluster
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your database
        db = client['your_database_name']

        # Create a sharded collection named 'your_sharded_collection'.
        # 'enableSharding' and 'shardCollection' are administrative commands
        # and must be issued against the 'admin' database.
        client.admin.command('enableSharding', 'your_database_name')
        client.admin.command(
            'shardCollection',
            'your_database_name.your_sharded_collection',
            key={'shard_key_field': 'hashed'},
        )

        # Insert multiple documents into the sharded collection
        your_sharded_collection = db['your_sharded_collection']
        documents = [
            {'shard_key_field': 'value1', 'field1': 'data1'},
            {'shard_key_field': 'value2', 'field1': 'data2'},
            # Add more documents here
        ]
        your_sharded_collection.insert_many(documents)

        print("Documents inserted into the sharded collection.")

    except Exception as e:
        print(f"Error occurred while setting up sharding and inserting documents: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

# Run the sharding setup example only when executed as the main module
if __name__ == "__main__":
    setup_sharded_collection()

    
#    b. Design a sharding strategy for a social media application where user data needs to be distributed across multiple shards. Write a Python program to demonstrate how data is distributed and retrieved from the sharded cluster.


# Sharding Strategy for Social Media Application:

# For a social media application, a suitable sharding strategy involves distributing user data based on a shard key that evenly distributes the workload and balances the read and write operations. In this scenario, the shard key should be carefully chosen to avoid hotspots and ensure a relatively uniform distribution of data across the shards.

# A possible shard key for the social media application could be the "user_id" field, as it uniquely identifies each user. Distributing data by user ID spreads the users across the shards while keeping each user's document on a single shard, so a lookup by user_id is routed to exactly one shard.

import pymongo

def distribute_user_data():
    """Shard the 'users' collection on 'user_id' and insert sample users.

    The URI is expected to point at the cluster's mongos router. Errors are
    printed rather than raised, and the client is closed whenever it was
    created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        mongodb_uri = 'your_mongodb_uri'

        # Connect to the MongoDB cluster
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your database
        db = client['your_database_name']

        # Create a sharded collection named 'users' with 'user_id' as the shard key.
        # Sharding commands must be issued against the 'admin' database.
        # The key is hashed because sequential IDs with a ranged key would send
        # every new user to the same chunk, creating a write hotspot.
        client.admin.command('enableSharding', 'your_database_name')
        client.admin.command('shardCollection', 'your_database_name.users', key={'user_id': 'hashed'})

        # Sample user data to be inserted
        users = [
            {'user_id': 1001, 'name': 'John Doe', 'age': 30, 'city': 'New York'},
            {'user_id': 1002, 'name': 'Jane Smith', 'age': 25, 'city': 'Los Angeles'},
            # Add more user data here
        ]

        # Insert user data into the 'users' collection
        db.users.insert_many(users)

        print("User data distributed across shards.")

    except Exception as e:
        print(f"Error occurred while distributing user data: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

def retrieve_user_data(user_id):
    """Look up one user by 'user_id' in the 'users' collection and print it.

    Args:
        user_id: Value matched against the 'user_id' field.

    Prints the user's fields, or a "not found" message when no document
    matches. Errors are printed rather than raised, and the client is closed
    whenever it was created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        mongodb_uri = 'your_mongodb_uri'

        # Connect to the MongoDB cluster
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your database
        db = client['your_database_name']

        # Retrieve user data based on the user_id from the 'users' collection
        user_data = db.users.find_one({'user_id': user_id})

        # find_one returns None when nothing matches
        if user_data is None:
            print(f"User with ID {user_id} not found.")
        else:
            # .get keeps a document with a missing optional field printable
            print(
                f"User ID: {user_data.get('user_id')}, Name: {user_data.get('name')}, "
                f"Age: {user_data.get('age')}, City: {user_data.get('city')}"
            )

    except Exception as e:
        print(f"Error occurred while retrieving user data: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

# Run the sharded users example only when executed as the main module:
# first insert the sample users, then read one of them back.
if __name__ == "__main__":
    distribute_user_data()

    # Retrieve user data for a specific user_id
    user_id_to_retrieve = 1001
    retrieve_user_data(user_id_to_retrieve)



5. Indexing in MongoDB:
   a. Describe the concept of indexing in MongoDB and its importance in query optimization. Write a Python program that creates an index on a specific field in a MongoDB collection and executes a query using that index.
   b. Given a MongoDB collection named "products", write a Python function that searches for products with a specific keyword in the name or description. Optimize the query by adding appropriate indexes.


In [None]:
# a. Describe the concept of indexing in MongoDB and its importance in query optimization. Write a Python program that creates an index on a specific field in a MongoDB collection and executes a query using that index.

# Concept of Indexing in MongoDB:

# An index in MongoDB is a data structure that optimizes the retrieval of data from a collection. It works similarly to indexes in traditional databases and allows MongoDB to efficiently locate and retrieve documents based on specified fields. By creating indexes on fields frequently used in queries, MongoDB can significantly speed up query execution, especially for large collections.

# Importance of Indexing in Query Optimization:

# Improved Query Performance: Indexing reduces the number of documents that need to be scanned to fulfill a query. Without an index, MongoDB may have to perform a collection scan to find matching documents, which can be slow for large collections. With an index, MongoDB can perform an index scan, which is much faster.

# Reduced Resource Consumption: Indexing can reduce the amount of CPU and memory resources required for query execution, as it narrows down the documents to be examined.

# Support for Sorting and Aggregation: Indexes allow MongoDB to efficiently sort and aggregate data. Without an index, sorting and aggregating operations may require scanning the entire collection.

# Enforcement of Uniqueness and Constraints: Indexes can enforce uniqueness and constraints on specific fields, ensuring data integrity and preventing duplicates.

import pymongo

def create_index_and_query():
    """Create an ascending index on a field and run a range query that uses it.

    Prints every document whose 'field_to_index' value is greater than 25.
    Errors are printed rather than raised, and the client is closed whenever
    it was created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        mongodb_uri = 'your_mongodb_uri'

        # Connect to MongoDB
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your database
        db = client['your_database_name']

        # Replace 'your_collection_name' with the name of your collection
        collection = db['your_collection_name']

        # Create an ascending index on the 'field_to_index' field
        # (replace it with a real field name, e.g. 'age').
        # create_index returns the name of the index.
        index_name = collection.create_index([('field_to_index', pymongo.ASCENDING)])

        # Query for documents where 'field_to_index' is greater than 25.
        # The hint forces the query planner to use the index created above.
        query = {'field_to_index': {'$gt': 25}}
        result = collection.find(query).hint(index_name)

        # Print the results
        for doc in result:
            print(doc)

    except Exception as e:
        print(f"Error occurred: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

# Run the index creation and query example only when executed as the main module
if __name__ == "__main__":
    create_index_and_query()
    
#    b. Given a MongoDB collection named "products", write a Python function that searches for products with a specific keyword in the name or description. Optimize the query by adding appropriate indexes.

import pymongo

def search_products_by_keyword(keyword):
    """Print products whose name or description matches *keyword*.

    Args:
        keyword: Search string passed to MongoDB's ``$text`` operator.

    A text index covering 'name' and 'description' is created on the
    "products" collection before searching. Errors are printed rather than
    raised, and the client is closed whenever it was created.
    """
    # Bound before the try block so `finally` is safe if connecting fails
    client = None
    try:
        # Replace 'your_mongodb_uri' with your MongoDB connection URI
        mongodb_uri = 'your_mongodb_uri'

        # Connect to MongoDB
        client = pymongo.MongoClient(mongodb_uri)

        # Replace 'your_database_name' with the name of your database
        db = client['your_database_name']

        # Replace 'products' with the name of your collection
        collection = db['products']

        # Create a compound text index on the 'name' and 'description' fields;
        # the $text operator below requires a text index on the collection.
        collection.create_index([('name', pymongo.TEXT), ('description', pymongo.TEXT)])

        # Perform the search query
        # In this example, we are using the $text operator to perform a full-text search
        # on the 'name' and 'description' fields for the given 'keyword'
        query = {'$text': {'$search': keyword}}
        result = collection.find(query)

        # Print the results; a product matched on one field may lack the other,
        # so missing fields fall back to 'N/A' instead of raising KeyError.
        for product in result:
            name = product.get('name', 'N/A')
            description = product.get('description', 'N/A')
            print(f"Product Name: {name}, Description: {description}")

    except Exception as e:
        print(f"Error occurred: {e}")
    finally:
        # Close the MongoDB connection, if one was opened
        if client is not None:
            client.close()

# Run the product keyword search example only when executed as the main module
if __name__ == "__main__":
    # Replace 'your_search_keyword' with the keyword you want to search for
    search_keyword = 'your_search_keyword'
    search_products_by_keyword(search_keyword)