In [3]:
import os
import json
from pymongo import MongoClient
from pymongo.server_api import ServerApi

In [4]:
# MongoDB connection string.
# Read from the environment so that no credential is stored in the notebook
# (notebooks are routinely shared and committed, outputs included).
# NOTE: any password that was previously hardcoded in this cell should be
# treated as leaked and rotated.
uri = os.environ.get("MONGODB_URI", "").strip()
if not uri:
    # Fail fast: a missing URI would otherwise make MongoClient fall back to
    # its default host instead of the intended cluster.
    raise RuntimeError(
        "MONGODB_URI is not set. Export the MongoDB connection string "
        "(e.g. MONGODB_URI='mongodb+srv://<user>:<password>@<cluster-host>/') "
        "before running this notebook."
    )


In [5]:
# Open the client against the configured URI, pinned to Stable API version 1.
client = MongoClient(uri, server_api=ServerApi('1'))

# Round-trip a ping so that a bad URI or unreachable cluster is reported here
# rather than on the first real query.
try:
    client.admin.command('ping')
except Exception as ping_error:
    # Best-effort check: report the problem and let the notebook continue.
    print(ping_error)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


In [6]:
# Database / collection handles used by the lookup helpers below.

# Identifier index: maps any product identifier to its primary ids.
db_clean_data = client.get_database("clean_data")
collection_id_jsons = db_clean_data.get_collection("id_jsons")

# Product data is split across two databases with the same collection names.
db_in_stock = client.get_database("inStock")
db_out_stock = client.get_database("outStock")

# Collection names probed, in this order, inside each stock database.
# NOTE(review): "latest_pricing-per_merchant" uses a hyphen where its sibling
# uses an underscore -- confirm this matches the real collection name.
collections_to_search = [
    "cleaned_reviews",
    "extracted_features",
    "imageurls_jsons",
    "latest_pricing_per_merchant_2024",
    "latest_pricing-per_merchant",
    "pricing_history",
    "updated_descriptions",
]

In [11]:
# Resolve any product identifier (EAN, UPC, GTIN, ASIN, manufacturerNumber,
# key, SKU or document id) to the primary ids stored in id_jsons.
def _as_id_list(value):
    """Return `value` as a flat list of non-empty identifiers.

    Accepts None, a single scalar, or a (possibly nested) list/tuple/set, so
    callers always receive a flat list regardless of how the field is stored.
    """
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple, set)):
        flat = []
        for item in value:
            flat.extend(_as_id_list(item))
        return flat
    return [value]


def fetch_primary_ids(user_input):
    """
    Fetch ean, upc and asins from id_jsons for any known product identifier.

    Parameters
    ----------
    user_input : str
        Identifier to look up. It is matched against the fields ean, upc,
        gtins, asins, manufacturerNumber, keys, skus.value and id.

    Returns
    -------
    dict or None
        {"ean": [...], "upc": [...], "asins": [...]} with every value a flat
        list (empty when the field is absent), or None when `user_input` is
        empty or no document matches. A message is printed in the None case.
    """
    # Guard against empty input: in MongoDB {"field": None} also matches
    # documents where the field is missing, which would return an arbitrary
    # document instead of "not found".
    if user_input is None or (isinstance(user_input, str) and not user_input.strip()):
        print("No identifier provided; nothing to look up in id_jsons")
        return None

    # Equality on an array field matches when any element equals the value,
    # so the same clause works for scalar and list-valued fields.
    lookup_fields = (
        "ean",
        "upc",
        "gtins",
        "asins",               # previously missing: ASINs could not be looked up
        "manufacturerNumber",
        "keys",
        "skus.value",          # any value inside the skus list
        "id",                  # document id (NOTE(review): not necessarily an ASIN)
    )
    query = {"$or": [{field: user_input} for field in lookup_fields]}

    # Query the id_jsons collection
    result = collection_id_jsons.find_one(query)

    if not result:
        print("No matching document found in id_jsons")
        return None

    # Normalise every field to a flat list so downstream code can treat
    # scalar-valued and list-valued documents uniformly.
    return {
        "ean": _as_id_list(result.get("ean")),
        "upc": _as_id_list(result.get("upc")),
        "asins": _as_id_list(result.get("asins")),
    }


In [14]:
# Example usage: resolve a single identifier to its primary ids and show them.
user_input = "S2JTW-N740"  # any supported identifier (manufacturerNumber, SKU, key, ...)
primary_ids = fetch_primary_ids(user_input)
print(primary_ids)

{'ean': ['0810015589397'], 'upc': ['810015589397'], 'asins': ['B08G1X87ZY']}


In [15]:
# Look up resolved primary ids in the inStock / outStock collections.
def _flatten_id_values(raw):
    """Yield every non-empty identifier contained in `raw`.

    `raw` may be None, a scalar, or a (possibly nested) list/tuple/set.
    Strings are yielded whole, never iterated character by character.
    """
    if raw is None or raw == "":
        return
    if isinstance(raw, (list, tuple, set)):
        for item in raw:
            yield from _flatten_id_values(item)
        return
    yield raw


def search_in_collections(primary_ids):
    """
    Search the inStock collections first using ean, upc, or asins.
    If nothing is found there, search the outStock collections.

    Parameters
    ----------
    primary_ids : dict or None
        Mapping with optional "ean", "upc" and "asins" entries; each entry may
        be a single value or a list of values.

    Returns
    -------
    list
        The matching documents from the FIRST collection that has any match
        (collections are tried in `collections_to_search` order, inStock
        before outStock). An empty list when there is nothing to search for
        or nothing matches. Prints which database produced the hit.
    """
    id_fields = ("ean", "upc", "asins")

    if not primary_ids:
        return []

    # Collect all candidate values; dict.fromkeys de-duplicates while keeping
    # a stable order, so the generated query is deterministic.
    candidates = []
    for field in id_fields:
        candidates.extend(_flatten_id_values(primary_ids.get(field)))
    search_values = list(dict.fromkeys(candidates))
    if not search_values:
        return []

    # Every value is tried against every id field because an identifier may be
    # stored under a different field than the one it was resolved from.
    query = {"$or": [{field: {"$in": search_values}} for field in id_fields]}

    # inStock has priority; outStock is only consulted when inStock has no hit.
    for label, database in (("inStock", db_in_stock), ("outStock", db_out_stock)):
        for collection_name in collections_to_search:
            matches = list(database[collection_name].find(query))
            if matches:
                print(f"Found in {label}:")
                return matches

    return []


In [16]:
# Example usage: resolve an identifier, then print every matching stock document.
user_input = "ADIB000IKWBC2"  # any supported identifier (manufacturerNumber, SKU, ...)
primary_ids = fetch_primary_ids(user_input)

if not primary_ids:
    # The identifier could not be resolved in id_jsons.
    print("Unable to fetch primary IDs.")
else:
    search_results = search_in_collections(primary_ids)
    if not search_results:
        print("No matching documents found in either inStock or outStock.")
    else:
        for result in search_results:
            print(result)



Found in outStock:
{'_id': ObjectId('66e8720ee136ce826f531aee'), 'id': 'AWUaQ3oGuC1rwyj_uBUq', 'ean': ['0887468966457'], 'ean13': '0887468966457', 'gtins': ['887468966457', '0887468966457', '689719608754'], 'upc': ['689719608754'], 'upca': '689719608754', 'asins': 'B000IKWBC2', 'reviews': [{'merchant': 'amazon.com', 'date': '2020-05-12T00:00:00.000Z', 'dateSeen': '2020-06-01T23:25:33.215Z', 'rating': 1.0, 'text': 'they sound excellent but they fell apart within days we purchased these and the hpx to compare the difference and whether or not its worth it to spend the extra money the s have held up great the didnt its a shame because the sound good and for our use of cueing and playback they would have been fine we were a few days behind the return window so ill try to fix them if they fixed the quality control these would be star for sure', 'sentiment': 'positive', 'title': 'sound great but they fell apart within days', 'doRecommend': 'null', 'username': 'Carmelsan', 'didPurchase': 'tru