In [16]:
import os

if "notebooks" in os.getcwd():
    os.chdir("..")

In [17]:
import os
import hashlib
from datetime import datetime
import pandas as pd
import logging
from azure.cosmos import CosmosClient
from azure.cosmos.exceptions import CosmosHttpResponseError


class AzureCosmos:
    def __init__(self) -> None:
        logging.getLogger('azure').setLevel(logging.CRITICAL)

        self.COSMOS_HOST = "https://cosmos-scraper.documents.azure.com:443/"
        self.COSMOS_MASTER_KEY = "bBgVEeSnEQaSss88e8zZU5pjpiVzPjba5qpe6alFqU548KcW2eMkCeUf7J99RWVUPw6ASV32W8pGACDb5ZhxrA=="
        self.DATABASE_ID = "Scraper"
        self.client = CosmosClient(self.COSMOS_HOST, {'masterKey': self.COSMOS_MASTER_KEY})
        print("client created")

    def initialize_cosmosdb(self, container_name):
        try:
            database = self.client.get_database_client(self.DATABASE_ID)
            container = database.get_container_client(container_name)
            print("[INFO] Cosmos client created")
            return container
        except Exception as e:
            print("Error initializing Cosmos DB:", e)
            return None


    def execute_cosmos_query(self, container_name, query):
        """
        Executes a Cosmos SQL query on the specified container.
        """
        try:
            # Initialize the container using the provided container_name
            container = self.initialize_cosmosdb(container_name)
            
            # Run the query and fetch results
            results = list(container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))
            
            print(f"[INFO] Query executed successfully: {query}")
            return results
        
        except CosmosHttpResponseError as e:
            print(f"[ERROR] Failed to execute query: {e}")
            return None
        
    def mark_them_unprocessed(self):
        sheet_name = "Internal Dating Batch 2 - Social Mixer"
        query = f"SELECT * FROM c WHERE c.processed=true and c.sheet_name='{sheet_name}'"
        container_name = 'eventBrite_links'

        container = self.initialize_cosmosdb(container_name)
        results = list(container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))

        for item in results:
            print(item)
            data = {
                "id": item["id"],
                "url": item["link_name"],
                "processed": False,
                "sheet_name": item["sheet_name"]
                }
            container.upsert_item(data)

        
    def download_data_to_excel(self):
        """
        Downloads all data from the 'eventBrite_events' container and saves it to an Excel file.
        """
        sheet_name = "Internal Dating Batch 2 - Social Mixer"
        query = f"SELECT * FROM c where c.processed=true and c.sheet_name = '{sheet_name}'"
        container_name = 'eventBrite_events'
        
        # Execute query to fetch all data
        data = self.execute_cosmos_query(container_name, query)
        if data:
            # Convert the data into a pandas DataFrame
            df = pd.DataFrame(data)
            
            # Create a timestamp for the file name
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            file_name = f"data/outputs/eventBrite_events_{sheet_name}_{timestamp}.xlsx"
            
            # Save the DataFrame to an Excel file
            df.to_excel(file_name, index=False)
            
            print(f"[INFO] Data saved successfully to {file_name}")
        else:
            print("[ERROR] No data found or query execution failed.")

In [18]:
azure_cosmos = AzureCosmos()
azure_cosmos.download_data_to_excel()
# azure_cosmos.mark_them_unprocessed()

client created
[INFO] Cosmos client created
{'link_name': 'https://www.eventbrite.com/d/united-states/paid--parties/social-mixer/?page=1', 'sheet_name': 'Internal Dating Batch 2 - Social Mixer', 'id': 'cb9dd73316d1c1735d9af2147f6dc72f3e8bafa63db1e6cecbc44279792826a5', 'processed': True, '_rid': '66UjAOvL21oGAgAAAAAAAA==', '_self': 'dbs/66UjAA==/colls/66UjAOvL21o=/docs/66UjAOvL21oGAgAAAAAAAA==/', '_etag': '"00002b18-0000-0a00-0000-66ea4ea90000"', '_attachments': 'attachments/', '_ts': 1726631593}
{'link_name': 'https://www.eventbrite.com/d/united-states/paid--parties/social-mixer/?page=2', 'sheet_name': 'Internal Dating Batch 2 - Social Mixer', 'id': '918b593d2a8ddf5df659d7321f44f7355ee0a0d4fe367f8e73ca29174c6c02b9', 'processed': True, '_rid': '66UjAOvL21oHAgAAAAAAAA==', '_self': 'dbs/66UjAA==/colls/66UjAOvL21o=/docs/66UjAOvL21oHAgAAAAAAAA==/', '_etag': '"00002d18-0000-0a00-0000-66ea4ec20000"', '_attachments': 'attachments/', '_ts': 1726631618}
{'link_name': 'https://www.eventbrite.com/