In [1]:
import pymongo
from qdrant_client import QdrantClient
import requests
from clearml import Task
import os

In [2]:
def test_mongodb():
    try:
        client = pymongo.MongoClient("mongodb://mongodb:27017/")
        db_list = client.list_database_names()
        print("✅ MongoDB Connection Successful")
        print(f"Available databases: {db_list}")
        return True
    except Exception as e:
        print(f"❌ MongoDB Connection Failed: {str(e)}")
        return False
test_mongodb()

❌ MongoDB Connection Failed: mongodb:27017: [Errno 8] nodename nor servname provided, or not known (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 6750b5377ac927af16415c88, topology_type: Unknown, servers: [<ServerDescription ('mongodb', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('mongodb:27017: [Errno 8] nodename nor servname provided, or not known (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>


False

In [3]:
def test_qdrant():
    try:
        client = QdrantClient(host="qdrant", port=6333)
        service_info = client.get_collections()
        print("✅ Qdrant Connection Successful")
        print(f"Collections info: {service_info}")
        return True
    except Exception as e:
        print(f"❌ Qdrant Connection Failed: {str(e)}")
        return False
test_qdrant()

❌ Qdrant Connection Failed: [Errno 8] nodename nor servname provided, or not known


False

In [4]:
def test_clearml_services():
    services = {
        'API Server': 'http://apiserver:8008',
        'Web Server': 'http://webserver:8080',
        'File Server': 'http://fileserver:8081'
    }
    results = {}
    for service_name, url in services.items():
        try:
            response = requests.get(f"{url}/health")
            if response.status_code == 200:
                print(f"✅ {service_name} Connection Successful")
                results[service_name] = True
            else:
                print(f"❌ {service_name} Response Code: {response.status_code}")
                results[service_name] = False
        except Exception as e:
            print(f"❌ {service_name} Connection Failed: {str(e)}")
            results[service_name] = False

    return all(results.values())
test_clearml_services()

❌ API Server Connection Failed: HTTPConnectionPool(host='apiserver', port=8008): Max retries exceeded with url: /health (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x12b9ecf10>: Failed to resolve 'apiserver' ([Errno 8] nodename nor servname provided, or not known)"))
❌ Web Server Connection Failed: HTTPConnectionPool(host='webserver', port=8080): Max retries exceeded with url: /health (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x12b9ece50>: Failed to resolve 'webserver' ([Errno 8] nodename nor servname provided, or not known)"))
❌ File Server Connection Failed: HTTPConnectionPool(host='fileserver', port=8081): Max retries exceeded with url: /health (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x12b9e7490>: Failed to resolve 'fileserver' ([Errno 8] nodename nor servname provided, or not known)"))


False

In [5]:
from app.utils.mongodb import MongoDBClient

mongo_client = MongoDBClient()
documents = mongo_client.docs_collection.find({}, {"source.url": 1})
urls = [doc['source']['url'] for doc in documents]
print("Ingested URLs:")
for url in urls:
    print(url)

ModuleNotFoundError: No module named 'app'

In [11]:
def test_api_keys():
    import yaml
    from github import Github
    from googleapiclient.discovery import build
    
    # Load API keys
    with open('../app/configs/api_keys.yaml', 'r') as f:
        api_keys = yaml.safe_load(f)
    
    # Test GitHub
    try:
        g = Github(api_keys['github']['access_token'])
        user = g.get_user()
        print(f"✅ GitHub API Connection Successful")
    except Exception as e:
        print(f"❌ GitHub API Connection Failed: {str(e)}")
    
    # Test YouTube
    try:
        youtube = build('youtube', 'v3', developerKey=api_keys['youtube']['api_key'])
        request = youtube.playlistItems().list(
            part='snippet',
            playlistId='PLNw1_R4SfJQf_i5un9AGGj1jmqWJ4yE4i',
            maxResults=1
        )
        response = request.execute()
        print(f"✅ YouTube API Connection Successful")
    except Exception as e:
        print(f"❌ YouTube API Connection Failed: {str(e)}")

test_api_keys()

✅ GitHub API Connection Successful
❌ YouTube API Connection Failed: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=snippet&playlistId=PLNw1_R4SfJQf_i5un9AGGj1jmqWJ4yE4i&maxResults=1&key=AIzaSyC5kEoUeAFSdQHqfUggxjG4zOhv5FizWus&alt=json returned "The playlist identified with the request's <code>playlistId</code> parameter cannot be found.". Details: "[{'message': "The playlist identified with the request's <code>playlistId</code> parameter cannot be found.", 'domain': 'youtube.playlistItem', 'reason': 'playlistNotFound', 'location': 'playlistId', 'locationType': 'parameter'}]">
