<a href="https://colab.research.google.com/github/EdenKantor/Cloud-Computing-Course-Tut/blob/main/Cloud_Tut7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# ranking_service.py
class RankingService:
    """Score documents against search terms using substring frequency."""

    def __init__(self):
        # Not read or written by any method of this class; kept so existing
        # code that accesses the attribute keeps working.
        self.document_scores = {}

    def calculate_score(self, document, search_terms):
        """Calculate document relevance score based on term frequency.

        Matching is case-insensitive and substring-based (``str.count``), so
        a term also matches inside longer words.

        Args:
            document: Mapping with string values under 'title' and 'content'.
            search_terms: Iterable of term strings.

        Returns:
            The sum over all terms of (occurrences in content) plus
            twice (occurrences in title). Empty terms contribute nothing.

        Raises:
            KeyError: If ``document`` has no 'title' or 'content' key.
        """
        content = document['content'].lower()
        title = document['title'].lower()
        score = 0

        for term in search_terms:
            needle = term.lower()
            if not needle:
                # str.count('') returns len(s) + 1, which would inflate the
                # score by roughly the document length for a blank term.
                continue
            # Title matches are weighted more heavily
            score += 2 * title.count(needle) + content.count(needle)

        return score

# Enhanced IndexService
class IndexService:
    """In-memory document store with an inverted word index."""

    def __init__(self):
        self.documents = {}  # doc_id (str) -> stored document dict
        self.index = {}      # lowercased word -> set of doc_ids containing it

    def add_document(self, doc_data):
        """Add a document to the index.

        Args:
            doc_data: Mapping with string values under 'title' and 'content'.

        Returns:
            The stored document: a copy of ``doc_data`` with an added 'id'
            key holding the new document id as a string.

        Raises:
            KeyError: If 'content' or 'title' is missing. Nothing is stored
                or indexed in that case.
        """
        # Tokenize before touching any state, so a malformed document cannot
        # leave behind a stored-but-unindexed entry that also consumes an id.
        words = set(doc_data['content'].lower().split() + doc_data['title'].lower().split())

        # Ids are sequential and based on the current number of documents.
        doc_id = str(len(self.documents) + 1)
        document = {**doc_data, 'id': doc_id}
        self.documents[doc_id] = document

        # Update the inverted index
        for word in words:
            self.index.setdefault(word, set()).add(doc_id)

        return document

    def get_document(self, doc_id):
        """Retrieve a document by ID, or None if the ID is unknown."""
        return self.documents.get(doc_id)

    def search_word(self, word):
        """Return a list of ids of documents containing ``word`` (case-insensitive).

        The list is built from a set, so its order is not defined.
        """
        return list(self.index.get(word.lower(), set()))

# Enhanced QueryService
class QueryService:
    """Evaluate AND/OR term expressions against an index and rank the matches."""

    def __init__(self, index_service):
        self.index_service = index_service
        self.queries = {}  # query_id (str) -> stored query record
        self.ranking_service = RankingService()

    def create_query(self, query_data):
        """Create and execute a search query with logical operators.

        Args:
            query_data: Mapping with an 'expression' key holding a list of
                term and operator strings ('and'/'or', any case), and an
                optional 'timestamp' key.

        Returns:
            On success, the stored query record with keys 'id', 'expression',
            'results' (doc ids ordered by descending score, ties broken by
            ascending doc id), 'scores' (doc id -> score) and 'timestamp'.
            On any failure, ``{'error': <message>}``; nothing is stored.
        """
        try:
            query_id = str(len(self.queries) + 1)
            query_expression = query_data['expression']  # List of terms and operators

            results = self._evaluate_expression(query_expression)

            # Get original search terms (excluding operators)
            search_terms = [term for term in query_expression
                            if term.lower() not in ('and', 'or')]

            # Calculate scores for matched documents
            scores = {}
            for doc_id in results:
                doc = self.index_service.get_document(doc_id)
                scores[doc_id] = self.ranking_service.calculate_score(doc, search_terms)

            # `results` is a set, so its iteration order is arbitrary. Order
            # by id first, then stable-sort by score, so documents with equal
            # scores always come back in the same order.
            ranked_ids = sorted(sorted(scores), key=scores.get, reverse=True)

            query = {
                'id': query_id,
                'expression': query_expression,
                'results': ranked_ids,
                'scores': {doc_id: scores[doc_id] for doc_id in ranked_ids},
                'timestamp': query_data.get('timestamp', 'now')
            }
            self.queries[query_id] = query
            return query

        except Exception as e:
            # Service boundary: report failures as an error payload.
            return {'error': str(e)}

    def _evaluate_expression(self, expression):
        """Evaluate a search expression with AND/OR operators.

        Tokens are processed strictly left to right with no operator
        precedence. The first term seeds the result; each later term is
        combined with it using the most recently seen operator, which stays
        in effect until another operator token appears (OR before any
        operator has been seen).

        Returns:
            Set of matching doc ids; empty for an empty expression or one
            containing only operators.
        """
        if not expression:
            return set()

        matched = None      # running result; None until the first term is seen
        current_op = 'OR'   # Default operator

        for token in expression:
            token = token.lower()
            if token in ('and', 'or'):
                current_op = token.upper()
                continue

            doc_ids = set(self.index_service.search_word(token))
            if matched is None:
                matched = doc_ids
            elif current_op == 'AND':
                matched = matched & doc_ids
            else:  # OR
                matched = matched | doc_ids

        return matched if matched is not None else set()

# Enhanced ResultService
class ResultService:
    """Turn stored query records into display-ready result listings."""

    # Maximum number of content characters shown in a result snippet.
    SNIPPET_LENGTH = 100

    def __init__(self, index_service, query_service):
        self.index_service = index_service
        self.query_service = query_service
        self.results = {}  # result_id (str) -> stored formatted result

    def format_results(self, query_id):
        """Format search results for display with rankings.

        Args:
            query_id: Id of a query stored in ``query_service.queries``.

        Returns:
            On success, the stored result record with keys 'id', 'query_id',
            'formatted_results' (one entry per document that still resolves,
            in the query's result order) and 'count'. Each entry has
            'doc_id', 'title', 'snippet' and 'relevance_score'. The snippet
            is the first ``SNIPPET_LENGTH`` characters of the content,
            followed by '...' only when the content was actually truncated.
            Returns ``{'error': <message>}`` if the query is unknown or any
            step fails.
        """
        try:
            query = self.query_service.queries.get(query_id)
            if not query:
                return {'error': 'Query not found'}

            formatted_results = []
            for doc_id in query['results']:
                doc = self.index_service.get_document(doc_id)
                if not doc:
                    # Document no longer resolves; leave it out of the listing.
                    continue
                content = doc['content']
                snippet = content[:self.SNIPPET_LENGTH]
                if len(content) > self.SNIPPET_LENGTH:
                    # Only mark the snippet as cut off when it really is.
                    snippet += '...'
                formatted_results.append({
                    'doc_id': doc_id,
                    'title': doc['title'],
                    'snippet': snippet,
                    'relevance_score': query['scores'][doc_id]
                })

            result_id = str(len(self.results) + 1)
            result = {
                'id': result_id,
                'query_id': query_id,
                'formatted_results': formatted_results,
                'count': len(formatted_results)
            }
            self.results[result_id] = result
            return result

        except Exception as e:
            # Service boundary: report failures as an error payload.
            return {'error': str(e)}

# Example usage and tests
def main():
    """Index three sample documents and print ranked results for three queries."""
    # Wire the services together
    indexer = IndexService()
    searcher = QueryService(indexer)
    formatter = ResultService(indexer, searcher)

    # Sample corpus as (title, content) pairs
    sample_docs = (
        ('Python Programming',
         'Python is a popular programming language for cloud computing and web development'),
        ('Cloud Services Architecture',
         'Cloud computing enables scalable microservices architecture and distributed systems'),
        ('Web Development with Python',
         'Python frameworks are popular for web development and API creation'),
    )
    for doc_title, doc_content in sample_docs:
        indexer.add_document({'title': doc_title, 'content': doc_content})

    # Each scenario: (banner line, query expression, heading printed above hits)
    scenarios = (
        ("\nTesting AND query:",
         ['python', 'AND', 'web'],
         "Documents containing 'python' AND 'web':"),
        ("\nTesting OR query:",
         ['cloud', 'OR', 'web'],
         "Documents containing 'cloud' OR 'web':"),
        ("\nTesting complex query:",
         ['python', 'AND', 'web', 'OR', 'cloud'],
         "Documents matching '(python AND web) OR cloud':"),
    )
    for banner, expression, heading in scenarios:
        print(banner)
        query = searcher.create_query({'expression': expression})
        listing = formatter.format_results(query['id'])
        print(heading)
        for hit in listing['formatted_results']:
            print(f"- {hit['title']} (Score: {hit['relevance_score']})")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Testing AND query:
Documents containing 'python' AND 'web':
- Web Development with Python (Score: 6)
- Python Programming (Score: 4)

Testing OR query:
Documents containing 'cloud' OR 'web':
- Cloud Services Architecture (Score: 3)
- Web Development with Python (Score: 3)
- Python Programming (Score: 2)

Testing complex query:
Documents matching '(python AND web) OR cloud':
- Web Development with Python (Score: 6)
- Python Programming (Score: 5)
- Cloud Services Architecture (Score: 3)
