# Embeddings - Other applications

- Simplest example of generating an embedding with a REST call

In [1]:
import os
import asyncio
from typing import List
import re

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from openai import AsyncAzureOpenAI

load_dotenv()
ENDPOINT= os.getenv("OPENAI_URI")
KEY=os.getenv("OPENAI_KEY")
VERSION=os.getenv("OPENAI_API_VERSION") or "2024-02-15-preview"
CHAT_MODEL="gpt-4o"
EMB_MODEL="text-embedding-3-large"
# For Serverless API or Managed Compute endpoints

client = AsyncAzureOpenAI(azure_endpoint=ENDPOINT, api_key=KEY, api_version="2024-02-15-preview")

In [2]:
async def embedding(text,dimensions=1536)->List[float]:
    embedding = await client.embeddings.create(input=text, model=EMB_MODEL,dimensions=dimensions)
    return embedding.data[0].embedding

In [3]:
async def completion(messages: List[str], max_tokens:int|None=None, temperature=0.1, json_mode:bool=False)->str|None:
    res = await client.chat.completions.create(
        model=CHAT_MODEL, 
        messages=messages,
        max_tokens=max_tokens,
        response_format={ "type": "json_object" } if json_mode else None,
        temperature=temperature
    )
    return res.choices[0].message.content

## Single or multi-label classifier

In [4]:
AZURE_LABELS="""Compute: Services like Azure Virtual Machines, Azure Kubernetes Service (AKS), and Azure App Services provide scalable computing power for running applications, containers, and workloads.
Networking: Includes Azure Virtual Network, Azure Load Balancer, Azure Front Door, and Azure ExpressRoute, offering secure connectivity, traffic management, and content delivery.
Storage: Services like Azure Blob Storage, Azure Files, and Azure Disk Storage provide scalable, durable, and secure storage solutions for data, files, and backups.
Databases: Azure SQL Database, Azure Cosmos DB, and Azure Database for PostgreSQL offer fully managed database solutions for relational and NoSQL data.
AI and Machine Learning: Azure Cognitive Services and Azure Machine Learning enable developers to build intelligent applications with pre-built AI models and custom machine learning workflows.
Analytics: Services like Azure Synapse Analytics, Azure Data Lake, and Azure Stream Analytics provide tools for big data processing, real-time analytics, and data integration.
DevOps: Azure DevOps and GitHub Actions support continuous integration and delivery (CI/CD), source control, and project management for software development.
Security: Azure Security Center, Azure Sentinel, and Azure Key Vault offer tools for threat detection, security management, and secure key and secret storage.
Identity: Azure Active Directory (Azure AD) provides identity and access management for users, applications, and devices.
Integration: Services like Azure Logic Apps, Azure API Management, and Azure Service Bus enable seamless integration of applications, APIs, and workflows.
IoT: Azure IoT Hub and Azure Digital Twins provide tools for managing IoT devices, analyzing IoT data, and creating digital representations of physical environments.
Hybrid and Multicloud: Azure Arc and Azure Stack allow businesses to manage resources across on-premises, hybrid, and multicloud environments.
Migration: Tools like Azure Migrate help organizations assess, plan, and execute migrations to the Azure cloud.
Media: Azure Media Services provides solutions for video encoding, streaming, and content delivery.
Management and Governance: Azure Monitor, Azure Policy, and Azure Cost Management help organizations monitor, manage, and optimize their Azure resources.
None: The text does not fit any of the predefined categories.
"""

async def classifier(text: str, labels:str=AZURE_LABELS, multiple_labels=False)->str | None:
    #print(text)
    system_message = "You are an AI system that can help classify a text into one of the following labels:\n\n" + labels + "\nRespond with the best classification label. No epilogue or prologue."
    if multiple_labels:
        system_message = "You are an AI system that can help classify a text into multiple labels:\n\n" + labels + "\nRespond with a comma delimited list of classification labels that best apply. No epilogue or prologue."
    messages = [{"role": "system", "content": system_message}, {"role": "user", "content": text}]    
    response = await completion(messages)
    return response

## Cosine similarity function with numpy

In [5]:
def cosine_similarity(v1:List[float], v2:List[float])->float:
    if len(v1) != len(v2):
        raise ValueError("Vectors must have the same length")
    v1 = np.array(v1)
    v2 = np.array(v2)
    dot_product = np.dot(v1, v2)
    magnitude_A = np.linalg.norm(v1)
    magnitude_B = np.linalg.norm(v2)
    product = magnitude_A * magnitude_B
    if product == 0:
        return 0
    return dot_product / product

## Utility class to extract the title for the sample data

In [6]:
def extract_title(text:str)->str:
    match = re.match(r'^(.*?):', text)
    return match.group(1) if match else None

## Generate titles, embeddings, and classification for Azure Data services

In [7]:
services = [
    """Azure Virtual Machines (VMs): Azure VMs provide scalable, on-demand computing resources in the cloud, allowing users to run applications, host workloads, and deploy custom operating systems. They support a wide range of configurations, including Linux and Windows, and are ideal for development, testing, and production environments.""",
"""Azure App Services: Azure App Services is a fully managed platform for building, deploying, and scaling web apps, mobile backends, and APIs. It supports multiple programming languages and frameworks, integrates with DevOps pipelines, and offers built-in security and monitoring.""",
"""Azure Kubernetes Service (AKS): AKS simplifies the deployment, management, and scaling of containerized applications using Kubernetes. It provides a fully managed Kubernetes environment, enabling developers to focus on building applications without worrying about the underlying infrastructure.""",
"""Azure Container Instances (ACI): ACI offers a serverless container runtime for running Docker containers in the cloud. It provides fast startup times, auto-scaling, and per-second billing, making it ideal for short-lived tasks, batch processing, and microservices.""",
"""Azure Networking: Azure Networking encompasses a suite of services like Azure Virtual Network (VNet) for secure communication between resources, Azure Load Balancer for distributing traffic, and Azure DNS for domain management. It also includes Azure ExpressRoute for private connections and Azure Firewall for advanced security.""", 
"""Azure Application Gateway: Azure Application Gateway is a web traffic load balancer that enables users to manage and optimize web applications. It provides features like SSL termination, URL-based routing, and web application firewall (WAF) for enhanced security and performance."""
"""Azure API Management: This service allows organizations to create, publish, secure, and monitor APIs at scale. It provides features like API gateways, developer portals, and analytics, enabling seamless integration and management of APIs across hybrid and multi-cloud environments.""",
"""Azure Service Bus: Azure Service Bus is a messaging service for connecting applications, services, and devices across distributed environments. It provides reliable message delivery, pub/sub messaging patterns, and advanced features like dead-letter queues and message sessions.""",
"""Azure Logic Apps: Azure Logic Apps is a cloud-based service for automating workflows and integrating applications, data, and services. It offers a visual designer for creating workflows, connectors for integrating with external systems, and triggers for responding to events.""",
"""Azure SQL: Azure SQL is a family of fully managed, scalable, and secure relational database services. It includes Azure SQL Database for modern cloud applications and SQL Managed Instance for migrating existing SQL Server workloads with minimal changes.""",
"""Azure PostgreSQL: Azure Database for PostgreSQL is a fully managed database service that provides high availability, security, and scalability for PostgreSQL workloads. It supports features like automatic backups, point-in-time restore, and geo-replication for disaster recovery.""",
"""Azure Cosmos DB: Azure Cosmos DB is a globally distributed, multi-model database service designed for building planet-scale applications. It supports multiple data models, including document, key-value, graph, and column-family, and provides automatic scaling and high availability.""",
"""Azure Application Insights: Application Insights is an application performance management (APM) service that helps developers monitor and diagnose issues in web applications. It provides real-time telemetry, performance metrics, and log analytics for identifying performance bottlenecks and improving user experience.""",
"""Azure Storage: Azure Storage offers a range of cloud storage solutions, including Blob Storage for unstructured data, File Storage for file shares, and Queue Storage for messaging between applications. It provides high availability, durability, and scalability for storing data in the cloud.""",
"""Azure Cache for Redis: Azure Redis is a fully managed, in-memory data store that provides high-performance caching and real-time data processing. It is ideal for accelerating application performance and reducing database load.""",
"""Azure Container Apps: This service enables developers to build and deploy microservices and containerized applications without managing Kubernetes. It supports serverless scaling, event-driven architectures, and integrates with tools like Dapr for distributed application development.""",
"""Azure Front Door: Azure Front Door is a global, scalable content delivery network (CDN) and application acceleration service. It provides fast, secure, and reliable delivery of web content, with features like load balancing, caching, and DDoS protection.""",
"""Azure Cognitive Services: Azure Cognitive Services are pre-built AI models and APIs that enable developers to add intelligent features to applications. They include vision, speech, language, and decision services, allowing for capabilities like image recognition, speech-to-text, and sentiment analysis.""",
"""Azure Machine Learning: Azure Machine Learning is a cloud-based service for building, training, and deploying machine learning models. It provides tools for data preparation, model training, and model deployment, with support for popular frameworks like TensorFlow and PyTorch.""",
"""Azure Synapse Analytics: Azure Synapse is an integrated analytics service that combines data warehousing, big data analytics, and data integration. It enables organizations to analyze large volumes of data, build machine learning models, and create real-time dashboards.""",
"""Azure Data Lake: Azure Data Lake Storage is a scalable and secure data lake service for big data analytics. It supports multiple data types, including structured, semi-structured, and unstructured data, and integrates with Azure Databricks for advanced analytics.""",
"""Azure Stream Analytics: Azure Stream Analytics is a real-time data processing service that ingests, processes, and analyzes streaming data from IoT devices, sensors, and applications. It provides low-latency insights, real-time alerts, and integration with other Azure services.""",
"""Azure IoT Hub: Azure IoT Hub is a managed service for connecting, monitoring, and managing IoT devices at scale. It provides device-to-cloud and cloud-to-device messaging, device management, and security features for IoT solutions.""",
"""Speed of light: The speed of light is 299,792,458 meters per second."""
]

list = [{"title":extract_title(service),"text": service, "vector": result[0], "label": result[1]} for service in services for result in [await asyncio.gather(embedding(service), classifier(service))]]

df = pd.DataFrame(list)
df

Unnamed: 0,title,text,vector,label
0,Azure Virtual Machines (VMs),Azure Virtual Machines (VMs): Azure VMs provid...,"[-0.06364396214485168, 0.031071187928318977, -...",Compute
1,Azure App Services,Azure App Services: Azure App Services is a fu...,"[-0.007695374544709921, 0.0127828074619174, -0...",Compute
2,Azure Kubernetes Service (AKS),Azure Kubernetes Service (AKS): AKS simplifies...,"[-0.003945189528167248, 0.0017525596776977181,...",Compute
3,Azure Container Instances (ACI),Azure Container Instances (ACI): ACI offers a ...,"[-0.04357001185417175, 0.015702897682785988, -...",Compute
4,Azure Networking,Azure Networking: Azure Networking encompasses...,"[-0.038340769708156586, 0.011999299749732018, ...",Networking
5,Azure Application Gateway,Azure Application Gateway: Azure Application G...,"[-0.07149704545736313, -0.014326813630759716, ...",Networking
6,Azure Service Bus,Azure Service Bus: Azure Service Bus is a mess...,"[-0.007568337023258209, -0.00745970057323575, ...",Integration
7,Azure Logic Apps,Azure Logic Apps: Azure Logic Apps is a cloud-...,"[-0.03546272963285446, -0.002621572697535157, ...",Integration
8,Azure SQL,Azure SQL: Azure SQL is a family of fully mana...,"[-0.04313934966921806, 0.01698102429509163, -0...",Databases
9,Azure PostgreSQL,Azure PostgreSQL: Azure Database for PostgreSQ...,"[-0.041189566254615784, 0.023745814338326454, ...",Databases


In [8]:
async def compare_to_text_and_classification(text:str,classification:str) -> None:
    text_embedding = await embedding(text)
    result = [{"service": service["title"],"label":service["label"], "similarity": cosine_similarity(service["vector"], text_embedding)} for service in list if service["label"] in classification]
    result.sort(key=lambda x: x["similarity"], reverse=True)
    print(f"Comparing to: {text} and classification: {classification}")
    for item in result:
        #if item["similarity"] >= 0.4:
            print(f"{item['service']} : {item['label']} -> {item['similarity']:.2f}")
    print()
    

async def compare_to_text(text:str) -> None:
    text_embedding = await embedding(text)
    result = [{"service": service["title"],"label":service["label"], "similarity": cosine_similarity(service["vector"], text_embedding)} for service in list]
    result.sort(key=lambda x: x["similarity"], reverse=True)
    print(f"Comparing to: {text}")
    for item in result:
        #if item["similarity"] >= 0.4:
            print(f"{item['service']} : {item['label']} -> {item['similarity']:.2f}")
    print()
#await compare_to_text("Kubernetes")
#await compare_to_text("Azure sql service")
#await compare_to_text("PaaS Services")

## Perform a search with and without labels

In [9]:
await compare_to_text("What are some services that I can use to deploy applications?")
q_classifier = await classifier("What are some services that I can use to deploy applications?")
await compare_to_text_and_classification("What are some services that I can use to deploy applications?", q_classifier)

Comparing to: What are some services that I can use to deploy applications?
Azure Container Apps : Compute -> 0.52
Azure App Services : Compute -> 0.52
Azure Virtual Machines (VMs) : Compute -> 0.42
Azure Kubernetes Service (AKS) : Compute -> 0.38
Azure Container Instances (ACI) : Compute -> 0.38
Azure Machine Learning : AI and Machine Learning -> 0.37
Azure Application Gateway : Networking -> 0.37
Azure Cognitive Services : AI and Machine Learning -> 0.37
Azure Storage : Storage -> 0.36
Azure Application Insights : Management and Governance -> 0.35
Azure Networking : Networking -> 0.35
Azure PostgreSQL : Databases -> 0.34
Azure Logic Apps : Integration -> 0.34
Azure SQL : Databases -> 0.31
Azure Service Bus : Integration -> 0.31
Azure IoT Hub : IoT -> 0.30
Azure Cache for Redis : Databases -> 0.29
Azure Front Door : Networking -> 0.29
Azure Stream Analytics : Analytics -> 0.27
Azure Cosmos DB : Databases -> 0.27
Azure Data Lake : Analytics -> 0.26
Azure Synapse Analytics : Analytics -

## Perform a search with and without labels

In [10]:
await compare_to_text("In Azure, what services can be used to store transactional or nosql data?")
q_classifier = await classifier("In Azure, what services can be used to store transactional or nosql data?", multiple_labels=True)
await compare_to_text_and_classification("In Azure, what services can be used to store transactional or nosql data?", q_classifier)

Comparing to: In Azure, what services can be used to store transactional or nosql data?
Azure Cosmos DB : Databases -> 0.58
Azure Storage : Storage -> 0.57
Azure Data Lake : Analytics -> 0.52
Azure Cache for Redis : Databases -> 0.52
Azure SQL : Databases -> 0.50
Azure Container Apps : Compute -> 0.46
Azure Stream Analytics : Analytics -> 0.45
Azure App Services : Compute -> 0.45
Azure PostgreSQL : Databases -> 0.44
Azure Synapse Analytics : Analytics -> 0.44
Azure Cognitive Services : AI and Machine Learning -> 0.42
Azure Virtual Machines (VMs) : Compute -> 0.42
Azure Networking : Networking -> 0.41
Azure Service Bus : Integration -> 0.41
Azure Logic Apps : Integration -> 0.40
Azure IoT Hub : IoT -> 0.37
Azure Container Instances (ACI) : Compute -> 0.37
Azure Machine Learning : AI and Machine Learning -> 0.36
Azure Application Gateway : Networking -> 0.35
Azure Application Insights : Management and Governance -> 0.34
Azure Kubernetes Service (AKS) : Compute -> 0.33
Azure Front Door : 

## Find the relationships between vectors

In [11]:
processed = {}

def checked_processed(idx,idy) -> bool:
    if idx>idy:
        temp = idx
        idx = idy
        idy = temp
    if idy>idx:
        try:
            if processed[idx]:
                return True
        except KeyError:
            pass        
        processed[idx]=idy
    return False

for (idx,item) in enumerate(list):
    #print(f"Service {idx}: {item['vector']}")
    for (idy,item1) in enumerate(list):
        #print(f"Service {idy}: {item1['vector']}")
        if idx != idy: #and item["label"] == item1["label"]:            
            if not checked_processed(idx,idy):
                similarity = cosine_similarity(item["vector"], item1["vector"])
                if similarity >= 0.4:
                    print(f"{item['label']}: {item['title'][:30]} <-> {item1['title'][:30]} -> {similarity:.2f}")


Compute: Azure Virtual Machines (VMs) <-> Azure App Services -> 0.54
Compute: Azure App Services <-> Azure Kubernetes Service (AKS) -> 0.60
Compute: Azure Kubernetes Service (AKS) <-> Azure Container Instances (ACI -> 0.54
Compute: Azure Container Instances (ACI <-> Azure Networking -> 0.43
Networking: Azure Networking <-> Azure Application Gateway -> 0.61
Networking: Azure Application Gateway <-> Azure Service Bus -> 0.49
Integration: Azure Service Bus <-> Azure Logic Apps -> 0.53
Integration: Azure Logic Apps <-> Azure SQL -> 0.49
Databases: Azure SQL <-> Azure PostgreSQL -> 0.66
Databases: Azure PostgreSQL <-> Azure Cosmos DB -> 0.57
Databases: Azure Cosmos DB <-> Azure Application Insights -> 0.42
Management and Governance: Azure Application Insights <-> Azure Storage -> 0.43
Storage: Azure Storage <-> Azure Cache for Redis -> 0.55
Databases: Azure Cache for Redis <-> Azure Container Apps -> 0.47
Compute: Azure Container Apps <-> Azure Front Door -> 0.48
Networking: Azure Front Doo

## Create a classification model using centroids for given data

In [32]:
def calculate_centroid(embeddings: List[float]) -> List[float]:
    return np.mean(embeddings, axis=0)

# For each label, calculate the centroid of the embeddings
centroids = {}
for item in list:
    if item["label"] in centroids:
        centroids[item["label"]].append(item["vector"])
    else:
        centroids[item["label"]] = [item["vector"]]

for label, embeddings in centroids.items():
    centroids[label] = calculate_centroid(embeddings)

async def classifier_with_centroids(text: str, single:bool=True, multi_max:int=2) -> None:
    text_embedding = await embedding(text)
    result = [{"label": label, "similarity": cosine_similarity(centroid, text_embedding)} for label, centroid in centroids.items()]
    result.sort(key=lambda x: x["similarity"], reverse=True)
    print(f"Comparing to: {text}")
    count = 0
    for item in result:
        if item["similarity"] >= 0.4:
            print(f"{item['label']} -> {item['similarity']:.2f}")            
            count += 1
        else:
            print("None")
        if single:
            break
        else:
            if count >= multi_max:
                break
            
    print()

# dataframe of the centroids
centroids_df = pd.DataFrame(centroids).transpose()
centroids_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
Compute,-0.031278,0.010041,-0.024983,0.002572,0.003802,0.002605,-0.007091,0.016764,0.000361,-0.006772,...,-0.004225,0.006937,0.011258,0.031372,0.01801,0.002802,0.011319,0.023253,0.01695,0.008314
Networking,-0.054654,-0.005289,-0.017234,0.004676,-0.011992,-0.012044,-0.014483,0.013814,-0.033323,-0.018009,...,-0.010539,-0.001292,-0.000954,0.018156,0.015165,-0.003428,0.015587,0.029911,0.018137,0.022345
Integration,-0.021516,-0.005041,-0.018232,0.015601,-0.001311,0.014353,0.006261,0.016675,-0.021067,0.011484,...,-0.025524,0.014436,0.008977,0.010939,-0.004354,-0.003677,0.013854,0.035913,0.02121,0.016947
Databases,-0.038213,0.018225,-0.028292,0.033421,0.000973,-0.004695,-0.011761,0.012351,-0.028743,0.022601,...,-0.022957,0.024938,-0.012139,0.020242,0.030595,0.021355,0.017953,0.020743,0.008197,0.034865
Management and Governance,-0.037299,0.016435,-0.025559,-0.009228,-0.002937,0.015638,-0.018859,-0.003326,0.00585,0.020711,...,0.007875,0.010235,-0.001639,0.006861,0.006764,-0.032049,0.016483,0.000639,-5.3e-05,0.003891
Storage,-0.065031,-0.017984,-0.028354,0.005617,0.035354,-0.005262,-0.009831,0.014016,-0.011984,0.012605,...,-0.009387,0.020822,-0.015145,0.023274,0.020758,0.001665,4.1e-05,0.009984,0.015516,0.037
AI and Machine Learning,-0.025552,-0.007621,-0.021921,0.011188,0.001124,-0.023736,0.008463,-0.005845,0.000532,0.010748,...,-0.033903,0.017491,0.009686,-0.004113,0.017902,-0.002563,0.027502,0.012877,0.02468,-0.00276
Analytics,-0.038853,-0.00189,-0.023695,0.007646,0.015376,-0.001732,-0.023888,0.006591,0.001748,0.005066,...,-0.054346,0.015266,0.013331,0.003745,0.007424,-0.021999,0.012582,-0.001048,0.021903,0.024293
IoT,-0.051233,0.005064,-0.019168,0.00044,0.002118,0.013547,0.002696,0.012305,-0.009909,0.022319,...,-0.013694,0.002808,-0.005957,0.012077,-0.00969,-0.010721,0.031042,0.023456,0.028135,-0.002709
,0.002129,-0.014044,-0.0173,0.047779,-0.010978,-0.017284,-0.000832,0.045766,-0.016034,0.017646,...,-0.038783,6.4e-05,0.021389,-0.007569,-0.002241,0.032901,-0.021829,0.008041,-0.011764,-0.003543


## Test the classification model

In [35]:
azure_fuction = "Azure Functions is a serverless compute service that lets you run event-triggered code without having to explicitly provision or manage infrastructure."
await classifier_with_centroids(azure_fuction, single=True)
await classifier_with_centroids(azure_fuction, single=False, multi_max=2)

meaning_of_life = "The meaning of life is a philosophical question concerning the significance of living or existence in general."
await classifier_with_centroids(meaning_of_life)

Comparing to: Azure Functions is a serverless compute service that lets you run event-triggered code without having to explicitly provision or manage infrastructure.
Compute -> 0.67

Comparing to: Azure Functions is a serverless compute service that lets you run event-triggered code without having to explicitly provision or manage infrastructure.
Compute -> 0.67
Integration -> 0.60

Comparing to: The meaning of life is a philosophical question concerning the significance of living or existence in general.
None

