# Profile Match
![Profile Match Approach](../Other/Profile_Match_Flow.jpg)

## Prerequisites
* Convert the PowerPoint deck to JPG (open the PPTX --> Save As --> select the JPG option and choose a folder for the slide images). This saves each slide as a separate image file (Slide1.JPG, Slide2.JPG, etc.).
* Create model deployments for the GPT-4o, DALL-E 3, and text-embedding-3-large models
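* Update the `local.env` file with your Azure OpenAI and Azure Storage settings (for example `AZURE_OPENAI_KEY`, `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `STORAGE_ACCOUNT_NAME`, `STORAGE_ACCOUNT_KEY`, and `STORAGE_CONTAINER_NAME`)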


In [None]:
%pip install --force-reinstall -r requirements.txt


In [None]:
import os
from openai import AzureOpenAI
import re
from azure.storage.blob import (
    BlobServiceClient,
    BlobClient,
    BlobSasPermissions,
    generate_blob_sas,
    ContainerClient,
)
from datetime import datetime, timedelta, timezone

import json
import requests
import math
import os
import pandas as pd
import numpy as np

from dotenv import load_dotenv
load_dotenv("local.env")

## Helper Methods

In [None]:

def generate_sas_token(
    blob_service_client: BlobServiceClient, source_blob: BlobClient
) -> str:
    """Build a read-only SAS token for a single blob.

    The token's validity window opens one hour before the call and closes
    one hour after it, both measured in UTC.

    Args:
        blob_service_client: Service client whose account name and account
            key are used to sign the token.
        source_blob: Blob client identifying the container and blob the
            token grants access to.

    Returns:
        The SAS token string (without a leading ``?``).
    """
    expires_at = datetime.now(timezone.utc) + timedelta(hours=1)
    # NOTE(review): the start is back-dated by an hour, presumably to
    # tolerate clock skew between client and service -- confirm.
    starts_at = datetime.now(timezone.utc) + timedelta(hours=-1)

    return generate_blob_sas(
        account_name=blob_service_client.account_name,
        container_name=source_blob.container_name,
        blob_name=source_blob.blob_name,
        account_key=blob_service_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=expires_at,
        start=starts_at,
    )



def generate_embeddings(text, model):
    """Return the embedding vector for ``text``.

    Uses the module-level ``client`` and sends ``text`` as a single-item
    batch to the embeddings endpoint.

    Args:
        text: The text to embed.
        model: Name passed as ``model`` to the embeddings API.

    Returns:
        The ``embedding`` attribute of the first item in the response.
    """
    response = client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Original author's note: a plain dot product should be OK too
# (NOTE(review): presumably because the embeddings are unit-length -- confirm).
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: First vector (any array-like accepted by numpy).
        b: Second vector, same length as ``a``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return dot_product / norm_product

def getResponseFromAoAi_image(systemPrompt, userPrompt, temp, imageurl):
    """Send a text-plus-image chat request and return the reply text.

    Uses the module-level ``client`` and ``aoai_api_deployment_name`` and
    asks the model for a JSON-object response.

    Args:
        systemPrompt: Content of the system message.
        userPrompt: Text part of the user message.
        temp: Sampling temperature; any falsy value (``None``, ``0``) is
            sent as ``0``.
        imageurl: URL of the image attached to the user message.

    Returns:
        The message content of the first choice in the completion.
    """
    temperature = temp or 0

    user_content = [
        {"type": "text", "text": userPrompt},
        {"type": "image_url", "image_url": {"url": imageurl}},
    ]
    messages = [
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": user_content},
    ]

    completion = client.chat.completions.create(
        model=aoai_api_deployment_name,  # name of the chat model deployment
        messages=messages,
        response_format={"type": "json_object"},
        temperature=temperature,
    )
    return completion.choices[0].message.content


def getResponseFromAoAi(systemPrompt, userPrompt, temp):
    """Send a text-only chat request and return the reply text.

    Uses the module-level ``client`` and ``aoai_api_deployment_name``.
    Unlike the image variant, no ``response_format`` is requested here
    (the original author's note says JSON mode "requires ptu enabled gpt4").

    Args:
        systemPrompt: Content of the system message.
        userPrompt: Content of the user message.
        temp: Sampling temperature; any falsy value (``None``, ``0``) is
            sent as ``0``.

    Returns:
        The message content of the first choice in the completion.
    """
    temperature = temp or 0

    messages = [
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": userPrompt},
    ]

    completion = client.chat.completions.create(
        model=aoai_api_deployment_name,  # name of the chat model deployment
        messages=messages,
        temperature=temperature,
    )
    return completion.choices[0].message.content


def clean_json_string(json_string):
    """Strip a surrounding Markdown code fence from a model response.

    Handles both ```` ```json ```` and bare ```` ``` ```` fences, and ignores
    whitespace around the fence. Text that is not wrapped in a fence is
    returned unchanged apart from trimming surrounding whitespace.

    Args:
        json_string: Raw text returned by the model.

    Returns:
        The text inside the fence (or the whole text when there is no
        fence), with leading and trailing whitespace removed.
    """
    stripped = json_string.strip()
    # Trim first so leading/trailing whitespace cannot stop the fence from
    # being recognised; the language tag after the opening fence is optional.
    fenced = re.fullmatch(
        r"```(?:json)?\s*(.*?)\s*```", stripped, flags=re.DOTALL | re.IGNORECASE
    )
    if fenced:
        return fenced.group(1).strip()
    return stripped


def merge_json_files(container_client: ContainerClient, profile_folder_name: str):
    """Combine all per-person profile JSON blobs into one aggregated blob.

    Reads every ``.json`` blob whose name starts with
    ``<output_folder_name>/<profile_text_folder_name>/<profile_folder_name>``
    and uploads the list of parsed documents to
    ``<output_folder_name>/<profile_aggregated_folder_name>/<profile_folder_name>/aggregated_team_profiles.json``,
    overwriting any existing blob.

    Args:
        container_client: Container holding both the source and target blobs.
        profile_folder_name: Folder (team) whose profiles are merged.

    Returns:
        ``True`` on success; ``False`` if any step raised (the error is
        printed).
    """
    try:
        source_prefix = "/".join(
            (output_folder_name, profile_text_folder_name, profile_folder_name)
        )
        profiles = [
            json.loads(
                container_client.get_blob_client(entry.name).download_blob().readall()
            )
            for entry in container_client.list_blobs(name_starts_with=source_prefix)
            if entry.name.endswith(".json")
        ]

        target_blob_name = (
            "/".join(
                (
                    output_folder_name,
                    profile_aggregated_folder_name,
                    profile_folder_name,
                )
            )
            + "/aggregated_team_profiles.json"
        )
        target_client = container_client.get_blob_client(target_blob_name)
        target_client.upload_blob(json.dumps(profiles, indent=4), overwrite=True)
    except Exception as e:
        print(f"Failed to generate aggregated team_profiles: {e}")
        return False
    return True


def generate_profile_images(
    container_client: ContainerClient, profile_folder_name: str
):
    """Generate one image per stored profile JSON and upload it as a JPG blob.

    For every ``.json`` blob under
    ``<output_folder_name>/<profile_text_folder_name>/<profile_folder_name>``
    the profile's ``Overall_Summary`` is inserted into ``img_profile_prompt``,
    an image is requested from the image-generation deployment, downloaded,
    and uploaded under ``<output_folder_name>/<profile_img_folder_name>/...``
    with the same relative name and a ``.jpg`` extension.

    A failure on one profile is printed and the loop moves on to the next.

    Args:
        container_client: Container holding the profile JSON blobs and
            receiving the generated images.
        profile_folder_name: Folder (team) whose profiles are processed.

    Returns:
        ``True`` when the loop completed (even if individual profiles
        failed); ``False`` if listing the blobs or another outer step raised.
    """
    try:
        print("Started image creating process")
        blob_list = container_client.list_blobs(
            name_starts_with=output_folder_name
            + "/"
            + profile_text_folder_name
            + "/"
            + profile_folder_name
        )
        # Prefer the configured deployment name; keep the previous hard-coded
        # name as a fallback when the environment variable is not set.
        image_model = dalle_deployment or "Dalle3"

        for blob in blob_list:
            try:
                print("\t" + blob.name)
                print("type:", blob.content_settings.content_type)
                if not blob.name.endswith(".json"):
                    continue

                blob_client = container_client.get_blob_client(blob.name)
                downloaded_blob = blob_client.download_blob().readall()
                json_data = json.loads(downloaded_blob)
                final_image_prompt = img_profile_prompt.format(
                    person_profile_summary=json_data["Overall_Summary"]
                )
                print(final_image_prompt)

                result = client.images.generate(
                    model=image_model,  # the name of your DALL-E 3 deployment
                    prompt=final_image_prompt,
                    n=1,
                )
                json_response = json.loads(result.model_dump_json())
                image_url = json_response["data"][0]["url"]

                # Fail on HTTP errors instead of uploading an error page as
                # the image, and never hang forever on the download.
                image_response = requests.get(image_url, timeout=60)
                image_response.raise_for_status()
                generated_image = image_response.content

                img_file_name = blob.name.replace(
                    output_folder_name + "/" + profile_text_folder_name,
                    output_folder_name + "/" + profile_img_folder_name,
                ).replace(".json", ".jpg")
                print(img_file_name)
                img_blob_client = container_client.get_blob_client(img_file_name)
                img_blob_client.upload_blob(
                    generated_image, blob_type="BlockBlob", overwrite=True
                )

            except Exception as e:
                print(f"An error occurred with item {blob.name}: {e}")
                # Not every exception carries ``.code``; reading it directly
                # raised AttributeError here and aborted the whole loop.
                print("eroor code:", getattr(e, "code", None))
                # Continue to the next item in the loop
                continue

        return True
    except Exception as e:
        print(f"Failed to generate profile images: {e}")
        return False


def match_profiles(
    container_client: ContainerClient, temperature: float, profile_folder_name: str
):
    """Score every profile against all others and store the ranked matches.

    Loads the aggregated profile list from
    ``<output_folder_name>/<profile_aggregated_folder_name>/<profile_folder_name>/aggregated_team_profiles.json``.
    For each profile, the remaining profiles are split into at most ten
    parts; each part is sent to the chat model together with the current
    profile, and the returned score lists are concatenated, sorted by
    ``Overall_Similarity_Score`` (descending) and uploaded to
    ``<output_folder_name>/<profile_matches_folder_name>/<profile_folder_name>/<name>.json``.

    A failure for one profile is printed and the loop moves on to the next.

    Args:
        container_client: Container holding the aggregated profiles and
            receiving the match files.
        temperature: Sampling temperature passed to the chat model.
        profile_folder_name: Folder (team) whose profiles are matched.

    Returns:
        ``True`` when the loop completed (even if individual profiles
        failed); ``False`` if loading the aggregated file or another outer
        step raised.
    """
    try:
        blob_client = container_client.get_blob_client(
            output_folder_name
            + "/"
            + profile_aggregated_folder_name
            + "/"
            + profile_folder_name
            + "/aggregated_team_profiles.json"
        )

        downloaded_blob = blob_client.download_blob().readall()
        json_data = json.loads(downloaded_blob)
        updated_json = [
            {
                "Name": item["Team_Member_Name"],
                "What_I_Do": item["What_I_Do"],
                "What_I_Like": item["What_I_Like"],
                "Overall_Summary": item["Overall_Summary"],
            }
            for item in json_data
        ]

        for index, current_object in enumerate(updated_json):
            # Bound before the try block so the error message below can
            # always refer to it.
            name_profile = json.dumps(current_object["Name"]).strip('"')
            try:
                # Every profile except the current one.
                rest_of_objects = updated_json[:index] + updated_json[index + 1 :]
                print("Current Object:", name_profile)
                print("\n")

                # Split the other profiles into at most ten parts. The size
                # is clamped to 1 because a single-profile team would give 0,
                # and range() rejects a zero step.
                part_length = max(1, math.ceil(len(rest_of_objects) / 10))
                parts = [
                    rest_of_objects[start : start + part_length]
                    for start in range(0, len(rest_of_objects), part_length)
                ]

                final_json = []
                for part in parts:
                    final_user_prompt = member_match_usr_prompt.format(
                        team_member_profile=json.dumps(current_object, indent=4),
                        rest_of_team_profiles=json.dumps(part, indent=4),
                    )
                    aoai_response = getResponseFromAoAi(
                        member_match_sys_prompt, final_user_prompt, temp=temperature
                    )
                    cleaned_response = clean_json_string(aoai_response)
                    parsed_json = json.loads(cleaned_response)
                    # The model may answer with one object instead of a list
                    # when a part holds a single profile; extending with a
                    # dict would add its keys rather than the object.
                    if isinstance(parsed_json, dict):
                        parsed_json = [parsed_json]
                    final_json.extend(parsed_json)

                sorted_json_data = sorted(
                    final_json,
                    key=lambda x: x["Overall_Similarity_Score"],
                    reverse=True,
                )

                match_blob_client = container_client.get_blob_client(
                    output_folder_name
                    + "/"
                    + profile_matches_folder_name
                    + "/"
                    + profile_folder_name
                    + "/"
                    + name_profile
                    + ".json"
                )
                match_blob_client.upload_blob(
                    json.dumps(sorted_json_data, indent=4), overwrite=True
                )
            except Exception as e:
                print(
                    f"An error occurred while generating matches for profile {name_profile}: {e}"
                )
                # Not every exception carries ``.code``; reading it directly
                # raised AttributeError here and aborted the whole loop.
                print("eroor code:", getattr(e, "code", None))
                # Continue to the next item in the loop
                continue

        return True
    except Exception as e:
        print(f"Failed while matching profiles: {e}")
        return False

## Prompts

In [None]:

# System prompt for the profile-extraction call: tells the model which topics
# to extract from a profile slide image and the JSON structure to return.
# The embedded JSON template must itself be valid JSON, since the model is
# asked to mirror it.
image_insights_sys_prompt = """ 
You are a specialized AI model designed to extract and analyze text and visual information from images containing team member profiles. Your task is to process each image, identify and categorize the textual information under predefined topics, and provide a description and summary of the images. The predefined topics are "Who I Am," "What I Do," "What I Like," "Analyze Me," "The Last Word," ,  "To Work With Me, You Should Know","Career Path & Education", "Passions", "I Thrive On" and "I'm Good @".   Note that not all images may contain information on every topic. Your primary goal is to capture detailed information that will be used to match people based on their interests, hobbies, skills, experience, and other relevant attributes. The images contain personal and professional details, along with pictures representing their interests and lifestyle.

For each image, follow these steps:

1. **Extract Team Member's Name:**
   - Identify and extract the name of the team member to whom the image belongs.
   - This name should be placed at the top of the JSON output as a separate element.

2. **Extract Text:**
   - Identify and extract text related to the topics listed above.
   - If a topic is not mentioned in the image, return a "Not Provided" status for that topic.
   - Based on the information from extracted text, identify the current location (City and state), if available.
   - Extract any additional relevant information that could help in matching people, such as certifications, languages spoken, or specific project experience.

3. **Image Analysis:**
   - Describe the content of the pictures with a focus on extracting every possible detail.
   - Pay attention to all elements such as people, objects, food, places, activities, and settings that reflect the individual's personal or professional life.
   - Ensure to capture details like the food the person is eating, the places they are in, clothing, and any background elements that might provide insights into their lifestyle, interests, or personality.
   - Extract all details from the pictures and ensure no detail is missed.

4. **Identify Superhero Match:**
   - Based on the extracted profile information, identify a superhero that closely matches the person’s characteristics, personality, or lifestyle.
   - Provide a brief reason for this match.

5. **Identify Food Recommendation:**
   - Based on the extracted profile information, suggest a food item that the person should try.
   - Provide a brief explanation for the recommendation.

6. **Detailed Insights Based on Pictures:**
   - Analyze the pictures mentioned in the image and provide detailed insights about the person.
   - These insights should be based on the content of the pictures and reflect the individual’s personality, interests, lifestyle, and any other relevant attributes.

7. **Generate JSON:**
   - Organize the extracted information into the following structured JSON format.Ensure accuracy in filling out each field based on the information extracted from the image. Handle missing data with "Not Provided" for the predefined topics or leave optional fields empty if they do not apply.
{
  "Team_Member_Name": "team member name",
  "Who_I_Am": "details extracted related to who I am and Career Path & Education",
  "What_I_Do": "details extracted related to what I do",
  "What_I_Like": "details extracted related to what I like and Passions",
  "Analyze_Me": "details extracted related to analyze me",
  "The_Last_Word": "details extracted related to the last word",
  "Other Info":"details extracted related to I'm Good at and I thrive on", 
  "Current_Location": "details extracted related to current location",
  "To_Work_With_Me_You_Should_Know": "details extracted related to To Work With Me, You Should Know",
  "Image_Analysis": "all details captured from the pictures in the image, including food, places, and other elements",
  "Superhero_Match": {
    "Superhero": "superhero identified",
    "Reason": "brief reason for the match"
  },
  "Food_Recommendation": {
    "Food_Item": "recommended food item",
    "Reason": "brief reason for the recommendation"
  },
  "Detailed_Insights_From_Pictures": "detailed insights about the person based on the pictures mentioned in the image",
  "Overall_Summary": "detailed summary combining all of the above information for an overall understanding of the team member’s profile. Make sure to include all of the details extracted including Superhero_Match and Food_Recommendation if available."
}

8. **Considerations:**
   - Ensure accuracy in extracting text, especially when dealing with varied fonts and layouts.
   - Provide clear and concise descriptions for images that adequately represent the visual content.
   - Be mindful of incomplete information, and appropriately handle missing data without making assumptions.

9. **Output Requirements:**
   - Produce the final output in a proper JSON format. The JSON should be clean and properly structured without any additional markers or formatting such as triple backticks or other delimiters.

"""


# User message sent alongside the profile image in the extraction cell;
# paired with image_insights_sys_prompt.
image_insights_usr_prompt = """
Extract all the relevant information from the attached image file:

Please, make sure that the output is valid JSON. 
"""


# System prompt for profile matching: asks the model to score each of the
# other profiles against one team member and return a JSON array. The example
# at the end must follow the same rules as the instructions (scores 1-10,
# overall = average of the three), otherwise it contradicts them.
member_match_sys_prompt = """
You are an AI model tasked with assigning similarity scores between a given team member's profile and the profiles of other team members based on their interests, skills, and personality. You will be provided with a JSON object containing profiles of all team members.

Your task is to:

1. **Evaluate All Profiles and Assign Similarity Scores**:
   - For each profile in the JSON, evaluate its similarity to the given team member's profile based on the following criteria:
     - **Interests**: From the `What_I_Like` section. Assign a similarity score from 1 to 10, where 10 represents very high similarity.
     - **Skills and Experience**: From the `What_I_Do` section. Assign a similarity score from 1 to 10, where 10 represents very high similarity.
     - **Personality**: From the `Overall_Summary` and any relevant information in other sections. Assign a similarity score from 1 to 10, where 10 represents very high similarity.
   - Calculate an **overall similarity score** for each profile by averaging the individual similarity scores for interests, skills, and personality.

2. **Generate a JSON Output**:
   - The output should consist of valid JSON only, with no extra text or explanations. Each profile's output should include:
     - `Name`: Name of the team member.
     - `Interests_Similarity_Score`: The similarity score for interests (1 to 10).
     - `Skills_Similarity_Score`: The similarity score for skills and experience (1 to 10).
     - `Personality_Similarity_Score`: The similarity score for personality (1 to 10).
     - `Overall_Similarity_Score`: The overall similarity score for this profile, calculated as the average of the three individual scores.

3. **Important Instruction**:
   - Respond **only** with the JSON output in the format specified.
   - Do not include any other text or explanations.
   - Ensure that the response is a valid JSON format.

### JSON Output Example:
```json
[
  {
    "Name": "Adam Davenport",
    "Interests_Similarity_Score": 1,
    "Skills_Similarity_Score": 5,
    "Personality_Similarity_Score": 9,
    "Overall_Similarity_Score": 5
  },
  {
    "Name": "Emily Brown",
    "Interests_Similarity_Score": 9,
    "Skills_Similarity_Score": 4,
    "Personality_Similarity_Score": 5,
    "Overall_Similarity_Score": 6
  }
]
```

"""
# User message template for profile matching. match_profiles fills the
# {team_member_profile} and {rest_of_team_profiles} placeholders via
# str.format with JSON dumps of the current profile and one part of the others.
member_match_usr_prompt = """
Here is JSON for team_member_profile:{team_member_profile}

Here is JSON for rest_of_team_profiles :{rest_of_team_profiles}

Generate JSON Output as mentioned above. 
"""

# Image-generation prompt template. generate_profile_images fills the
# {person_profile_summary} placeholder via str.format with the profile's
# Overall_Summary. The last sentence was garbled in the original and is
# completed here so the instruction is unambiguous to the model.
img_profile_prompt = """                
    Create a 4-section image representing an individual based on the following summary: 
    {person_profile_summary}
    Each section should depict this person in a different context, such as their hobbies, interests, likes, dislikes, and positive personality traits. Ensure each section is visually distinct and captures the essence of their diverse characteristics in a respectful and creative manner. No text or religious symbols, imagery, or elements should be included in the image. Make sure to take their Superhero match into account while creating the image. 
        """

## Initialize the variables

In [None]:
# --- Azure OpenAI settings (read from the environment / local.env) ---
aoai_api_key = os.getenv("AZURE_OPENAI_KEY")
aoai_api_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
aoai_api_deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
dalle_deployment = os.getenv("AZURE_DALLE_DEPLOYMENT_NAME")
embeding_model = os.getenv("AZURE_EMBEDDING_MODEL")
aoai_api_version = "2024-06-01"

# --- Azure Blob Storage settings ---
account_name = os.getenv("STORAGE_ACCOUNT_NAME")
account_key = os.getenv("STORAGE_ACCOUNT_KEY")
input_container_name = os.getenv("STORAGE_CONTAINER_NAME")
connection_string = (
    "DefaultEndpointsProtocol=https;"
    f"AccountName={account_name};"
    f"AccountKey={account_key};"
    "EndpointSuffix=core.windows.net"
)

# --- Folder layout inside the container ---
input_folder_name = "input"
output_folder_name = "output"
profile_text_folder_name = "profile_text"
profile_img_folder_name = "profile_img"
profile_aggregated_folder_name = "profiles_aggregated"
profile_matches_folder_name = "profiles_matches"

# --- Run parameters ---
chunk_tokens = 8000
# Sub-folder (under the input folder) whose slide images are processed.
profile_folder_name = "test2"

## Extract the profile info and generate JSON file

In [None]:
# Extraction cell: for every JPEG blob under <input_folder_name>/<profile_folder_name>,
# ask the vision model for the profile JSON and store it under
# <output_folder_name>/<profile_text_folder_name>/<same sub-folder>/<member name>.json.

# Defined before the try block so the follow-up cell can still evaluate
# `team_file` even when client setup below fails.
team_file = False
input_container_client = None

try:
    print("starting time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    # Azure OpenAI client used by all helper functions in this notebook
    client = AzureOpenAI(
        api_key=aoai_api_key, api_version=aoai_api_version, azure_endpoint=aoai_api_endpoint
    )

    # Create a BlobServiceClient object using the connection string
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    # Create a ContainerClient object
    input_container_client = blob_service_client.get_container_client(
        input_container_name
    )

    # List the blobs in the container
    blob_list = input_container_client.list_blobs(
        name_starts_with=input_folder_name + "/" + profile_folder_name
    )
    chunks = []

    print("Started profile info extraction process")
    # extract profile info and store as json
    for blob in blob_list:
        print("\t" + blob.name)
        print("type:", blob.content_settings.content_type)

        # Only slide images are processed; skip everything else before doing
        # any per-blob work such as generating a SAS token.
        if blob.content_settings.content_type != "image/jpeg":
            continue
        team_file = True

        try:
            blob_client = input_container_client.get_blob_client(blob.name)
            sas_token = generate_sas_token(
                blob_service_client=blob_service_client, source_blob=blob_client
            )
            source_blob_sas_url = blob_client.url + "?" + sas_token

            aoai_response = getResponseFromAoAi_image(
                image_insights_sys_prompt,
                image_insights_usr_prompt,
                0.7,
                source_blob_sas_url,
            )
            cleaned_response = clean_json_string(aoai_response)
            json_data = json.loads(cleaned_response)

            profile_name = (json_data.get("Team_Member_Name") or "").strip('"')
            if not profile_name:
                raise ValueError("model response did not contain a Team_Member_Name")

            # Drop only the leading input folder; an unbounded replace would
            # also mangle sub-folder names that happen to contain that text.
            folder_path = os.path.dirname(blob.name).replace(input_folder_name, "", 1)
            llm_response_file_name = (
                output_folder_name
                + "/"
                + profile_text_folder_name
                + folder_path
                + "/"
                + profile_name
                + ".json"
            )

            output_blob_client = input_container_client.get_blob_client(
                llm_response_file_name
            )
            output_blob_client.upload_blob(cleaned_response, overwrite=True)
        except Exception as ex:
            # One unreadable slide should not stop the remaining ones.
            print(f"An error occurred with item {blob.name}: {ex}")
            continue

    print("End time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

except Exception as ex:
    print("Exception:")
    print(ex)


## Generate the following
* Profile Images
* Aggregated Profile JSON file
* Identify Matches for each profile

In [None]:
# Post-processing cell: runs only when the extraction cell saw at least one
# JPEG. The three stages run in order and each reports its own outcome; a
# failed stage does not prevent the later ones from running.
if team_file:
    images_ok = generate_profile_images(input_container_client, profile_folder_name)
    print(
        "Successfully generated profile images"
        if images_ok
        else "Failed to generate profile images"
    )

    merged_ok = merge_json_files(input_container_client, profile_folder_name)
    print("Successfully merged profiles" if merged_ok else "Failed to merge profiles")

    matched_ok = match_profiles(input_container_client, 0.0, profile_folder_name)
    print(
        "Successfully matched profiles" if matched_ok else "Failed to match profiles"
    )