# Practice using an LLM for Data Creation

In [3]:
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from datasets import load_dataset

from dotenv import load_dotenv

import json
import pandas as pd
import os

from azure.identity import (
    DefaultAzureCredential,
)

from azure.identity import AzureAuthorityHosts
from azure.keyvault.secrets import SecretClient

### Create your Azure OpenAI Resource and Key Vault

_If you have your resource from the last exercise, you don't need to complete the following steps._

Navigate to the [Azure Portal](https://portal.azure.com/#home) or [US Gov Azure Portal](https://portal.azure.us/#home) and login using your account. Next you're going to create an Azure OpenAI resource, create a new resource group and use any unique name for the resource's name.  
Once the resource is created, you need to open [Azure AI Foundry](https://ai.azure.com/) or [Azure OpenAI Studio](https://ai.azure.us/) to deploy the model. Navigate to deployments, press deploy model and select gpt-4o-mini.  Make sure to increase your rate limit, or tokens per minute (around 700k should be sufficient)

Once that is created, copy the key (Found under Resource Management > Keys and Endpoints) and create a new key vault. Assign the same resource group as your Azure OpenAI resource and again pick a unique name for the key vault name.  

Once the key vault is created, make a new secret with the API key.  

### Connect to Azure OpenAI
First, run `az login` in the terminal and login to your FedAIRS account.  
<br>
If you are using a Gov account:<br>
az cloud set --name AzureUSGovernment <br>
az login <br>
az account set --subscription="your subscription"<br>

If you are using a commercial account:<br>
az login <br>
az account set --subscription="your subscription"<br>
<br>
Two things are needed to connect to your Azure OpenAI resource
- Your API key
- Your Endpoint  
  
For the API Key, we are going to connect to the key vault we just made to insert the key. For this, you'll need to change the URL below to match your key vault's URL.  
Next, we will insert the endpoint URL from our Azure OpenAI resource.

Our `azure_client` is where we call the LLM and connect to the model we deployed. Other parameters can also be passed in, such as `timeout` or `max_retries`.

Note: In the block below we inserted `credential = DefaultAzureCredential(authority=AzureAuthorityHosts.AZURE_GOVERNMENT)`. This is because when we use our FedAIRS account we are connected to the US Government cloud, rather than the regular commercial cloud.

In [None]:
# Load settings from a local .env file into the process environment.
load_dotenv()

# Plain (non-secret) connection settings are read from the environment.
deployment = os.getenv("DEPLOYMENT")
endpoint_url = os.getenv("AZURE_OPENAI_ENDPOINT")
api_version = os.getenv("API_VERSION")

# Authenticate against the Azure US Government authority, then pull the
# Azure OpenAI API key out of Key Vault instead of storing it locally.
credential = DefaultAzureCredential(authority=AzureAuthorityHosts.AZURE_GOVERNMENT)
secret_client = SecretClient(
    vault_url=os.getenv("KEY_VAULT_URL"),
    credential=credential,
)
api_key = secret_client.get_secret(os.getenv("SECRET_NAME")).value

# Chat client shared by every chain in this notebook.
azure_client = AzureChatOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=endpoint_url,
    deployment_name=deployment,
    temperature=0,
    max_tokens=4000,
    # Request JSON-object replies so downstream cells can json.loads them.
    model_kwargs={"response_format": {"type": "json_object"}},
)

### The Data

The dataset we are looking at is about 300 rows of sample data about different employees. Each employee has many attributes, but for this exercise we will only use:  
* FirstName
* LastName
* Title
* BirthDate
* EmailAddress
* DepartmentName

In [None]:
# Read the employee dimension table from the local data folder.
employees = pd.read_csv(
    "./data/DimEmployee.csv",
)
# Show the first few rows as the cell output.
employees.head()

Like the previous exercises, we are going to take a sample of 100 employees and convert that into an array.

In [6]:
# Draw 100 random employees (no seed, so the sample differs between runs).
employees_sample = employees.sample(n=100)
# reset_index() without drop=True keeps the old row labels as a new leading
# "index" column, so every original column sits one position further right.
employees_sample = employees_sample.reset_index()
# Positional (row-wise) view of the sample, used to build the prompt inputs.
employees_sample_array = employees_sample.to_numpy()

### Creating a Narrative

We are going to prompt the LLM with the 6 attributes mentioned before (FirstName, LastName, Title, BirthDate, EmailAddress, DepartmentName) and ask it to create a story based on these facts. We will instruct the LLM to keep the spelling and details of all the information passed intact, but still create a narrative in the style requested.

In [7]:
# Prompt for the Shakespearean narrative.
# Single-brace names ({FirstName}, ...) are template variables filled in per
# employee; doubled braces render as literal braces so the model is shown a
# JSON skeleton. The skeleton must itself be valid JSON (note the comma
# between the two members), otherwise the example contradicts the requested
# JSON-object response format.
shakespeare_prompt = PromptTemplate.from_template(
    template='''
### Instructions:
Your task is to create a four-sentence story in a Shakespearean style for the following attributes of each employee: FirstName, LastName, Title, BirthDate, EmailAddress, DepartmentName. You must include all of the details given and all details of the employee should remain intact, there should be no spelling or details about the employee changed.
Each story should capture the essence of the employee's life and their role within the company, based solely on the information provided. Do not introduce any additional details beyond what is given in the input.

### Reasoning Required:
After crafting your Shakespearean tale, provide a brief explanation (about 50 words) of why you chose to depict the employee in this particular way. Analyze how Shakespearean techniques enhance the storytelling and relate to the employee's modern workplace experiences.

### Employee Details:
FirstName: {FirstName}
LastName: {LastName}
Title: {Title}
BirthDate: {BirthDate}
EmailAddress: {EmailAddress}
DepartmentName: {DepartmentName}

### Return JSON:
{{
    "ShakespeareStory": "<The four sentence story about this employee in Shakespearean style>",
    "ShakespeareReasoning": "<Explain why you chose to craft this employee's story in this way>"
}}
'''
)

In [8]:
# Pipeline: fill the Shakespeare prompt -> call the chat model -> reduce the
# model reply to its plain string content (the raw JSON text).
shakespeare_chain = shakespeare_prompt | azure_client | StrOutputParser()

In [9]:
# Prompt for the Marvel-comics narrative.
# Single-brace names ({FirstName}, ...) are template variables filled in per
# employee; doubled braces render as literal braces so the model is shown a
# JSON skeleton. The reasoning section refers to the Marvel style (it was
# previously copied verbatim from the Shakespeare prompt), and the skeleton
# has the comma between its two members so that it is valid JSON.
marvel_prompt = PromptTemplate.from_template(
    template='''
### Instructions:
Your task is to create a four-sentence story in the style of the Marvel Comic Books for the following attributes of each employee: FirstName, LastName, Title, BirthDate, EmailAddress, DepartmentName. You must include all of the details given and all details of the employee should remain intact, there should be no spelling or details about the employee changed.
Each story should capture the essence of the employee's life and their role within the company, based solely on the information provided. Do not introduce any additional details beyond what is given in the input.

### Reasoning Required:
After crafting your Marvel Comics-style story, provide a brief explanation (about 50 words) of why you chose to depict the employee in this particular way. Analyze how Marvel comic-book storytelling techniques enhance the storytelling and relate to the employee's modern workplace experiences.


### Employee Details:
FirstName: {FirstName}
LastName: {LastName}
Title: {Title}
BirthDate: {BirthDate}
EmailAddress: {EmailAddress}
DepartmentName: {DepartmentName}


### Return JSON:
{{
    "MarvelStory": "<The four sentence story about this employee in the Marvel Comics style>",
    "MarvelReasoning": "<Explain why you chose to craft this employee's story in this way>"
}}
'''
)

In [10]:
# Pipeline: fill the Marvel prompt -> call the chat model -> reduce the
# model reply to its plain string content (the raw JSON text).
marvel_chain = marvel_prompt | azure_client | StrOutputParser()

In [11]:
# Prompt for the fairy-tale narrative.
# Single-brace names ({FirstName}, ...) are template variables filled in per
# employee; doubled braces render as literal braces so the model is shown a
# JSON skeleton. The reasoning section refers to the fairy-tale style (it was
# previously copied verbatim from the Shakespeare prompt), and the skeleton
# has the comma between its two members so that it is valid JSON.
fairytale_prompt = PromptTemplate.from_template(
    template='''
### Instructions:
Your task is to create a four-sentence story in the style of a traditional fairy-tale for the following attributes of each employee: FirstName, LastName, Title, BirthDate, EmailAddress, DepartmentName. You must include all of the details given and all details of the employee should remain intact, there should be no spelling or details about the employee changed.
Each story should capture the essence of the employee's life and their role within the company, based solely on the information provided. Do not introduce any additional details beyond what is given in the input.

### Reasoning Required:
After crafting your fairy tale, provide a brief explanation (about 50 words) of why you chose to depict the employee in this particular way. Analyze how traditional fairy-tale techniques enhance the storytelling and relate to the employee's modern workplace experiences.


### Employee Details:
FirstName: {FirstName}
LastName: {LastName}
Title: {Title}
BirthDate: {BirthDate}
EmailAddress: {EmailAddress}
DepartmentName: {DepartmentName}


### Return JSON:
{{
    "FairyTaleStory": "<The four sentence story about this employee in the fairytale  style>",
    "FairyTaleReasoning": "<Explain why you chose to craft this employee's story in this way>"
}}
'''
)

In [12]:
# Pipeline: fill the fairy-tale prompt -> call the chat model -> reduce the
# model reply to its plain string content (the raw JSON text).
fairytale_chain = fairytale_prompt | azure_client | StrOutputParser()

In [13]:
async def runChain(employee_array, chain):
    """Build one prompt-input dict per employee row and batch them through `chain`.

    Args:
        employee_array: Iterable of positionally indexable rows. Each row is
            read at fixed positions 3, 4, 5, 7, 8 and 16.
            NOTE(review): these positions presumably match the column order of
            the sampled DataFrame after reset_index() -- confirm against the CSV.
        chain: Object exposing an awaitable ``abatch(list_of_dicts)`` method.

    Returns:
        Whatever ``chain.abatch`` returns for the list of input dicts.
    """
    # Prompt variable name -> position of that value within a row.
    field_positions = (
        ("FirstName", 3),
        ("LastName", 4),
        ("Title", 5),
        ("BirthDate", 7),
        ("EmailAddress", 8),
        ("DepartmentName", 16),
    )

    payloads = []
    for row in employee_array:
        payloads.append({field: row[pos] for field, pos in field_positions})

    return await chain.abatch(payloads)

In [14]:
async def assess_summaries(df, array, chain):
    """Run `chain` over every employee row and append the parsed JSON fields to `df`.

    Unlike the earlier version, the caller's DataFrame is left untouched: no
    temporary ``results`` column is written into `df`.

    Args:
        df: DataFrame with one row per employee, in the same order as `array`.
        array: Positional row data passed to ``runChain`` to build the prompts.
        chain: Chain whose batched output is one JSON string per row.

    Returns:
        A new DataFrame holding the columns of `df` followed by one column per
        key of the JSON objects returned by the chain.

    Raises:
        json.JSONDecodeError: If any chain reply is not valid JSON.
        ValueError: If the number of replies differs from the number of rows.
    """
    raw_results = await runChain(array, chain)

    # Parse each reply up front so that nothing is stored on the input frame.
    parsed = [json.loads(raw) for raw in raw_results]
    results_df = pd.json_normalize(parsed)

    # Align explicitly on df's index: concat(axis=1) joins on index labels, so
    # a non-default index on `df` would otherwise scatter the rows. Assigning
    # the index also raises if the row counts disagree.
    results_df.index = df.index

    return pd.concat([df, results_df], axis=1)

Now, we are running the prompts through the LLM and adding all of the stories to our dataframe.

In [None]:
input_df = employees_sample
input_array = employees_sample_array
chains = [shakespeare_chain, marvel_chain, fairytale_chain]
output_df = None

for chain in chains:
    print(chain)
    output_df = await assess_summaries(input_df, input_array, chain)
    input_df = output_df

In [None]:
# Preview the first rows of the frame produced by the last chain run.
output_df.head()

Next, we will save the dataframe into a CSV to be used in the extraction notebook

In [47]:
# Written to the current working directory. to_csv keeps its default
# index=True here, so the row index is emitted as an unnamed first column.
output_df.to_csv("DimEmployeeStories.csv")