<a href="https://colab.research.google.com/github/pandeyvaibhav/GPTWrapper/blob/main/Azure_Infra_V1_1_openai_api_automation_wrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Open AI Automation Wrapper

#### Install Open AI Binaries
#### Install Langchain on top of OpenAI
#### Install Backoff and Tenacity
#### Install PDF readers binaries.

In [1]:
!pip install --upgrade openai
#For Fine tuning. Work is pending.
#!pip install --upgrade openai wandb
#Add Langchain
!pip install langchain
!pip install azure-identity azure-keyvault-secrets
!pip install mermaid-python

#These are used only for reading from documents like PDF, etc.
!pip install pypdf
!pip install langchain[llms]
!pip install langchain[all]

from mermaid import Mermaid
import openai
import os
import json
import pprint

Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/77.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.1
Collecting langchain
  Downloading langchain-0.0.312-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.1-py3-none-any.whl (27 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langsmith<0.1.0,>=0.0.43 (from langchain)
  Downloading langsmith-0.0.43-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.0/40.0 kB[0m [31m5

## Selecting required binaries.

In [2]:
#Langchain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

#Building Memory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from google.colab import drive

## Langchain and PDF reading.

In [3]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader

In [4]:
from IPython.display import Markdown


In [5]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

#Common Variables

### Common Variables - Path for drives.

In [6]:
##Set this variable for your inputs.
# Input folder on Google Drive; the drive itself is mounted at '/gdrive' by mount_google_drive().
drive_path1 = '/gdrive/MyDrive/Gen-AI/Input'
# Local path on the runtime; not referenced by any other cell visible in this notebook.
local_path = '/content'

#### Variable to hold model answer

In [7]:
#holds responses from the model
# Accumulates every model answer: the act_on_prompts_* helpers append to it and emit_output prints it.
response_list = []
# Module-level prompt list; the visible cells only ever reset it to an empty list.
prompts = []

Secrets Setting - Replace the secret Variable

In [8]:
#This can be read from a secrets location like Azure Keyvault or it can be read from app secrets in case of an Azure Function type application.
# Do not paste the key into the notebook. Read it from the OPENAI_API_KEY
# environment variable (populate that from your secret store before running).
# When the variable is unset this falls back to the empty string, which is the
# placeholder value the cell used before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
# Mirror the key back into the environment, as the original cell did, for
# libraries that look it up under OPENAI_API_KEY.
os.environ["OPENAI_API_KEY"] = openai.api_key

## Common Methods Setup

### Setup google drive

In [9]:
#Mounting Google drive. You can add your favourite drive/storage option.
def mount_google_drive():
  """Mount Google Drive at '/gdrive', forcing a remount, then print the input folder path."""
  mount_point = '/gdrive'
  drive.mount(mount_point, force_remount=True)
  print("Drive Path:" + drive_path1)
#Attempt loading google drive
mount_google_drive()

Mounted at /gdrive
Drive Path:/gdrive/MyDrive/Gen-AI/Input


In [10]:
#print files in the drive for test!!
!ls {drive_path1}

BackUp	Code-C#  PDF_Requirements  Recipe


### Building Prompt template

In [11]:
# Pass on a role, context and action needed from the model.
# Role sets the thinking hat the model needs to put on.
# Context gives it information to be used for the current problem domain.
# Prompt - is the action you want model to take on the problem domain.
def make_prompt(role_for_model, context_for_model, prompt):
  """Build the full prompt text sent to the model, print it and return it.

  The role, the context and the requested action are combined into a single
  instruction wrapped in triple backticks.

  Args:
    role_for_model: Persona the model should adopt (e.g. "Cloud Infrastructure Architect").
    context_for_model: Background information for the problem domain.
    prompt: The action the model is asked to perform.

  Returns:
    str: The assembled prompt, including the surrounding newlines and indentation
    of the template.
  """
  # Fixed a duplicated word ("and and") that was being sent to the model verbatim.
  prompt1 = f"""
  ```Think like a/an {role_for_model} and be as accurate as possible. {context_for_model} and {prompt} ```
  """
  print(prompt1)
  return prompt1

### GPT 3.5 Model

In [12]:
# list models
models = openai.Model.list()

# print the first model's id
print(models.data[0].id)

# create a chat completion
chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}])

# print the chat completion
print(chat_completion.choices[0].message.content)

text-search-babbage-doc-001
Hello! How can I assist you today?


### Adding tenacity with retry when we hit rate limiting from OpenAI
https://cookbook.openai.com/examples/how_to_handle_rate_limits

### Setting up GPT Model 3.5 for calls. This is the usual one available via the ChatGPT interface. Use this when possible, as it is cheaper and gives good results.

In [13]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

prompts = []

## Method with retry
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_response_from_model_3_5(prompt, model="gpt-3.5-turbo"):
    """Send one user message to the chat model and return the reply text.

    The call is wrapped in tenacity's ``retry`` with a random exponential wait
    (min=1, max=60) and stops after 6 attempts.

    Args:
        prompt: Text sent as the content of a single "user" message.
        model: Chat model name; defaults to "gpt-3.5-turbo".

    Returns:
        The "content" of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,  # degree of randomness of the model's output
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

### Setting up GPT Model 4 for calls. This is only available as a paid option and is at least 30 times more costly for the same type of prompt.

In [14]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff


## Method with retry to handle rate limiting
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_response_from_model_4_0(prompt, model="gpt-4"):
    """Send one user message to the chat model, print the reply and return it.

    The call is wrapped in tenacity's ``retry`` with a random exponential wait
    (min=1, max=60) and stops after 6 attempts.

    Args:
        prompt: Text sent as the content of a single "user" message.
        model: Chat model name; defaults to "gpt-4".

    Returns:
        The "content" of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,  # degree of randomness of the model's output
    )
    answer = completion.choices[0].message["content"]
    print(answer)
    return answer

get_response_from_model_4_0("Hello")

Hello! How can I assist you today?


'Hello! How can I assist you today?'

### Read input recipe

In [42]:
import json
def read_input_recepie(path):
  """Read a JSON recipe file and return its content re-serialised as a JSON string.

  Google Drive is remounted first (via mount_google_drive). The file is parsed
  with ``json.load`` and serialised again with ``json.dumps``.

  Args:
    path: Full path of the recipe JSON file.

  Returns:
    str: JSON text of the recipe. When ``path`` is not an existing file the
    result is still the JSON encoding of an empty string (``'""'``), as before,
    but a warning line is now printed so the cause is visible.

  Raises:
    json.JSONDecodeError: If the file exists but does not contain valid JSON.
  """
  print("read_input_recepie processing ")
  json_data = ""
  mount_google_drive()
  print(path)
  # Open and read the JSON file
  if os.path.isfile(path):
    # Explicit encoding so the result does not depend on the runtime's locale.
    with open(path, 'r', encoding='utf-8') as json_file:
      json_data = json.load(json_file)
  else:
    print("read_input_recepie: file not found - " + str(path))

  # Serialise once and reuse the text for logging and for the return value.
  json_text = json.dumps(json_data)
  print("read_input_recepie processing finished:-\n" + json_text)
  print(type(json_text))
  return json_text

In [None]:
def is_valid_json(text):
    """Return True when ``text`` can be parsed as JSON, False otherwise.

    Args:
        text: Candidate JSON document. Anything ``json.loads`` cannot parse,
            including non-string values such as ``None``, yields False.

    Returns:
        bool: True if ``json.loads(text)`` succeeds.
    """
    try:
        json.loads(text)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError; TypeError is what json.loads
        # raises for input that is not str/bytes/bytearray (e.g. None).
        return False
    return True

# Negative check: arbitrary text is not JSON, so this prints False.
print(is_valid_json("sdfsdfsdfsdf"))

#Test code.
# Read the recipe from Drive and check that the returned string is valid JSON.
ans = read_input_recepie("/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json")
print(is_valid_json(ans))

In [16]:
#General method to read a file
def read_file(path):
  """Return the text content of a file, or None when ``path`` is not a file.

  Google Drive is remounted first (via mount_google_drive).

  Args:
    path: Full path of the file to read.

  Returns:
    str | None: The whole file as text, or None for directories and missing paths.
  """
  mount_google_drive()

  if not os.path.isfile(path):
    # Made explicit: the original fell off the end of the function here.
    return None

  # Explicit encoding so the result does not depend on the runtime's locale.
  with open(path, 'r', encoding='utf-8') as file:
    return file.read()

In [None]:
#Test call.
pprint.pprint(read_file('/gdrive/MyDrive/Gen-AI/Input/Recipe/Person.json'), width=90)

### Iterate the files.

In [None]:
mount_google_drive()

#Get all the files in a folder.
def iterate_files_content(folder=None):
  """Pretty-print the content of every regular file directly inside a folder.

  Args:
    folder: Directory to scan. Defaults to ``drive_path1``, looked up at call
      time, which is the folder the original zero-argument version scanned.

  Returns:
    None. Each file's content is printed with ``pprint`` (width=90).
  """
  if folder is None:
    folder = drive_path1

  # Sorted so the output order is reproducible; os.listdir gives no ordering guarantee.
  for file_name in sorted(os.listdir(folder)):
    file_path = os.path.join(folder, file_name)
    # Skip sub-directories: read_file returns None for anything that is not a
    # file, so the original printed "None" (and remounted the drive) for each one.
    if not os.path.isfile(file_path):
      continue
    pprint.pprint(read_file(file_path), width=90)

#Checking files present.
iterate_files_content()

## Parse the output JSON from Open AI

In [64]:
#Parse a JSON
def parse_json(json_string):
    """Parse a JSON string into Python objects, returning None on failure.

    Prints a progress line and the type of the input before parsing.

    Args:
        json_string: JSON text to parse.

    Returns:
        The parsed value (usually a dict or list), or None when the input is
        not valid JSON or is not a string/bytes value at all. Note that the JSON
        literal ``null`` also parses to None.
    """
    print("parse_json processing")
    print(type(json_string))
    try:
        # Parse the JSON string into a Python object (usually a dictionary or list)
        return json.loads(json_string)
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError (invalid syntax); TypeError is
        # raised for non-string input such as None, which previously escaped.
        print(f"JSON parsing error: {e}")
        return None

# Example usage: Test Case
json_string = '{"name": "John", "age": 30, "city": "New York"}'
parsed_data = parse_json(json_string)
# parse_json signals failure with None. Compare against None rather than
# relying on truthiness, so a valid but empty document ("{}" or "[]") is not
# mistaken for a parse error.
if parsed_data is not None:
    # Access and work with the parsed data
    print(parsed_data.get("name"))

parse_json processing
<class 'str'>
John


In [66]:
# Read the recipe as a JSON string, then parse that string back into Python objects.
ans = read_input_recepie("/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json")
parsed_ans = parse_json(ans)
#print(is_valid_json(parsed_ans))

read_input_recepie processing 
Mounted at /gdrive
Drive Path:/gdrive/MyDrive/Gen-AI/Input
/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing finished:-
{"Role": "Cloud Infrastructure Architect", "Context": "I have two systems, one of them is SAP, and another one is Commercetools. SAP will emit files, and Commercetools will accept messages using C# SDK. We are looking at integrating them. SAP files will have multiple records. Consider usage of Azure-based cloud platform with PAAS based offering mainly utilising API Management, Service Bus messaging, Azure Function Apps", "Prompts": ["Generate a Terraform Code to create Azure Service Bus, Azure API Management, Azure Storage, Azure Function App and hosted in a VNet with appropriate Subnets"]}
<class 'str'>
parse_json processing
<class 'str'>


## Reading recipes one by one, depending on the function.

In [20]:
#path = full_path = os.path.join(drive_path1, "Input", "Cloud_Arch_Recipe.json")
#data = parse_json(read_input_recepie(path))
#role_for_model = data["Role"]
#context_for_model = data["Context"]
#prompts = data["Prompts"]
#print(role_for_model)
#print(context)
#print(type(prompts))

## Debug method to calculate if prompt may be hitting token(context) ceilings for the model.

In [21]:
def count_words(input_string):
    """Return the number of whitespace-separated words in ``input_string``.

    Debug helper for judging whether a prompt may be approaching the model's
    context limit. A disabled check in the original used 850 words as the
    warning threshold.
    """
    return len(input_string.split())

## Emit Model Output for local debugging.

In [22]:
import markdown
from IPython.display import HTML

#Emit the model responses.
def emit_output(response_list):
  """Print each collected model response on its own line, in order."""
  for rendered in map(str, response_list):
    print(rendered)

emit_output(response_list)

Setting the role for the model

In [92]:
def build_recipe_path(recipe_filename, basepath):
  """Load a recipe file and return its role, context and prompts.

  Args:
    recipe_filename: Recipe path relative to ``basepath``
      (e.g. "Recipe/InfraArchitect_recipe.json").
    basepath: Folder the recipe path is joined onto.

  Returns:
    tuple: ``(role_for_model, context_for_model, prompts)`` taken from the
    recipe's "Role", "Context" and "Prompts" keys.

  Raises:
    ValueError: If the recipe did not load as a JSON object (for example the
      file is missing or contains invalid JSON).
    KeyError: If one of the three expected keys is absent.
  """
  print("recipe_filename:-" + recipe_filename)
  print("Basepath:-" + basepath)

  print("Joining path")
  path = os.path.join(basepath, recipe_filename)
  print("Path:- "+path)
  data = parse_json(read_input_recepie(path))

  # The original also printed load_json_and_access_role(path). No such helper is
  # defined in any visible cell, so on a fresh kernel that line raised NameError.
  # NOTE(review): if the helper lives outside this view, re-add the call;
  # the role is available below as data["Role"] either way.

  # read_input_recepie yields '""' for a missing file and parse_json yields None
  # for invalid JSON; fail with a clear message instead of an opaque TypeError
  # on the key lookups below.
  if not isinstance(data, dict):
    raise ValueError(
        f"Recipe '{path}' did not load as a JSON object (got {type(data).__name__}); "
        "check that the file exists and contains valid JSON."
    )

  role_for_model = data["Role"]
  context_for_model = data["Context"]
  prompts = data["Prompts"]
  return role_for_model, context_for_model, prompts

#test the logic:
build_recipe_path("Recipe/InfraArchitect_recipe.json", drive_path1 )

#/gdrive/MyDrive/Gen-AI/Input/Recipe/Infra_Architect_recipe.json

recipe_filename:-Recipe/InfraArchitect_recipe.json
Basepath:-/gdrive/MyDrive/Gen-AI/Input
Joining path
Path:- /gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing 
Mounted at /gdrive
Drive Path:/gdrive/MyDrive/Gen-AI/Input
/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing finished:-
{"Role": "Cloud Infrastructure Architect", "Context": "I have two systems, one of them is SAP, and another one is Commercetools. SAP will emit files, and Commercetools will accept messages using C# SDK. We are looking at integrating them. SAP files will have multiple records. Consider usage of Azure-based cloud platform with PAAS based offering mainly utilising API Management, Service Bus messaging, Azure Function Apps", "Prompts": ["Generate a Terraform Code to create Azure Service Bus, Azure API Management, Azure Storage, Azure Function App and hosted in a VNet with appropriate Subnets"]}
<class 'str'>
parse_json processing

('Cloud Infrastructure Architect',
 'I have two systems, one of them is SAP, and another one is Commercetools. SAP will emit files, and Commercetools will accept messages using C# SDK. We are looking at integrating them. SAP files will have multiple records. Consider usage of Azure-based cloud platform with PAAS based offering mainly utilising API Management, Service Bus messaging, Azure Function Apps',
 ['Generate a Terraform Code to create Azure Service Bus, Azure API Management, Azure Storage, Azure Function App and hosted in a VNet with appropriate Subnets'])

In [93]:
#Read files in a folder
#def read_files(path):

## Setting up Orchestration Methods on top of OpenAIs GPT 3.5 turbo

### Orchestration Method

In [94]:
#Get the recipe file and invoke the model for prompt processing.
def build_and_act_3_5(recipe_filename, basepath=drive_path1, directory="Input"):
  """Load a recipe and run each of its prompts through the GPT-3.5 wrapper.

  Args:
    recipe_filename: Recipe path relative to ``basepath``.
    basepath: Folder containing the recipe; the default is the value of
      ``drive_path1`` at the time this function is defined.
    directory: Only echoed to the output; not used to locate the recipe.
  """
  for echoed in (recipe_filename, basepath, directory):
    print(echoed)
  recipe_parts = build_recipe_path(recipe_filename, basepath)
  role_for_model, context_for_model, prompts = recipe_parts
  act_on_prompts_3_5(role_for_model, context_for_model, prompts)

### Action method - invokes the model

In [95]:
def act_on_prompts_3_5(role_for_model, context_for_model, prompts):
    """Send every prompt to the GPT-3.5 wrapper and collect the answers.

    Prints the prompt list, then for each prompt prints it, builds the full
    prompt with make_prompt and appends the model's answer to the module-level
    ``response_list``.
    """
    print(prompts)
    for single_prompt in prompts:
        print(single_prompt)
        full_prompt = make_prompt(role_for_model, context_for_model, single_prompt)
        answer = get_response_from_model_3_5(full_prompt)
        response_list.append(answer)

## Test the recipe.

In [97]:
build_and_act_3_5("Recipe/InfraArchitect_recipe.json")

Recipe/InfraArchitect_recipe.json
/gdrive/MyDrive/Gen-AI/Input
Input
recipe_filename:-Recipe/InfraArchitect_recipe.json
Basepath:-/gdrive/MyDrive/Gen-AI/Input
Joining path
Path:- /gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing 
Mounted at /gdrive
Drive Path:/gdrive/MyDrive/Gen-AI/Input
/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing finished:-
{"Role": "Cloud Infrastructure Architect", "Context": "I have two systems, one of them is SAP, and another one is Commercetools. SAP will emit files, and Commercetools will accept messages using C# SDK. We are looking at integrating them. SAP files will have multiple records. Consider usage of Azure-based cloud platform with PAAS based offering mainly utilising API Management, Service Bus messaging, Azure Function Apps", "Prompts": ["Generate a Terraform Code to create Azure Service Bus, Azure API Management, Azure Storage, Azure Function App and hosted in a

In [98]:
emit_output(response_list)

As a Cloud Infrastructure Architect, I would design the integration between SAP and Commercetools using the Azure-based cloud platform with PAAS offerings. Here is a high-level overview of the architecture:

1. SAP Integration:
   - SAP will emit files containing multiple records.
   - These files can be stored in Azure Storage for further processing.
   - Azure Function Apps can be used to monitor the storage container and trigger processing whenever new files are added.
   - The Function App can read the SAP files, extract the necessary data, and transform it into a format suitable for integration with Commercetools.
   - The transformed data can be sent to the Service Bus messaging system for further processing.

2. Commercetools Integration:
   - The C# SDK for Commercetools can be used to accept messages from the Service Bus.
   - Azure Function Apps can be used to process the messages received from the Service Bus and perform any necessary actions in Commercetools.
   - The Funct

## Setting-up Orchestration methods to call GPT Model 4



#### Orchestration Method.

In [99]:
#Build the prompts and invoke them.
def build_and_act_4_0(recipe_filename, basepath=drive_path1, directory="Input"):
  """Load a recipe and run each of its prompts through the GPT-4 wrapper.

  Args:
    recipe_filename: Recipe path relative to ``basepath``.
    basepath: Folder containing the recipe; the default is the value of
      ``drive_path1`` at the time this function is defined.
    directory: Only echoed to the output; not used to locate the recipe.
  """
  for echoed in (recipe_filename, basepath, directory):
    print(echoed)
  recipe_parts = build_recipe_path(recipe_filename, basepath)
  role_for_model, context_for_model, prompts = recipe_parts
  act_on_prompts_4_0(role_for_model, context_for_model, prompts)

### Invoke the prompts and collect response.

In [100]:
#Invoke the model
def act_on_prompts_4_0(role_for_model, context_for_model, prompts):
    """Send every prompt to the GPT-4 wrapper and collect the answers.

    For each prompt: prints it, builds the full prompt with make_prompt and
    appends the model's answer to the module-level ``response_list``.
    """
    for single_prompt in prompts:
        print(single_prompt)
        full_prompt = make_prompt(role_for_model, context_for_model, single_prompt)
        answer = get_response_from_model_4_0(full_prompt)
        response_list.append(answer)

### Testing Model 4 with same recipes

In [105]:
#build_and_act_4_0("Recipe/Business_Analyst_recipe.json")
#build_and_act_4_0("GCSE_Year11.json")
#build_and_act_4_0("GCSE_Year11_Recipe.json")
#build_recipe_path(drive_path1, "Input", "Infra_Architect_recipe.json")
build_and_act_4_0("Recipe/InfraArchitect_recipe.json")


Recipe/InfraArchitect_recipe.json
/gdrive/MyDrive/Gen-AI/Input
Input
recipe_filename:-Recipe/InfraArchitect_recipe.json
Basepath:-/gdrive/MyDrive/Gen-AI/Input
Joining path
Path:- /gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing 
Mounted at /gdrive
Drive Path:/gdrive/MyDrive/Gen-AI/Input
/gdrive/MyDrive/Gen-AI/Input/Recipe/InfraArchitect_recipe.json
read_input_recepie processing finished:-
{"Role": "Cloud Infrastructure Architect", "Context": "In the context of a software integration project, two distinct systems play crucial roles: SAP and Commercetools SAAS. SAP, as the source system, is responsible for generating files, each of which may contain multiple records. Commercetools, on the other hand, is purpose-built to accept messages via its C# SDK.The project's primary objective is to leverage an Azure-based cloud platform, with a strong emphasis on Platform-as-a-Service (PaaS) solutions. This comprehensive approach encompasses essential com

In [104]:
emit_output(response_list)

As a Cloud Infrastructure Architect, I would design the integration between SAP and Commercetools using the Azure-based cloud platform with PAAS offerings. Here is a high-level overview of the architecture:

1. SAP Integration:
   - SAP will emit files containing multiple records.
   - These files can be stored in Azure Storage for further processing.
   - Azure Function Apps can be used to monitor the storage container and trigger processing whenever new files are added.
   - The Function App can read the SAP files, extract the necessary data, and transform it into a format suitable for integration with Commercetools.
   - The transformed data can be sent to the Service Bus messaging system for further processing.

2. Commercetools Integration:
   - The C# SDK for Commercetools can be used to accept messages from the Service Bus.
   - Azure Function Apps can be used to process the messages received from the Service Bus and perform any necessary actions in Commercetools.
   - The Funct

In [None]:
# Relative path: the PDF is looked up from the runtime's current working directory.
loader = PyPDFLoader("PITLDNCC.pdf")
# NOTE(review): presumably returns a list of LangChain documents (later cells iterate over it) - confirm.
pages = loader.load_and_split()

In [None]:
# Keep the loaded pages under `data`; later cells iterate over it.
data = pages
# Print the number of pages. The original rebound `pages` to `pages.count`
# (the bound method, not a number), which printed a method object and made
# this cell fail when run a second time.
print(len(pages))
print(data)


In [None]:
documents = loader.load()

In [None]:
# Number of documents loaded from the PDF. The original printed
# `documents.count`, which is the bound method rather than a count.
print(len(documents))

# Ask GPT-4 to generate C# code from the text of the first document.
get_response_from_model_4_0(make_prompt("C# Application Developer", documents[0].page_content, "Generate C# code for every requirement in this context. Use latest dotnet template and utilise BackgroundService. Keep seperate method for every requirement, add code comments and follow best practices."))

In [None]:
type(data)

In [None]:
# Pattern matching the whitespace after a '.' or '?', excluding positions
# preceded by abbreviation-like text. Written as a raw string: the pattern value
# is unchanged, but "\w", "\." and "\s" are invalid escape sequences in a normal
# string literal and trigger a warning. Defined once instead of on every
# iteration.
regex = r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s"

for item in data:
    print(item)
    print(type(item))

In [None]:
# Print every item of `data` together with its position.
for index, item in enumerate(data):
    print(f"Index {index}: {item}")

In [None]:

import regex as re
# Raw string: same pattern value, without invalid-escape warnings.
regex = r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s"
# `data` is a list of documents, and re.split needs a string, so the original
# call raised TypeError. Split the text of each document instead.
# NOTE(review): assumes every item exposes `.page_content`, as
# `documents[0].page_content` does elsewhere in this notebook - confirm.
[re.split(regex, item.page_content) for item in data]

In [None]:
# pprint demo on a small nested structure.
# NOTE: this rebinds `data`, which earlier cells use for the loaded PDF pages;
# re-running those cells after this one will see this dict instead.
data = {
    "name": "John",
    "age": 30,
    "city": "New York",
    "pets": [
        {"type": "dog", "name": "Fido"},
        {"type": "cat", "name": "Fluffy"}
    ]
}

# A narrow width forces pprint to wrap the nested entries onto separate lines.
pprint.pprint(data, width=20)

In [None]:
from IPython.display import Markdown

# Sample Markdown text used to try out IPython's Markdown display object.
markdown_text = """
# Heading 1
## Heading 2

**Bold Text** or __Bold Text__
*Italic Text* or _Italic Text_

- Bullet Point 1
- Bullet Point 2

[Link to Google](https://www.google.com/)

![Image Alt Text](image_url.jpg)
"""

# Render the Markdown text (it is the cell's last expression, so the notebook displays it)
Markdown(markdown_text)

In [23]:
import markdown
from IPython.display import HTML

# Sample Markdown text for the markdown -> HTML conversion demo.
markdown_text = """
# Heading 1

This is a **bold** and *italic* text.

- Item 1
- Item 2
- Item 3

A [link](https://www.example.com).
"""

# Convert Markdown to HTML
html_text = markdown.markdown(markdown_text)

# Display the HTML (it is the cell's last expression, so the notebook displays it)
HTML(html_text)


In [24]:
!pip install markdown
import markdown

# Markdown source text for the conversion below.
input_text = """
# Heading 1
This is a **bold** and *italic* text.1
- Item 1
- Item 2
- Item 3
A [link](https://www.example.com).
"""

# Convert the Markdown input to HTML. Despite its name, `markdown_text` holds
# the output of markdown.markdown(), i.e. HTML.
markdown_text = markdown.markdown(input_text)
#html_text = markdown.markdown(markdown_text)

# Display the HTML
HTML(markdown_text)
# Print the generated HTML as plain text
#print(markdown_text)
# Print the Markdown
#print(markdown_text)

