In [4]:
from dotenv import load_dotenv
import os
# Populate os.environ from a local .env file (if one exists); the cells that
# build the Azure clients read their key, endpoint and deployment from os.environ.
load_dotenv()

True

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.pydantic_v1 import BaseModel
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough


# Prompt for the skeleton stage; `{question}` is its only placeholder.
# A trailing backslash inside the triple-quoted string joins the next source
# line without inserting a newline into the prompt text.
# The text ends with "[Assistant:] 1." -- presumably to prime the reply to
# start at the first numbered point (TODO confirm against model behaviour).
skeleton_generator_template = """[User:] You’re an organizer responsible for only \
giving the skeleton (not the full content) for answering the question.
Provide the skeleton in a list of points (numbered 1., 2., 3., etc.) to answer \
the question. \
Instead of writing a full sentence, each skeleton point should be very short \
with only 3∼5 words. \
Generally, the skeleton should have 3∼10 points. Now, please provide the skeleton \
for the following question.
{question}
Skeleton:
[Assistant:] 1."""

# Prompt for expanding a single skeleton point. Placeholders: `{question}`,
# `{skeleton}`, `{point_index}` (used twice) and `{point_skeleton}`; these are
# the keys that create_list_elements puts on every list element.
# The text ends with "{point_index}. {point_skeleton}", so the model's output
# is a continuation of that point rather than a fresh sentence.
point_expander_template = """[User:] You’re responsible for continuing \
the writing of one and only one point in the overall answer to the following question.
{question}
The skeleton of the answer is
{skeleton}
Continue and only continue the writing of point {point_index}. \
Write it **very shortly** in 1∼2 sentence and do not continue with other points!
[Assistant:] {point_index}. {point_skeleton}"""

def parse_numbered_list(input_str):
    """Parse a numbered list into a list of dictionaries.

    Every line of the form ``<integer>. <text>`` (optional leading whitespace)
    produces one dictionary with two keys:

    * ``"point_index"``    -- the item number as an ``int``
    * ``"point_skeleton"`` -- the item text with surrounding whitespace removed

    Lines that are not numbered items (blank lines, or prose that merely
    contains ". ", such as "Sure. Here is the skeleton:") are skipped instead
    of raising ``ValueError`` from ``int()``.

    Args:
        input_str: Text with one list item per line, separated by ``"\\n"``.

    Returns:
        The parsed items, in the order they appear in ``input_str``.
    """
    # Imported locally so the function is self-contained in any cell order.
    import re

    # digits, optional spaces, a period followed by exactly one space, then
    # the rest of the line (which may itself contain further ". ").
    item_pattern = re.compile(r"\s*(\d+)\s*\. (.*)")

    parsed_list = []
    for line in input_str.split("\n"):
        match = item_pattern.fullmatch(line)
        if match is None:
            # Not a numbered item: ignore it rather than failing the whole parse.
            continue
        parsed_list.append(
            {
                "point_index": int(match.group(1)),
                "point_skeleton": match.group(2).strip(),
            }
        )
    return parsed_list

def create_list_elements(_input):
    """Turn the skeleton text into one prompt-input dict per skeleton point.

    Reads ``_input["skeleton"]`` and ``_input["question"]``. Each parsed point
    (keys ``point_index`` and ``point_skeleton``) is extended with the full
    skeleton text under ``"skeleton"`` and the question under ``"question"``.
    """
    skeleton_text = _input["skeleton"]
    return [
        {**point, "skeleton": skeleton_text, "question": _input["question"]}
        for point in parse_numbered_list(skeleton_text)
    ]

def get_final_answer(expanded_list):
    """Render the expanded points as one numbered answer string.

    The result starts with a fixed header line; each element of
    ``expanded_list`` follows as ``"<n>. <element>"`` (n counting from 1 by
    position) terminated by a blank line.
    """
    header = "Here's a comprehensive answer:\n\n"
    numbered_points = (
        f"{position}. {text}\n\n"
        for position, text in enumerate(expanded_list, start=1)
    )
    return header + "".join(numbered_points)

def get_skeleton_prompt_chain(llm):
    """Assemble the two-stage chain: skeleton generation, then per-point expansion.

    Args:
        llm: The model runnable placed after each prompt.

    Returns:
        A runnable that takes a mapping with a ``"question"`` key and yields
        the string built by ``get_final_answer``.
    """
    # Stage 1: question -> raw skeleton text. The model output is passed on
    # unchanged (no "1. " prefix is prepended to it).
    skeleton_stage = (
        ChatPromptTemplate.from_template(skeleton_generator_template)
        | llm
        | StrOutputParser()
    )

    # Stage 2, for a single point: store the model's text under
    # "continuation", then glue it onto the stripped skeleton point.
    continuation_stage = (
        ChatPromptTemplate.from_template(point_expander_template)
        | llm
        | StrOutputParser()
    )
    expand_one_point = RunnablePassthrough.assign(continuation=continuation_stage) | (
        lambda item: item["point_skeleton"].strip() + " " + item["continuation"]
    )

    # Full pipeline: add "skeleton" to the input, split it into per-point
    # inputs, expand every point via .map(), and format the final answer.
    return (
        RunnablePassthrough.assign(skeleton=skeleton_stage)
        | create_list_elements
        | expand_one_point.map()
        | get_final_answer
    )

In [7]:
from langchain_openai import AzureChatOpenAI
# Azure OpenAI chat client for the skeleton chain. The key, endpoint and
# deployment name are all read from environment variables; streaming is on.
# NOTE(review): a later cell rebinds `llm` using different variable names
# (AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT) and a hard-coded deployment
# ("gpt-4-1106-preview"). Confirm both cells target the same model before
# comparing their latency numbers.
llm = AzureChatOpenAI(api_key = os.environ["API_KEY"],  
                      api_version="2024-02-15-preview",
                      azure_endpoint = os.environ["AZURE_ENDPOINT"],
                      azure_deployment= os.environ["MODELGPT4-8k"],
                      streaming=True)


In [8]:
# Build the skeleton chain around the current `llm` and run it once on a
# sample question, printing the formatted answer.
chain = get_skeleton_prompt_chain(llm)
print(chain.invoke({"question": "What are the main reasons for the French Revolution?"}))

Here's a comprehensive answer:

1. Social Inequality : The rigid social structure of the Old Regime, where privileges were granted by birthright and the nobility and clergy were exempt from taxes, created deep-seated resentment among the commoners and the burgeoning middle class. This unfair distribution of wealth and privilege served as a major catalyst for the Revolution.

2. Enlightenment Ideas : The Age of Enlightenment introduced radical philosophical ideas about democracy and equality, which questioned the traditional hierarchical structures in France and provoked a desire for change.

3. Weak Leadership : King Louis XVI's inability to effectively address the economic crisis and his indecisiveness in implementing reforms contributed significantly to the unrest leading up to the French Revolution.

4. Popular Dissatisfaction : The French public was highly discontented due to high taxes, food scarcity, and the prevalent social inequalities, which greatly contributed to the growing 

In [10]:
from langchain_community.callbacks import get_openai_callback
# Run the chain once inside the OpenAI callback context and print its summary.
# NOTE(review): the recorded output shows 0 tokens and 0 successful requests
# even though the chain was invoked. The model is built with streaming=True;
# presumably usage is not reported to this callback for streamed responses.
# TODO confirm before relying on these numbers.
with get_openai_callback() as cb:
    result = chain.invoke({"question": "What are the main reasons for the French Revolution?"})
    print(cb)

Tokens Used: 0
	Prompt Tokens: 0
	Completion Tokens: 0
Successful Requests: 0
Total Cost (USD): $0.0


In [11]:
# Invoke the skeleton chain 10 times on the same question; `result` is
# overwritten each iteration, so only the last answer is kept.
# NOTE(review): presumably these runs produce the trace records analysed in
# the cells below -- confirm how `data` is exported from them.
for i in range(10):
    result = chain.invoke({"question": "What are the main reasons for the French Revolution?"})

In [14]:
# NOTE(review): the recorded output of this cell is
# "TypeError: list indices must be integers or slices, not str". `data` is not
# defined in any visible cell, so its shape at the time cannot be checked here.
# The same statement appears again in a later cell; consider deleting this one.
data1 = [{key: old_dict[key] for key in ['session_id', 'start_time', 'end_time', 'total_tokens', 'prompt_tokens', 'completion_tokens', 'first_token_time']} for old_dict in data['runs']]

TypeError: list indices must be integers or slices, not str

In [54]:
import pandas as pd

# Keep only the timing and token fields of every record in data['runs'].
data1 = [
    {
        field: run[field]
        for field in [
            'session_id',
            'start_time',
            'end_time',
            'total_tokens',
            'prompt_tokens',
            'completion_tokens',
            'first_token_time',
        ]
    }
    for run in data['runs']
]

# Parse the three timestamp columns, then derive both latencies in seconds.
# Later keyword arguments of .assign see the columns set by earlier ones, so
# the latency lambdas operate on the already-parsed timestamps.
df = pd.DataFrame(data1).assign(
    start_time=lambda frame: pd.to_datetime(frame['start_time']),
    end_time=lambda frame: pd.to_datetime(frame['end_time']),
    first_token_time=lambda frame: pd.to_datetime(frame['first_token_time']),
    e2e_latency=lambda frame: (frame['end_time'] - frame['start_time']).dt.total_seconds(),
    t2f_token=lambda frame: (frame['first_token_time'] - frame['start_time']).dt.total_seconds(),
)
df

Unnamed: 0,session_id,start_time,end_time,total_tokens,prompt_tokens,completion_tokens,first_token_time,e2e_latency,t2f_token
0,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 08:02:33.414940,NaT,17,17,0,NaT,,
1,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 08:00:51.677681,2024-04-08 08:02:33.337126,633,17,616,2024-04-08 08:00:55.848661,101.659445,4.17098
2,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:58:43.128278,2024-04-08 08:00:51.601314,734,17,717,2024-04-08 07:59:05.025311,128.473036,21.897033
3,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:57:36.931981,2024-04-08 07:58:43.041934,658,17,641,2024-04-08 07:57:37.502964,66.109953,0.570983
4,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:55:57.772212,2024-04-08 07:57:36.883197,575,17,558,2024-04-08 07:55:58.380557,99.110985,0.608345
5,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:53:24.100855,2024-04-08 07:55:57.731014,668,17,651,2024-04-08 07:53:24.572488,153.630159,0.471633
6,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:50:47.950236,2024-04-08 07:53:24.059941,631,17,614,2024-04-08 07:50:49.024038,156.109705,1.073802
7,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:49:48.907697,2024-04-08 07:50:47.899681,581,17,564,2024-04-08 07:49:49.426441,58.991984,0.518744
8,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:46:48.760291,2024-04-08 07:49:48.864893,719,17,702,2024-04-08 07:46:49.646131,180.104602,0.88584
9,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:45:38.135434,2024-04-08 07:46:48.445806,666,17,649,2024-04-08 07:45:39.367916,70.310372,1.232482


In [42]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage

# Baseline client: the same question is sent straight to the model, without
# the skeleton chain.
# NOTE(review): this cell reads AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT
# and hard-codes the deployment, whereas the earlier `llm` cell reads API_KEY /
# AZURE_ENDPOINT / MODELGPT4-8k. Confirm both target the same model, otherwise
# the with/without comparison below mixes deployments.
llm = AzureChatOpenAI(api_key = os.environ["AZURE_OPENAI_API_KEY"],
                      api_version="2024-02-15-preview",
                      azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"],
                      azure_deployment= "gpt-4-1106-preview",
                      streaming=True)

# Issue 30 direct requests; `result` keeps only the last reply.
# `.invoke` is used because calling the chat model object directly
# (`llm([...])`) is deprecated in langchain-core.
for i in range(30):
    result = llm.invoke([HumanMessage(content="What are the main reasons for the French Revolution?")])

In [44]:
import pandas as pd

# Keep only the timing and token fields of every record in data['runs'].
data1 = [
    {
        field: run[field]
        for field in [
            'session_id',
            'start_time',
            'end_time',
            'total_tokens',
            'prompt_tokens',
            'completion_tokens',
            'first_token_time',
        ]
    }
    for run in data['runs']
]

# Parse the three timestamp columns, then derive both latencies in seconds.
# Later keyword arguments of .assign see the columns set by earlier ones, so
# the latency lambdas operate on the already-parsed timestamps.
df1 = pd.DataFrame(data1).assign(
    start_time=lambda frame: pd.to_datetime(frame['start_time']),
    end_time=lambda frame: pd.to_datetime(frame['end_time']),
    first_token_time=lambda frame: pd.to_datetime(frame['first_token_time']),
    e2e_latency=lambda frame: (frame['end_time'] - frame['start_time']).dt.total_seconds(),
    t2f_token=lambda frame: (frame['first_token_time'] - frame['start_time']).dt.total_seconds(),
)

In [45]:
df1

Unnamed: 0,session_id,start_time,end_time,total_tokens,prompt_tokens,completion_tokens,first_token_time,e2e_latency,t2f_token
0,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 08:02:33.414940,NaT,17,17,0,NaT,,
1,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 08:00:51.677681,2024-04-08 08:02:33.337126,633,17,616,2024-04-08 08:00:55.848661,101.659445,4.17098
2,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:58:43.128278,2024-04-08 08:00:51.601314,734,17,717,2024-04-08 07:59:05.025311,128.473036,21.897033
3,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:57:36.931981,2024-04-08 07:58:43.041934,658,17,641,2024-04-08 07:57:37.502964,66.109953,0.570983
4,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:55:57.772212,2024-04-08 07:57:36.883197,575,17,558,2024-04-08 07:55:58.380557,99.110985,0.608345
5,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:53:24.100855,2024-04-08 07:55:57.731014,668,17,651,2024-04-08 07:53:24.572488,153.630159,0.471633
6,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:50:47.950236,2024-04-08 07:53:24.059941,631,17,614,2024-04-08 07:50:49.024038,156.109705,1.073802
7,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:49:48.907697,2024-04-08 07:50:47.899681,581,17,564,2024-04-08 07:49:49.426441,58.991984,0.518744
8,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:46:48.760291,2024-04-08 07:49:48.864893,719,17,702,2024-04-08 07:46:49.646131,180.104602,0.88584
9,fa514022-4e41-44c4-bfe0-b2b879b6b3c1,2024-04-08 07:45:38.135434,2024-04-08 07:46:48.445806,666,17,649,2024-04-08 07:45:39.367916,70.310372,1.232482


### Without Skeleton-of-Thought

In [52]:
df1[['completion_tokens', 'e2e_latency', 't2f_token']].describe(percentiles = [.5, 0.9, .95, .99])

Unnamed: 0,completion_tokens,e2e_latency,t2f_token
count,14.0,13.0,13.0
mean,568.142857,103.18954,2.665881
std,172.686402,40.87592,5.861721
min,0.0,58.991984,0.471633
50%,598.5,99.110985,0.636713
90%,686.7,155.613796,3.629101
95%,707.25,165.707664,11.261401
99%,715.05,177.225214,19.769907
max,717.0,180.104602,21.897033


### With Skeleton-of-Thought

In [53]:
df[['completion_tokens', 'e2e_latency', 't2f_token']].describe(percentiles = [.5, 0.9, .95, .99])

Unnamed: 0,completion_tokens,e2e_latency,t2f_token
count,14.0,14.0,14.0
mean,391.071429,30.612563,3.047999
std,37.814251,13.577387,3.917537
min,275.0,18.898039,0.733935
50%,397.5,27.113691,1.172814
90%,413.8,40.635054,5.941634
95%,424.1,52.783141,9.350134
99%,437.62,68.144532,14.059763
max,441.0,71.98488,15.23717
