In [40]:
from langchain.llms import OpenAI
from langchain import PromptTemplate, FewShotPromptTemplate
import os

print(os.environ['HOME'])
print(os.environ['OPENAI_API_KEY'])

llm = OpenAI(model_name="text-davinci-003", n=2, best_of=2)
print(llm("给我讲个笑话？"))

# try a few-shot In-Context Learning setting
examples = [{'word':'好', 'antonym':'坏'}, 
    {'word':'快', 'antonym':'慢'}]

format_template = """
原词: {word}
反义词: {antonym}\n
"""

example_prompt = PromptTemplate(
    input_variables = ['word', 'antonym'],
    template = format_template
)

few_shot_prompt = FewShotPromptTemplate(
    examples = examples, 
    example_prompt = example_prompt,
    prefix = "给出输入的反义词", 
    suffix = "原词: {input}\n反义词:",
    input_variables = ["input"], 
    # the string we join the prefix, examples and suffix
    example_separator = "\n"
)

input = '开心'

print(few_shot_prompt.format(input=input))
print(llm(few_shot_prompt.format(input=input)))

/Users/wujianmin
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


两个熊在谈论他们最喜欢的电影。第一只熊说：“我最喜欢《熊出没》！”第二只熊说：“我最喜欢《熊不出没》！”
给出输入的反义词

原词: 好
反义词: 坏



原词: 快
反义词: 慢


原词: 开心
反义词:
 不开心


In [37]:
from typing import List, Dict

def prompt_one_author(authid : str, examples : List[Dict[str, str]], few_shot_ratio = 0.8, min_reply = 4) :

    example_num = len(examples)
    if example_num < min_reply :
        print("Minimum reply required {0}, received {1} for author {2}".format(min_reply, example_num, authid))
        return
    
    # llm to use
    llm = OpenAI(model_name="text-davinci-003", n=2, best_of=2)
    # Few-shot learning template
    format_template = """
    评论：{post}
    回复：{reply}\n
    """
    
    few_shot_num = int(example_num * few_shot_ratio)
    print(examples[:few_shot_num])

    example_prompt = PromptTemplate(
        input_variables = ['post', 'reply'],
        template = format_template
    )
    
    few_shot_prompt = FewShotPromptTemplate(
        examples = examples[:few_shot_num], 
        example_prompt = example_prompt,
        prefix = "作为一个创作者，请给用户的评论做出回复", 
        suffix = "评论：{input}\n回复：",
        input_variables = ["input"], 
        example_separator = "\n"
    )

    print(few_shot_prompt.format(input="A"))

    for example in examples[few_shot_num:] :
        print("LLM reply: {0}, Author reply: {1}".format(
            llm(few_shot_prompt.format(input=example['post'])), example['reply']))

In [38]:
import sys
import json

def extract_author_reply(input_stream) :
    # read video comments line by line

    authid_prev = None
    auth_reply_buffer = []

    for line in input_stream:
        video_comments = json.loads(line)
        title = video_comments['title']
        authid = video_comments['mthid']
        for comment in video_comments['comment'] :
            content = comment['comment_content']
            if 'reply_list' in comment :
                for reply in comment['reply_list'] :
                    if reply['is_author'] == '1' :
                        # print("{0}\t#{1}\t#{2}\t#{3}".format(authid, title, content, reply['content']))
                        auth_reply_buffer.append({"post":content, "reply":reply["content"]})
        if authid_prev != None and authid != authid_prev : 
            print("===Reply generation for author {0}===\n".format(authid_prev))
            prompt_one_author(authid_prev, auth_reply_buffer)
            auth_reply_buffer = []

        authid_prev = authid


In [39]:
file_path = '/Users/wujianmin/bak-from-mac/PycharmProjects/work-notes/code/comment_info_20w.json'
with open(file_path) as fin :
    extract_author_reply(fin)

===Reply generation for author 1710771319626650===

Minimum reply required 4, received 0 for author 1710771319626650
===Reply generation for author 1560184445643243===

Minimum reply required 4, received 0 for author 1560184445643243
===Reply generation for author 1753607923150347===

Minimum reply required 4, received 0 for author 1753607923150347
===Reply generation for author 1655533672430779===

Minimum reply required 4, received 1 for author 1655533672430779
===Reply generation for author 1629593136560128===

[{'post': '真是能忽悠人，经营药店看地段看人，是三两句能说清楚的', 'reply': '我怎么忽悠人了，我说的是不是事实，我又没让你们给我钱，我也没要求你付费，怎么就忽悠人了，喜欢看就看，不喜欢看就略过'}, {'post': '医院2块6毛的阿莫西林，药房卖26，你说赚钱不赚钱！', 'reply': '主要是医院有些药不卖给你。美林医院10来块钱，外面最高的时候炒到2500，还买不到'}, {'post': '开的试试看，起码不赔钱吧', 'reply': '这个不一定'}, {'post': '我也想开，正看房子', 'reply': '尽量不要选择竞争激烈的地方，选择入住量大的小区，尽量选择方便停车，方便走过路人的地方'}, {'post': '看你开什么样的,一般来说还是比较赚钱的,但要有好的经营理念', 'reply': '很对'}]
作为一个创作者，请给用户的评论做出回复

    评论：真是能忽悠人，经营药店看地段看人，是三两句能说清楚的
    回复：我怎么忽悠人了，我说的是不是事实，我又没让你们给我钱，我也没要求你

InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 6490 tokens (6234 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.

In [6]:
import json
import os

root = f"../datasets/gsm-cn"
file_name = f"reformated-text"

txt_file_in = f"{root}/{file_name}.txt"
jsonl_file_out = f"{root}/{file_name}.jsonl"

examples = []
ans_scaffold = '【解析】'

with open(txt_file_in, 'r') as fin :
    meet_answer = False
    question, answer = '', ''
    for line in fin.readlines() :
        if line.startswith(ans_scaffold) :
            meet_answer = True
            line = line[len(ans_scaffold):]
        elif line[:2].isdigit() : # and (line[3] == '.' or line[3] == '、'):
            if len(question) > 0 and len(answer) > 0:
                examples.append({'question': question, 'answer': answer})
                question, answer = '', ''
            meet_answer = False
            line = line[3:]
        
        if meet_answer :
            answer += line
        else :
            question += line

# last question & answer
if len(question) > 0 and len(answer) > 0:
    examples.append({'question': question, 'answer': answer})
    question, answer = '', ''

with open(jsonl_file_out, 'w', encoding='utf-8') as fout:
    for exa in examples :
        json.dump(exa, fout, ensure_ascii=False)
        fout.write('\n')

In [2]:
import json
import requests

def yiyan_token() :
    yiyan_api_key=os.environ['YIYAN_API_KEY']
    yiyan_sec_key=os.environ['YIYAN_SEC_KEY']
    # api-token url
    TOKEN_URL = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={yiyan_api_key}&client_secret={yiyan_sec_key}"
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # get token
    response = requests.request("POST", TOKEN_URL, headers=headers, data = '')
    response = json.loads(response.content.decode('utf-8'))
    token_fn = 'access_token'
    if token_fn in response :
        return response[token_fn]
    return None

def yiyan_inference_api(payload : str, model = 'yiyan') :
    access_token = yiyan_token()
    assert access_token is not None

    # augument payload for single round chat
    messages = list()
    messages.append({"role" : "user", "content" : payload})
    aug_payload = json.dumps({"messages" : messages})
    # print(aug_payload)

    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    API_URL = f"https://aip.baidubce.com/rpc/2.0/ai_custom/v1/agile/chat/completions?access_token={access_token}"
    response = requests.request("POST", API_URL, headers=headers, data = aug_payload)
    response = json.loads(response.content.decode('utf-8'))
    print(response)
    assert 'result' in response 

    return response['result']

In [3]:
import os

prompt = """
You are a virtual assistant that helps users with their questions by relying on
information from HTTP APIs. When the user asks a question, you should determine whether
you need to fetch information from the API to properly answer it. If so, you will
request the user to provide all the parameters you need, and then ask them to run the
request for you. When you are ready to ask for a request, you should specify it using
the following syntax:

<http_request>{
"url": "<request URL>",
"method": "<method>",
"body": {<json request body>},
"headers": {<json request headers>}
}</http_request>

Replace in all the necessary values the user provides during the interaction, and do not
use placeholders. The user will then provide the response body, which you may use to
formulate your answer. You should not respond with code, but rather provide an answer
directly.

The following APIs are available to you:

---openapi: 3.0.1
info:
  title: TODO Plugin
  description: A plugin that allows the user to create and manage a TODO list using ChatGPT. 
  version: 'v1'
servers:
  - url: https://dummyjson.com/todos
paths:
  /todos:
    get:
      operationId: getTodos
      summary: Get the list of todos
      parameters:
      - in: query
        name: limit
        schema:
          type: integer
        description: Number of todos to return
      - in: query
        name: skip
        schema:
          type: integer
        description: Number of todos to skip from the beginning of the list
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/getTodosResponse'
components:
  schemas:
    getTodosResponse:
      type: object
      properties:
        todos:
          type: array
          items:
            type: object
            properties:
              id:
                type: int
              todo:
                type: string
              completed:
                type: bool
              userId:
                type: string
          description: The list of todos.
--- user input
What are my top 5 todos?
"""
print(f"Prompt Length: {len(prompt)}")
print(yiyan_inference_api(prompt))

Prompt Length: 2148
{'error_code': 17, 'error_msg': 'Open api daily request limit reached'}


AssertionError: 