# Env import

In [46]:
from datetime import date, datetime

from apikey import API_KEY, BASE_URL, DEPLOYMENT_NAME
from langchain.chat_models import AzureChatOpenAI

from langchain import PromptTemplate
from langchain.prompts.chat import(
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate
)

from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
    )

from langchain.chains import LLMChain, APIChain, SequentialChain, SimpleSequentialChain

import os 
from apikey import apikey 


os.environ['OPENAI_API_KEY'] = apikey

import json


In [2]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI


# Config 

In [52]:
data_info = """
[{"name":"会员ID","type":"String"},
{"name":"销量","type":"Long"},
{"name":"省份","type":"Double"},
{"name":"平台","type":"Double"},
{"name":"日期","type":"Date"}]
"""

# 当前时间
today = date.today()
formatted_date = today.strftime("%Y-%m-%d")

today_dt = datetime.now()
formatted_dt = today_dt.strftime("%Y-%m-%d %H:%M:%S")

formatted_dt



'2023-06-19 20:02:50'

# Model

In [53]:
chat_llm = AzureChatOpenAI(
    openai_api_base=BASE_URL,
    openai_api_version="2023-05-15",
    deployment_name=DEPLOYMENT_NAME,
    openai_api_key=API_KEY,
    openai_api_type = "azure",
    temperature=0
)

# chat_llm = ChatOpenAI(temperature=0.0, model= 'gpt-3.5-turbo-0613')
# chat_llm


In [54]:
# test
chat_llm([HumanMessage(content="Translate this sentence from English to Chinese. I love programming.")])


AIMessage(content='我喜欢编程。', additional_kwargs={}, example=False)

In [73]:
example="""
{
"chartType": "PIE",
"row": [{
"name": "省份"
}],
"metric": [{
"aggrType": "SUM",
"name": "省份"
}],
"filter": [{
"filterType": "IN",
"filterValue": "["浙江省","上海市","江苏省"]",
"name": "省份"
}],
"explanation": "维度字段为省份，筛选条件为省份是浙江省、上海市、江苏省，数值字段为对销量进行求和，使用饼图可视化展示各省份销量占比情况。",
"title": "各省份销量占比",
"description": "各省份销量占比的饼图表示"
}
"""

api_parameters = """
{
"chartType": // recommended chart type.BASIC_COLUMN for bar chart. BASIC_LINE for line chart. PIE for pie chart.This is an enumeration value, the optional values are BASIC_COLUMN,BASIC_BAR,BASIC_LINE,PIE,
"row": // Data dimension field
[{
"name": // field name
}],
"metric": // Data metric field
[{
"aggrType": // Aggregation method. This is an enumeration value, the optional values are SUM,CNT,MIN,MAX,AVG,STDDEV,VAR,CNT_DISTINCT,NUL,MED,PERCENTILE,
"name": // field name
}],
"filter": // sorting method
[{
"filterType": // filter type.For example, BT means between, GT means Greater Than.This is an enumeration value, the optional values are BT,GT,GE,LT,LE,EQ,NE,IN,NI,
"filterValue": // value to compare.Two elements when filterType is BT. Multiple elements when filterType is IN or NI. One element when filterType is other.
[String],
"name": // field name
}],
"explanation": // The explanation of the chart, expressed in Chinese,
"title": // The title of the chart, within 15 characters, expressed in Chinese,
"description": // The description of the chart, within 50 characters, expressed in Chinese
}
For example:
{
"chartType": PIE,
"row": [{
"name": "省份"
}],
"metric": [{
"aggrType": SUM,
"name": "省份"
}],
"filter": [{
"filterType": IN,
"filterValue": "["浙江省","上海市","江苏省"]",
"name": "省份"
}],
"explanation": "维度字段为省份，筛选条件为省份是浙江省、上海市、江苏省，数值字段为对销量进行求和，使用饼图可视化展示各省份销量占比情况。",
"title": "各省份销量占比",
"description": "各省份销量占比的饼图表示"
}
"""

# Promp Eng

In [74]:
system_template = """You are Guandata-GPT, 'an AI assistant designed to help data analysts do their daily work.'
Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.
The Current Time is {formatted_dt}

GOALS:
1. "Check the dataset schema meets user's requirements."
2. "Generate the api core parameters for charting through the dataset schema."
3. "Check that the api parameters are correct."

the dataset scheme is {data_info}

the api core parameters is {api_parameters}

You should only respond in JSON format as described below
Response Format: 
{example}
"""

system_message_prompt = SystemMessagePromptTemplate.from_template(template=system_template)
system_message_prompt



SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['api_parameters', 'data_info', 'example', 'formatted_dt'], output_parser=None, partial_variables={}, template='You are Guandata-GPT, \'an AI assistant designed to help data analysts do their daily work.\'\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\nThe Current Time is {formatted_dt}\n\nGOALS:\n1. "Check the dataset schema meets user\'s requirements."\n2. "Generate the api core parameters for charting through the dataset schema."\n3. "Check that the api parameters are correct."\n\nthe dataset scheme is {data_info}\n\nthe api core parameters is {api_parameters}\n\nYou should only respond in JSON format as described below\nResponse Format: \n{example}\n', template_format='f-string', validate_template=True), additional_kwargs={})

In [75]:
human_template = """
user's requirements is {text}

the response is:
"""
human_message_prompt = HumanMessagePromptTemplate.from_template(template=human_template)



In [76]:
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

chat_prompt

ChatPromptTemplate(input_variables=['formatted_dt', 'text', 'api_parameters', 'example', 'data_info'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['api_parameters', 'data_info', 'example', 'formatted_dt'], output_parser=None, partial_variables={}, template='You are Guandata-GPT, \'an AI assistant designed to help data analysts do their daily work.\'\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\nThe Current Time is {formatted_dt}\n\nGOALS:\n1. "Check the dataset schema meets user\'s requirements."\n2. "Generate the api core parameters for charting through the dataset schema."\n3. "Check that the api parameters are correct."\n\nthe dataset scheme is {data_info}\n\nthe api core parameters is {api_parameters}\n\nYou should only respond in JSON format as described below\nResponse Format: \n

In [36]:
print(chat_prompt.format(text="会员销量趋势", 
                         api_parameters=api_parameters,
                   formatted_dt=formatted_dt, 
                   example=example,
                   data_info=data_info))


System: You are Guandata-GPT, 'an AI assistant designed to help data analysts do their daily work.'
Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.
The Current Time is 2023-06-19 19:55:01

GOALS:
1. "Check the dataset schema meets user's requirements."
2. "Generate the api core parameters for charting through the dataset schema."
3. "Check that the api parameters are correct."

the dataset scheme is 
[{"name":"会员ID","type":"String"},
{"name":"销量","type":"Long"},
{"name":"省份","type":"Double"},
{"name":"省份","type":"Double"},
{"name":"日期","type":"Date"}]


the api core parameters is 
{
"chartType": // recommended chart type.BASIC_COLUMN for bar chart. BASIC_LINE for line chart. PIE for pie chart.This is an enumeration value, the optional values are BASIC_COLUMN,BASIC_BAR,BASIC_LINE,PIE,
"row": // Data dimension field
[{
"name": // field name
}],
"metric": // Data me

In [77]:
chain = LLMChain(llm=chat_llm, prompt=chat_prompt)
question = '各平台会员数量占比'

chain_response = chain.run({'text':question, 'api_parameters':api_parameters, 
                            'formatted_dt':formatted_dt, 
                            'example':example,
                            'data_info':data_info,
                            'formatted_dt': formatted_dt})


In [78]:
print(chain_response)

{
"chartType": "PIE",
"row": [{
"name": "平台"
}],
"metric": [{
"aggrType": "CNT_DISTINCT",
"name": "会员ID"
}],
"filter": [],
"explanation": "维度字段为平台，数值字段为会员数量，使用饼图可视化展示各平台会员数量占比情况。",
"title": "各平台会员数量占比",
"description": "各平台会员数量占比的饼图表示"
}


In [79]:
type(chain_response)

str

In [80]:
dictionary = json.loads(chain_response)


In [81]:
dictionary

{'chartType': 'PIE',
 'row': [{'name': '平台'}],
 'metric': [{'aggrType': 'CNT_DISTINCT', 'name': '会员ID'}],
 'filter': [],
 'explanation': '维度字段为平台，数值字段为会员数量，使用饼图可视化展示各平台会员数量占比情况。',
 'title': '各平台会员数量占比',
 'description': '各平台会员数量占比的饼图表示'}

## LLMChain

# memory

memory 暂时待定

# Chain 