In [1]:
#!pip install looker-sdk
#!pip install google-cloud-aiplatform langchain

In [2]:
# Natural-language question (in Korean) that the following cells translate into a Looker query.
# English gloss: "Based on creation date, for 2022, show the pension totals for the
# construction and manufacturing industries, month by month."
question = "생성일 기준 2022년으로, 월별로, 건설업, 제조업에 대한 연금합계를 보여줘."

In [3]:
import looker_sdk

# init31() builds a client for Looker API 3.1; init40() is the client for the newer API 4.0.
# A later cell imports its request models from looker_sdk.sdk.api31, so the client and
# that import have to be switched together.
sdk = looker_sdk.init31()
my_user = sdk.me()  # fetch the authenticated user; not read again in the visible cells

In [4]:
def listup_explore(model_name):
  """List the explores that belong to one LookML model.

  Args:
    model_name: Name of the LookML model to inspect (e.g. "lookml_hol_sample").

  Returns:
    The explore entries the Looker API reports for that model, or an empty
    list when the model has none.
  """
  # The earlier body returned sdk.all_projects() and never looked at model_name.
  model = sdk.lookml_model(model_name)
  return model.explores or []

In [5]:
from pprint import pprint

# Pretty-print the explore list of every model returned by sdk.all_lookml_models().
# The loop variable is a notebook global; it is rebound by the get_right_explore() cell.
for lookml_model in sdk.all_lookml_models():
  pprint(lookml_model.explores)

[]
[LookmlModelNavExplore(name='postal_location', description=None, label='Postal Location', hidden=False, group_label='Lookml Hol Sample', can={}),
 LookmlModelNavExplore(name='bizcategory', description='This view includes business category used in Korea. From Level 1(biggest category) to Level 3(smallest sub category).', label='Bizcategory', hidden=False, group_label='Lookml Hol Sample', can={}),
 LookmlModelNavExplore(name='national_pension_mom', description='This model explore shows the national pension information - pension amount, lost member count, added member count, business category and geolocations.', label='National Pension Mom', hidden=False, group_label='Lookml Hol Sample', can={})]
[]


In [6]:
# TODO : Implement this function with RAG architecture.

def get_right_explore(question):
  """Pick the LookML model and explore that should answer ``question``.

  Placeholder implementation: ``question`` is ignored and a fixed pair is
  returned.

  Returns:
    A ``(model_name, explore_name)`` tuple.
  """
  return "lookml_hol_sample", "national_pension_mom"

# Resolve the model/explore pair once; the following cells reuse these two globals.
lookml_model, lookml_explore = get_right_explore(question)

In [7]:
def get_field_values(model_name, explore_name):
  """Build a small data dictionary for one explore.

  Calls ``sdk.lookml_model_explore`` and flattens the explore's dimensions,
  measures and parameters (in that order) into plain dicts.

  Args:
    model_name: LookML model that owns the explore.
    explore_name: Explore whose fields are listed.

  Returns:
    A list of dicts with the keys ``field_type``, ``view_name`` (taken from
    the field's ``view_label``), ``field_name`` and ``description``;
    parameter entries additionally carry ``default_filter_value``.
  """
  explore = sdk.lookml_model_explore(
    lookml_model_name=model_name,
    explore_name=explore_name,
    fields="id, name, description, fields",
  )

  field_set = explore.fields
  if not field_set:
    return []

  def describe(field, field_type, with_default=False):
    # Key insertion order is kept stable because the dicts are later rendered
    # verbatim into an LLM prompt.
    entry = {
      "field_type": field_type,
      "view_name": field.view_label,
      "field_name": field.name,
    }
    if with_default:
      entry["default_filter_value"] = field.default_filter_value
    entry["description"] = field.description
    return entry

  catalog = [describe(dim, "Dimension") for dim in field_set.dimensions or []]
  catalog += [describe(mes, "Measure") for mes in field_set.measures or []]
  catalog += [
    describe(par, "Parameter", with_default=True)
    for par in field_set.parameters or []
  ]
  return catalog

In [8]:
# for lookml_model in sdk.all_lookml_models():
#   print('lookml model name:' + lookml_model.name)
#   for one_explore in lookml_model.explores:
#     print('explore name:' + one_explore.name)
#     pprint(get_field_values(lookml_model.name, one_explore.name))

In [9]:
import vertexai
from langchain.chat_models import ChatVertexAI
from langchain.llms import VertexAI
import os

# Google Cloud project for Vertex AI, read from the PROJECT_ID environment variable
# (os.getenv returns None when the variable is not set).
PROJECT_ID = os.getenv("PROJECT_ID")  # @param {type:"string"}
vertexai.init(project=PROJECT_ID, location="us-central1")

# LangChain VertexAI text model; it is bound to `llm` in the next cell and used by
# every prompt helper in this notebook.
llm_vertex = VertexAI(
    #model_name="text-bison@latest",
    model_name="text-bison-32k",
    max_output_tokens=8000,
    temperature=0,
    top_p=0.8,
    top_k=40,
)


In [10]:
# The prompt helpers below call `llm.predict`, so rebinding this name changes the model they use.
llm = llm_vertex

In [11]:
def choose_right_fields(fields, question):
  """Ask the LLM which explore fields are needed to answer ``question``.

  Args:
    fields: Field metadata for the explore (the list of dicts built by
      ``get_field_values``); it is interpolated into the prompt as-is.
    question: The user's natural-language question.

  Returns:
    The raw text returned by ``llm.predict``. The prompt asks for a JSON
    document with ``dimensions``, ``measures``, ``filters``, ``sorts`` and
    ``parameters`` keys.
  """
  # The example is kept strictly valid JSON (no trailing commas): the reply is
  # parsed as JSON later, so the sample should not demonstrate invalid syntax.
  sample_json = """
{
  "dimensions": [
    "dimension1"
  ],
  "measures": [
    "measure1"
  ],
  "filters": [
    {
      "field_name": "field_name1",
      "values": [
        "value1"
      ]
    }
  ],
  "sorts": [
    {
      "field_name": "field_name1",
      "direction": "asc"
    }
  ],
  "parameters": [
    "param1"
  ]
}
  """

  prompt_template = """As a looker developer, choose the right dimensions and measures for the question below.
  Choose as few fields as possible, and sort fields must be chosen from the dimension fields.

  fields : {fields}

  question: {question}

  answer format: json
  {sample_json}
  """
  # Only the template is parsed by str.format; braces inside the substituted
  # values (fields, sample_json) are inserted literally.
  prompt = prompt_template.format(fields=fields, question=question, sample_json=sample_json)
  return llm.predict(prompt)




In [12]:
# Test Code.
# get_field_values(lookml_model, lookml_explore)


In [13]:
# Ask the LLM which fields of the chosen explore answer the question; the result is raw reply text.
response = choose_right_fields(fields=get_field_values(lookml_model, lookml_explore), question=question)


In [14]:
# Display the raw reply (in the recorded run it is wrapped in a Markdown json code fence).
response

' ```json\n{\n  "dimensions": [\n    "national_pension_mom.datapoint_create_yearmonth_year",\n    "national_pension_mom.datapoint_create_yearmonth_month",\n    "bizcategory.cat_l1"\n  ],\n  "measures": [\n    "national_pension_mom.total_monthly_fixed_amount"\n  ],\n  "filters": [\n    {\n      "field_name": "national_pension_mom.datapoint_create_yearmonth_year",\n      "values": [\n        "2022"\n      ]\n    },\n    {\n      "field_name": "bizcategory.cat_l1",\n      "values": [\n        "건설업",\n        "제조업"\n      ]\n    }\n  ],\n  "sorts": [\n    {\n      "field_name": "national_pension_mom.datapoint_create_yearmonth_month",\n      "direction": "asc"\n    }\n  ],\n  "parameters": []\n}\n```'

In [15]:
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
from typing import List

# One entry of the "filters" array in the field-selection reply.
class LookerFilterValue(BaseModel):
  # Name of the field to filter on.
  field_name: str = Field(description="field_name")
  # Filter values proposed by the LLM for that field.
  values: List[str] = Field(description="values")

# One entry of the "sorts" array in the field-selection reply.
class LookerSortField(BaseModel):
  # Name of the field to sort by.
  field_name: str = Field(description="field_name")
  # Sort direction as plain text; the prompt's sample uses "asc".
  direction: str = Field(description="direction")

# Structured form of the field-selection reply; the attribute names mirror the keys of
# the sample JSON embedded in choose_right_fields' prompt. No field declares a default.
class LookerQueryParameters(BaseModel):
  dimensions: List[str] = Field(description="dimensions")
  measures: List[str] = Field(description="measures")
  parameters: List[str] = Field(description="parameters")
  filters: List[LookerFilterValue] = Field(description="filters")
  sorts: List[LookerSortField] = Field(description="sorts")

def get_right_fields_object(response):
  """Parse the field-selection reply into a ``LookerQueryParameters`` object.

  Args:
    response: Raw LLM text produced by ``choose_right_fields``.

  Returns:
    The result of ``PydanticOutputParser.parse`` for ``LookerQueryParameters``.
  """
  return PydanticOutputParser(pydantic_object=LookerQueryParameters).parse(response)


In [16]:
# Parse the raw reply into a LookerQueryParameters object.
related_fields = get_right_fields_object(response)

In [17]:
def decide_to_retrieve_values_for_the_filters(related_fields):
  """Ask the LLM which filter fields need their values looked up in the database.

  Args:
    related_fields: Object whose ``filters`` attribute is rendered into the
      prompt.

  Returns:
    The raw text returned by ``llm.predict``; the prompt asks for a JSON
    object with a ``required_target`` list of field names.
  """
  example_reply = (
    '\n'
    '  {\n'
    '    "required_target": ["field1","field2"]\n'
    '  }\n'
    '  '
  )
  instructions = (
    "As a looker developer, decide whether to retrieve values for the filters below. \n"
    "  For example, date / timestamp columns don't need to retrieve values. but string columns need to retrieve values from the database.\n"
    "\n"
    "  filter fields : {filter_fields}\n"
    "\n"
    "  output sample : json array\n"
    "  {output_sample}\n"
    "  "
  )
  prompt = instructions.format(
    filter_fields=related_fields.filters,
    output_sample=example_reply,
  )
  return llm.predict(prompt)

In [18]:
# Ask the LLM which of the proposed filter fields need their values looked up (raw reply text).
retrieve_target_filters = decide_to_retrieve_values_for_the_filters(related_fields)

In [19]:

# Structured form of the reply to decide_to_retrieve_values_for_the_filters; the attribute
# name mirrors the "required_target" key shown in that prompt's output sample.
class LookerFilterRetrieves(BaseModel):
  # Names of the filter fields whose values should be looked up.
  required_target: List[str] = Field(description="required_target")

def get_retrieve_target_filters_object(retrieve_target_filters):
  """Parse the lookup-target reply into a ``LookerFilterRetrieves`` object.

  Args:
    retrieve_target_filters: Raw LLM text produced by
      ``decide_to_retrieve_values_for_the_filters``.

  Returns:
    The result of ``PydanticOutputParser.parse`` for ``LookerFilterRetrieves``.
  """
  reply_parser = PydanticOutputParser(pydantic_object=LookerFilterRetrieves)
  parsed_targets = reply_parser.parse(retrieve_target_filters)
  return parsed_targets


In [20]:
# Display the raw reply that names the filter fields to look up.
retrieve_target_filters

' ```json\n  {\n    "required_target": ["national_pension_mom.datapoint_create_yearmonth_year", "bizcategory.cat_l1"]\n  }\n  ```'

In [21]:
def get_user_input_value_for_filter_field(related_fields, field_name):
  """Return the values the LLM proposed for one filter field.

  Args:
    related_fields: Object whose ``filters`` entries expose ``field_name``
      and ``values``.
    field_name: Filter field to look for.

  Returns:
    The ``values`` of the first filter whose ``field_name`` matches, or an
    empty string when no filter matches.
  """
  matching_values = (
    candidate.values
    for candidate in related_fields.filters
    if candidate.field_name == field_name
  )
  return next(matching_values, "")


In [22]:
# Parse the lookup-target reply into a LookerFilterRetrieves object.
retrieve_target_filter_obj = get_retrieve_target_filters_object(retrieve_target_filters)

In [25]:
from looker_sdk.sdk.api31 import models as ml

def get_validated_filter_values_from_looker(lookml_model, lookml_explore, retrieve_target_filter_obj):
  """Query Looker for the existing values of each target filter field.

  For every name in ``retrieve_target_filter_obj.required_target`` a
  single-field query is created and run with the "json" result format. The
  field name and the raw result are printed as they are fetched.

  Args:
    lookml_model: Model name passed to ``WriteQuery``.
    lookml_explore: Explore name, passed as ``view`` to ``WriteQuery``.
    retrieve_target_filter_obj: Object exposing ``required_target``.

  Returns:
    A list with one ``{field_name: raw_result}`` dict per target field,
    where ``raw_result`` is whatever ``sdk.run_query`` returned.
  """
  def fetch_values(field_name):
    print(field_name)
    write_query = ml.WriteQuery(model=lookml_model, view=lookml_explore, fields=[field_name])
    created = sdk.create_query(write_query)
    raw_result = sdk.run_query(created.id, "json")
    print(raw_result)
    return {field_name: raw_result}

  return [fetch_values(name) for name in retrieve_target_filter_obj.required_target]
    


In [48]:
# Query Looker once per target field for its existing values; each field name and raw
# result is printed by the helper while it runs.
retrieve_filter_and_values = get_validated_filter_values_from_looker(lookml_model, lookml_explore, retrieve_target_filter_obj)

national_pension_mom.datapoint_create_yearmonth_year
[{"national_pension_mom.datapoint_create_yearmonth_year":2022}]
bizcategory.cat_l1
[{"bizcategory.cat_l1":"가구 내 고용활동 및 달리 분류되지 않은 자가 소비 생산활동"},
{"bizcategory.cat_l1":"건 설 업"},
{"bizcategory.cat_l1":"공공 행정, 국방 및 사회보장 행정"},
{"bizcategory.cat_l1":"광업"},
{"bizcategory.cat_l1":"교육서비스업"},
{"bizcategory.cat_l1":"금융 및 보험업"},
{"bizcategory.cat_l1":"도매 및 소매업"},
{"bizcategory.cat_l1":"보건업 및 사회복지 서비스업"},
{"bizcategory.cat_l1":"부동산업"},
{"bizcategory.cat_l1":"사업시설 관리, 사업 지원 및 임대 서비스업"},
{"bizcategory.cat_l1":"수도, 하수 및 폐기물 처리, 원료 재생업"},
{"bizcategory.cat_l1":"숙박 및 음식점업"},
{"bizcategory.cat_l1":"예술, 스포츠 및 여가관련 서비스업"},
{"bizcategory.cat_l1":"운수 및 창고업"},
{"bizcategory.cat_l1":"전기, 가스, 증기 및 공기 조절 공급업"},
{"bizcategory.cat_l1":"전문, 과학 및 기술 서비스업"},
{"bizcategory.cat_l1":"정보통신업"},
{"bizcategory.cat_l1":"제조업"},
{"bizcategory.cat_l1":"협회 및 단체, 수리 및 기타 개인 서비스업"},
{"bizcategory.cat_l1":"협회 및 단체, 수리 및 기타 개인서비스"}]


In [49]:
# Display the collected values: one {field_name: raw JSON string} dict per looked-up field.
retrieve_filter_and_values

[{'national_pension_mom.datapoint_create_yearmonth_year': '[{"national_pension_mom.datapoint_create_yearmonth_year":2022}]'},
 {'bizcategory.cat_l1': '[{"bizcategory.cat_l1":"가구 내 고용활동 및 달리 분류되지 않은 자가 소비 생산활동"},\n{"bizcategory.cat_l1":"건 설 업"},\n{"bizcategory.cat_l1":"공공 행정, 국방 및 사회보장 행정"},\n{"bizcategory.cat_l1":"광업"},\n{"bizcategory.cat_l1":"교육서비스업"},\n{"bizcategory.cat_l1":"금융 및 보험업"},\n{"bizcategory.cat_l1":"도매 및 소매업"},\n{"bizcategory.cat_l1":"보건업 및 사회복지 서비스업"},\n{"bizcategory.cat_l1":"부동산업"},\n{"bizcategory.cat_l1":"사업시설 관리, 사업 지원 및 임대 서비스업"},\n{"bizcategory.cat_l1":"수도, 하수 및 폐기물 처리, 원료 재생업"},\n{"bizcategory.cat_l1":"숙박 및 음식점업"},\n{"bizcategory.cat_l1":"예술, 스포츠 및 여가관련 서비스업"},\n{"bizcategory.cat_l1":"운수 및 창고업"},\n{"bizcategory.cat_l1":"전기, 가스, 증기 및 공기 조절 공급업"},\n{"bizcategory.cat_l1":"전문, 과학 및 기술 서비스업"},\n{"bizcategory.cat_l1":"정보통신업"},\n{"bizcategory.cat_l1":"제조업"},\n{"bizcategory.cat_l1":"협회 및 단체, 수리 및 기타 개인 서비스업"},\n{"bizcategory.cat_l1":"협회 및 단체, 수리 및 기타 개인서비스"}]'}]

In [53]:
def choose_right_filter_value(filter_values, wanted_value):
  """Ask the LLM to map the requested value(s) onto existing filter values.

  Args:
    filter_values: Candidate values fetched from Looker; rendered into the
      prompt as-is.
    wanted_value: Value(s) the user asked for; rendered into the prompt as-is.

  Returns:
    The raw text returned by ``llm.predict``; the prompt asks for a JSON array.
  """
  instructions = (
    "As a looker developer, choose right filter value for the wanted value below without changing filter value itself without json footer and header.\n"
    "\n"
    "  filter_values : {filter_values}\n"
    "\n"
    "  wanted_values: {wanted_value}\n"
    "\n"
    "  answer format: json array\n"
    "  [filter_value1, filter_value2, ...]\n"
    "  "
  )
  prompt = instructions.format(filter_values=filter_values, wanted_value=wanted_value)
  return llm.predict(prompt)

In [59]:
import json

# class LookerValidFilterValue(BaseModel):
#   field_name: str = Field(description="field_name")
#   field_values: List[str] = Field(description="field_values")

# def parse_valid_filter_values(response):
#   parser = PydanticOutputParser(pydantic_object=LookerValidFilterValue)
#   return parser.parse(response)

def parse_valid_filter_values(response):
  """Extract and decode the JSON array embedded in an LLM reply.

  Everything from the first ``[`` to the last ``]`` (inclusive) is handed to
  ``json.loads``; text around it, such as a Markdown code fence, is dropped.

  Args:
    response: Raw reply text.

  Returns:
    The decoded JSON value.
  """
  first_bracket = response.find('[')
  last_bracket = response.rfind(']')
  return json.loads(response[first_bracket:last_bracket + 1])

In [61]:
def get_appropriate_filter_value_pair(related_fields, retrieve_filter_and_values):
  """Match the requested filter values against the values that exist in Looker.

  For each ``{field_name: candidate_values}`` dict the user's requested
  values are looked up, the LLM picks the matching candidates, and its reply
  is decoded from JSON.

  Args:
    related_fields: Parsed field selection holding the requested filters.
    retrieve_filter_and_values: List of single-key dicts, as returned by
      ``get_validated_filter_values_from_looker``.

  Returns:
    A list with one decoded LLM answer per input dict, in input order.
  """
  def resolve(candidates):
    # Each dict is keyed by the field name; only its first key is used.
    field_name = list(candidates.keys())[0]
    requested = get_user_input_value_for_filter_field(related_fields, field_name)
    llm_reply = choose_right_filter_value(candidates, requested)
    return parse_valid_filter_values(llm_reply)

  return [resolve(entry) for entry in retrieve_filter_and_values]


In [62]:
# Let the LLM map the requested filter values onto the values fetched from Looker.
valid_filter_values = get_appropriate_filter_value_pair(related_fields, retrieve_filter_and_values)

In [63]:
# Display the matched values: one list of {field_name: value} rows per looked-up field.
valid_filter_values

[[{'national_pension_mom.datapoint_create_yearmonth_year': 2022}],
 [{'bizcategory.cat_l1': '건 설 업'}, {'bizcategory.cat_l1': '제조업'}]]

In [None]:
def make_query_for_the_look(lookml_model, lookml_explore, related_fields, valid_filter_values):
  """Create the Looker query for the selected fields, filters and sorts.

  The earlier body ended in a loop copied from
  ``get_validated_filter_values_from_looker`` that returned an undefined name
  (NameError on every call) and never used the fields/filters/sorts it built.

  Args:
    lookml_model: Model name for the query.
    lookml_explore: Explore name, passed as ``view`` to ``WriteQuery``.
    related_fields: Parsed field selection exposing ``dimensions``,
      ``measures``, ``filters`` (``field_name`` / ``values``) and ``sorts``
      (``field_name`` / ``direction``).
    valid_filter_values: List of row lists as produced by
      ``get_appropriate_filter_value_pair``; each row is expected to be a
      ``{field_name: value}`` dict.

  Returns:
    Whatever ``sdk.create_query`` returns for the assembled ``WriteQuery``.
  """
  validated = _validated_values_by_field(valid_filter_values)

  filters = {}
  for requested in related_fields.filters:
    # Prefer values confirmed against Looker; matching is by field name, so the
    # order of valid_filter_values does not have to follow related_fields.filters.
    # Fall back to the values taken from the question when none were confirmed.
    values = validated.get(requested.field_name) or requested.values
    if values:
      filters[requested.field_name] = _to_filter_expression(values)

  sorts = []
  for sort in related_fields.sorts:
    descending = str(sort.direction).strip().lower().startswith("desc")
    # Ascending is expressed by the bare field name.
    sorts.append(sort.field_name + " desc" if descending else sort.field_name)

  write_query = ml.WriteQuery(
    model=lookml_model,
    view=lookml_explore,
    fields=list(related_fields.dimensions) + list(related_fields.measures),
    filters=filters,
    sorts=sorts,
  )
  return sdk.create_query(write_query)


def _validated_values_by_field(valid_filter_values):
  """Group validated rows ({field_name: value} dicts) into {field_name: [values]}."""
  grouped = {}
  for rows in valid_filter_values or []:
    for row in rows:
      if isinstance(row, dict):
        for field_name, value in row.items():
          grouped.setdefault(field_name, []).append(value)
  return grouped


def _to_filter_expression(values):
  """Join values into one comma-separated Looker filter expression."""
  escaped_values = []
  for value in values:
    text = str(value)
    # NOTE(review): escaping with "^" follows Looker's documented rules for
    # string filter expressions - confirm against the target Looker version.
    # "^" itself is handled first so later escapes are not doubled.
    for special in ("^", '"', "%", "_", ","):
      text = text.replace(special, "^" + special)
    escaped_values.append(text)
  return ",".join(escaped_values)
    

In [None]:
# str.format template for a YAML snippet describing a `looker_column` visualization.
# Placeholders: look_name, title, model, explore, dimesions_and_meaures (spelled exactly
# like that), filters, sorts. str.format raises KeyError if any of them is not supplied.
look_template = """
- name: {look_name}
  title: {title}
  model: {model}
  explore: {explore}
  type: looker_column
  fields: [{dimesions_and_meaures}]
  filters:
    {filters}
  sorts: [{sorts}]
  limit: 500
  column_limit: 50
  query_timezone: America/Los_Angeles
  x_axis_gridlines: false
  y_axis_gridlines: true
  show_view_names: false
  show_y_axis_labels: true
  show_y_axis_ticks: true
  y_axis_tick_density: default
  y_axis_tick_density_custom: 5
  show_x_axis_label: true
  show_x_axis_ticks: true
  y_axis_scale_mode: linear
  x_axis_reversed: false
  y_axis_reversed: false
  plot_size_by_field: false
  trellis: ''
  stacking: ''
  limit_displayed_rows: false
  legend_position: center
  point_style: none
  show_value_labels: false
  label_density: 25
  x_axis_scale: auto
  y_axis_combined: true
  ordering: none
  show_null_labels: false
  show_totals_labels: false
  show_silhouette: false
  totals_color: "#808080"
  defaults_version: 1
"""

In [None]:
# Render the template. The keyword names must match the placeholders in look_template
# exactly (including the "dimesions_and_meaures" spelling); the earlier call passed
# `fields=` and omitted `filters` / `sorts`, which makes str.format raise KeyError.
# The filter and sort values below are the ones recorded in the cells above.
look_template.format(
  look_name="anonymous",
  title="Monthly National Pension in 2022",
  model=lookml_model,
  explore=lookml_explore,
  dimesions_and_meaures="'national_pension_mom.datapoint_create_yearmonth_year', 'national_pension_mom.datapoint_create_yearmonth_month', 'bizcategory.cat_l1_code','national_pension_mom.count', 'national_pension_mom.total_monthly_fixed_amount'",
  # The continuation line is indented to sit under the template's `filters:` key.
  filters="national_pension_mom.datapoint_create_yearmonth_year: '2022'\n    bizcategory.cat_l1: 건 설 업,제조업",
  sorts="national_pension_mom.datapoint_create_yearmonth_month",
)

In [None]:
# Hard-coded example with the same key layout as look_template, filled in with concrete
# values. No visible cell reads this variable.
lookml_template = """
- name: add_a_unique_name_1699948162
  title: Untitled Visualization
  model: lookml_hol_sample
  explore: national_pension_mom
  type: looker_column
  fields: [bizcategory.cat_l1, national_pension_mom.create_ym_month, national_pension_mom.total_monthly_fixed_amount,
    national_pension_mom.total_monthly_fixed_amount_compared]
  filters:
    national_pension_mom.compared_month: '1'
    national_pension_mom.create_ym_month: '2022'
    bizcategory.cat_l1: 건 설 업
  sorts: [national_pension_mom.create_ym_month]
  limit: 500
  column_limit: 50
  query_timezone: America/Los_Angeles
  x_axis_gridlines: false
  y_axis_gridlines: true
  show_view_names: false
  show_y_axis_labels: true
  show_y_axis_ticks: true
  y_axis_tick_density: default
  y_axis_tick_density_custom: 5
  show_x_axis_label: true
  show_x_axis_ticks: true
  y_axis_scale_mode: linear
  x_axis_reversed: false
  y_axis_reversed: false
  plot_size_by_field: false
  trellis: ''
  stacking: ''
  limit_displayed_rows: false
  legend_position: center
  point_style: none
  show_value_labels: false
  label_density: 25
  x_axis_scale: auto
  y_axis_combined: true
  ordering: none
  show_null_labels: false
  show_totals_labels: false
  show_silhouette: false
  totals_color: "#808080"
  defaults_version: 1
"""