In [None]:
!pip install google-cloud-aiplatform langchain --upgrade

In [None]:
# Freshly installed packages only become importable after the kernel restarts,
# so ask the running kernel to shut itself down with the restart flag set.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

# LangChain

In [None]:
# Vertex AI SDK, LangChain, and LangChain's Vertex AI LLM wrapper.
import vertexai
import langchain
from langchain.llms import VertexAI
from google.cloud import aiplatform

# `!` runs the shell command and captures its output as a list of lines;
# the first line is taken as the active gcloud project.
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]
LOCATION = "us-central1" # @param {type:"string"}

# define project information manually if the above code didn't work
# NOTE(review): "(unset)" is presumably what gcloud reports when no project is
# configured -- confirm, and replace the placeholder below with a real project ID.
if PROJECT_ID == "(unset)":
  PROJECT_ID = "[your-project-id]" # @param {type:"string"}

print(PROJECT_ID)
# Initialise the Vertex AI SDK with the chosen project and region.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Print the library versions in use to make runs easier to reproduce and debug.
print(f"LangChain version: {langchain.__version__}")
print(f"Vertex AI SDK version: {aiplatform.__version__}")


In [None]:
# LangChain LLM wrapper around the Vertex AI code-generation model.
# The sampling settings (temperature, top_p, top_k) and the output-token cap
# are passed straight through to the model.
llm = VertexAI(
    model_name="code-bison@002",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=False,
)

# `invoke` is the supported entry point; calling the LLM object directly
# (`llm(...)`) is deprecated in current LangChain releases.
print(llm.invoke("""Write me a function to reverse the bits of an
             integer in Objective-C
          """))

# Prompt Templates

In [None]:
from langchain.prompts import PromptTemplate

# DDL for the two tables (Owners and their Pets) that the generated SQL
# should query. It is substituted into the template's {schema} slot.
schema = """
TABLE Owners (
  OwnerID STRING(36) NOT NULL,
  OwnerName STRING(MAX) NOT NULL,
  LastName STRING(MAX),
) PRIMARY KEY(OwnerID);

TABLE Pets (
  OwnerID STRING(36) NOT NULL,
  PetID STRING(MAX) NOT NULL,
  PetType STRING(MAX) NOT NULL,
  PetName STRING(MAX) NOT NULL,
  Breed STRING(MAX) NOT NULL,
  DOB DATE,
) PRIMARY KEY(OwnerID, PetID),
  INTERLEAVE IN PARENT Owners ON DELETE CASCADE;
"""

# Instruction text with two placeholders: {schema} and {question}.
sql_template_text = """
    Context: You write SQL Queries based on natural language inputs.
    Given the following database Schema: {schema}

    Write a SQL SELECT query based on the following question: {question}.
    Only return the SQL statement
    Make sure you put semicolons after each line
    """

# Reusable template; call .format(schema=..., question=...) to fill it in.
prompt_template = PromptTemplate.from_template(sql_template_text)

In [None]:
# Simple aggregate: count the rows in Owners.
owner_count_prompt = prompt_template.format(
    schema=schema,
    question="How many owners are there",
)
# `invoke` replaces the deprecated direct call `llm(...)`.
print(llm.invoke(owner_count_prompt))

In [None]:
# Join + GROUP BY: pets per owner, with the owner's name.
# The question text (including its embedded indentation) is kept exactly as
# it was so the prompt sent to the model is unchanged.
pets_per_owner_prompt = prompt_template.format(
    schema=schema,
    question="""Show me the number of pets
                                 for each owner. Include the name of each owner""",
)
# `invoke` replaces the deprecated direct call `llm(...)`.
print(llm.invoke(pets_per_owner_prompt))

In [None]:
# Filter + ORDER BY: dogs sorted by age.
dogs_by_age_prompt = prompt_template.format(
    schema=schema,
    question="Show me the dogs sorted by age",
)
# `invoke` replaces the deprecated direct call `llm(...)`.
print(llm.invoke(dogs_by_age_prompt))

In [None]:
# Date arithmetic + aggregate: average pet age per type, rounded to years.
# The question text (including its embedded indentation) is kept exactly as
# it was so the prompt sent to the model is unchanged.
average_age_prompt = prompt_template.format(
    schema=schema,
    question="""Show me the average age of the pets by type.
                                 Round the average age to the year""",
)
# `invoke` replaces the deprecated direct call `llm(...)`.
print(llm.invoke(average_age_prompt))

# Code Generation Examples

In [None]:
import vertexai
from vertexai.language_models import CodeChatModel

# Use the project and region resolved in the setup cell rather than a
# hard-coded project ID, so the notebook runs in the reader's own project.
vertexai.init(project=PROJECT_ID, location=LOCATION)
chat_model = CodeChatModel.from_pretrained("codechat-bison")

# Generation settings passed to every chat.send_message(...) call below.
parameters = {
    "candidate_count": 1,
    "max_output_tokens": 1024,
    "temperature": 0.2
}

# One chat session is shared by all following cells, so earlier turns stay
# in the conversation history.
chat = chat_model.start_chat()


## Create a prompt template with instructions for the model

In [None]:
# Chat prompt template: fixed instructions followed by a single positional
# slot ({0}) that each later cell fills with its question via prompt.format().
prompt = """
Context: You are a Coding Chatbot. You answer coding questions.
         You generate code in Python unless you are told otherwise.
         You follow Python code best practices as defined by the
         PEP 8 – Style Guide for Python Code

Question: {0}
Code:

"""

## Code Generation

In [None]:
# Ask the chat model for a bit-reversal function (Python, per the template).
question = "Write a function that reverses the bits of an integer."

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

In [None]:
# Ask the chat model for a temperature-conversion class.
question = '''Write a Python class that can be used to convert Fahrenheit to Celsius
              and visa versa
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

## Write test code

In [None]:
# Ask the chat model to write unit tests; the function under test is
# embedded in the question text itself.
question = '''Write unit tests for the following function:

def reverse_bits(n):
  """Reverses the bits of an integer.

  Args:
    n: The integer to reverse.

  Returns:
    The reversed integer.
  """

  # Convert the integer to a binary string.
  binary_string = bin(n)[2:]

  # Reverse the binary string.
  reversed_binary_string = binary_string[::-1]

  # Convert the reversed binary string back to an integer.
  reversed_integer = int(reversed_binary_string, 2)

  return reversed_integer
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

In [None]:
# Ask the chat model to write a test fixture; the class under test is
# embedded in the question text itself.
question = '''Write a test fixture for the following class:

class TemperatureConverter:
  """A class that can be used to convert Fahrenheit to Celsius
              and visa versa."""

  def __init__(self):
    self.fahrenheit_to_celsius_factor = 5 / 9
    self.celsius_to_fahrenheit_factor = 9 / 5

  def fahrenheit_to_celsius(self, fahrenheit):
    """Converts a temperature in Fahrenheit to Celsius."""
    return (fahrenheit - 32) * self.fahrenheit_to_celsius_factor

  def celsius_to_fahrenheit(self, celsius):
    """Converts a temperature in Celsius to Fahrenheit."""
    return (celsius * self.celsius_to_fahrenheit_factor) + 32
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

## Code explanations

In [None]:
# Ask the chat model to explain an undocumented generator-based rate limiter.
question = """
Explain the following function:

def rate_limit(max_per_minute):
    period = 60 / max_per_minute
    print("Waiting")
    while True:
        before = time.time()
        yield
        after = time.time()
        elapsed = after - before
        sleep_time = max(0, period - elapsed)
        if sleep_time > 0:
            print(".", end="")
            time.sleep(sleep_time)

"""

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

## Adding documentation and comments

In [None]:
# Ask the chat model to add documentation to the same rate limiter.
question = """
Rewrite the following function with proper documentation:

def rate_limit(max_per_minute):
    period = 60 / max_per_minute
    print("Waiting")
    while True:
        before = time.time()
        yield
        after = time.time()
        elapsed = after - before
        sleep_time = max(0, period - elapsed)
        if sleep_time > 0:
            print(".", end="")
            time.sleep(sleep_time)

"""

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

In [None]:
# Ask the chat model to add inline comments to an uncommented function.
question = """
Rewrite the following function with comments:

def reverse_bits(n):

  binary_string = bin(n)[2:]
  reversed_binary_string = binary_string[::-1]
  reversed_integer = int(reversed_binary_string, 2)

  return reversed_integer
"""

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)


## Code debugging

In [None]:
# Ask the chat model to debug a function. The embedded code is intentionally
# wrong: the expression lacks parentheses around `fahrenheit - 32`.
question = '''
The following function does not work.
Debug it for me and explain what the problem is:

def fahrenheit_to_celsius(fahrenheit):
  """Converts Fahrenheit to Celsius.

  Args:
    fahrenheit: A float representing the temperature in Fahrenheit.

  Returns:
    A float representing the temperature in Celsius.
  """

  celsius = fahrenheit - 32 * 5 / 9
  return celsius
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

## Code Optimization

In [None]:
# Ask the chat model to optimise a loop that sums an alternating series.
# The embedded code (including its mixed tab/space comment indentation) is
# prompt text and is kept exactly as written.
question = '''
The following function is slow,
can you optimize it for me:

def getpi():
    k = 1
    s = 0
    for i in range(1000000):
	    # even index elements are positive
        if i % 2 == 0:
            s += 4/k
        else:
		    # odd index elements are negative
            s -= 4/k
	    # denominator is odd
        k += 2

    return s
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)

## Code Conversion

In [None]:
# Ask the chat model to translate a Python function to JavaScript.
question = '''
Convert the following Python function to JavaScript:

def reverse_bits(n):

  binary_string = bin(n)[2:]
  reversed_binary_string = binary_string[::-1]
  reversed_integer = int(reversed_binary_string, 2)

  return reversed_integer
'''

# Fill the template, send it as the next chat turn, and show the reply.
filled_prompt = prompt.format(question)
response = chat.send_message(filled_prompt, **parameters)
print(response.text)


# Run BigQuery queries from natural language

In [None]:
import vertexai
# Import CodeGenerationModel from a single module. Importing the same name
# from both `vertexai.language_models` and the preview module only makes the
# later import shadow the earlier one; the preview class is the one kept.
from vertexai.preview.language_models import CodeGenerationModel

# Use the project and region resolved in the setup cell rather than a
# hard-coded project ID, so the notebook runs in the reader's own project.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Generation settings passed to model.predict(...) below.
parameters = {
    "candidate_count": 1,
    "max_output_tokens": 1024,
    "temperature": 0.2
}
model = CodeGenerationModel.from_pretrained("code-bison@001")

In [None]:
# Few-shot prompt for BigQuery SQL generation. Positional slots:
#   {0} - database schema text
#   {1} - project ID used in the worked example's table name
#   {2} - the natural-language question to answer
prompt = """
    Context: You write SQL Queries based on natural language inputs.
    Your queries will run on BigQuery.
    You should always generate Google Standard SQL.
    Only return the SQL statement.
    Make sure you put semicolons after each line.
    Always include the project_id in the from clause.
    Surround the full table name with back tics

    Given the following database Schema:
    {0}

    Q: Write a SQL SELECT query based on the following question: How many customers are in Virginia.
    A: SELECT COUNT(*) AS num_customers FROM `{1}.dataset_id.customers` WHERE country = 'USA' AND region = 'VA';

    Q: Write a SQL SELECT query based on the following question: {2}.
    A:
    """

In [None]:
from google.cloud import bigquery

# Project and dataset that hold the tables to be queried. Query jobs run
# through a client created for PROJECT_ID.
data_project_id = "joey-gagliardo"
dataset_id = "northwind"

client = bigquery.Client(project=PROJECT_ID)

# One row per table: project, dataset, table name, and an ordered array of
# (column name, column type) structs read from INFORMATION_SCHEMA.COLUMNS.
schema_query = f"""SELECT
  table_catalog AS project_id,
  table_schema AS dataset_id,
  table_name AS table_name,
  ARRAY_AGG(STRUCT(
      column_name AS name,
      data_type AS type)
  ORDER BY
    ordinal_position) AS SCHEMA
FROM
  `{data_project_id}.{dataset_id}.INFORMATION_SCHEMA.COLUMNS`
GROUP BY
  table_catalog,table_schema,table_name
  """

# Run the query and materialise all result rows; this list is later
# formatted into the prompt as the schema description.
schema_job = client.query(schema_query)
schema = [row for row in schema_job.result()]
print(schema)

In [None]:

# Preview the fully rendered prompt before sending it to the model.
# The question keeps its leading and trailing newline.
question = "\nHow many orders were placed each year?\n"

prompt.format(schema, data_project_id, question)

In [None]:
import re

question = '''
How many orders were placed each year?
'''

# Render the few-shot prompt and ask the code model to complete the SQL.
response = model.predict(
    prefix = prompt.format(schema, data_project_id, question),
    **parameters
)

raw_sql = response.text.strip()

# Code models sometimes wrap their answer in a Markdown fence (```sql ... ```),
# which is not valid SQL. When the whole reply is a single fenced block,
# keep only its contents; otherwise use the reply unchanged.
fenced = re.fullmatch(r"```[A-Za-z]*[ \t]*\n(.*?)\n?[ \t]*```", raw_sql, flags=re.DOTALL)
generated_sql = fenced.group(1).strip() if fenced else raw_sql
print(generated_sql)

In [None]:
from google.cloud import bigquery

# NOTE(review): `generated_sql` is model output and is executed verbatim.
# Read the printed statement before running this cell.
client = bigquery.Client(project=PROJECT_ID)
query_job = client.query(generated_sql)

# Load the query result into a DataFrame and display it as the cell output.
results = query_job.to_dataframe()
results
