In [None]:
!pip install -U -q 'google-generativeai>=0.8.3'

In [None]:
!python -V

Python 3.11.11


In [None]:
import cgi, math, os, pickle, random, re, socket, sys, time, urllib
import datetime, re, sys, time

import google.generativeai as genai
from google.colab import userdata
# Read the API key through Colab's userdata helper so it is not hardcoded here.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
# Hand the key to the google.generativeai client used by the cells below.
genai.configure(api_key=GOOGLE_API_KEY)

from IPython.display import HTML, Markdown, display
# Reference only: the equivalent REST request. This string literal is never
# executed. NOTE(review): if it is ever run as a `!` line, check how IPython
# treats the JSON braces before relying on it.
'''
!curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' -X POST \
-d '{"contents":[{"parts":[{"text": "Please give me the Python code to generate heapsort"}]}]}'
'''

##### Selecting models

In [None]:
# Print the resource name of every model returned by the API for this key.
available_models = genai.list_models()
for model in available_models:
  print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-exp-1206
models/gemini-exp-1121
models/gemini-exp-1114
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental
models/emb

In [None]:
# Show the full metadata record of one specific model, then stop scanning.
wanted_model_name = 'models/gemini-2.0-flash-thinking-exp-01-21'
for model in genai.list_models():
  if model.name != wanted_model_name:
    continue
  print(model)
  break

Model(name='models/gemini-2.0-flash-thinking-exp-01-21',
      base_model_id='',
      version='2.0-exp-01-21',
      display_name='Gemini 2.0 Flash Thinking Experimental 01-21',
      description='Experimental release (January 21st, 2025) of Gemini 2.0 Flash Thinking',
      input_token_limit=1048576,
      output_token_limit=65536,
      supported_generation_methods=['generateContent', 'countTokens'],
      temperature=0.7,
      max_temperature=2.0,
      top_p=0.95,
      top_k=64)


In [None]:
from google.api_core import retry

# Sample at temperature 2.0 so that repeated calls with one prompt can differ.
high_temperature_model = genai.GenerativeModel(
    'gemini-1.5-flash',
    generation_config=genai.GenerationConfig(temperature=2.0),
)

# Request options shared by later cells: retry transient API errors, starting
# with a 10s wait that grows by 1.5x, and give up after 300s overall.
retry_policy = dict(
    retry=retry.Retry(
        predicate=retry.if_transient_error,
        initial=10,
        multiplier=1.5,
        timeout=300,
    )
)

for _ in range(5):
  response = high_temperature_model.generate_content(
      'Pick a random color', request_options=retry_policy)
  if not response.parts:
    continue  # this sample came back without any parts; nothing to print
  print(response.text, '_'*20)

#### Top-K and top-P

In [None]:
# Sampling knobs for this section: temperature together with top-k and top-p.
sampling_config = genai.GenerationConfig(temperature=1.0, top_k=64, top_p=0.95)
model = genai.GenerativeModel(
    'gemini-1.5-flash-001', generation_config=sampling_config)

# Placeholder prompt: replace with the text to send.
story_prompt = 'query - goes here '
response = model.generate_content(story_prompt, request_options=retry_policy)

# Last expression of the cell: render the reply as Markdown.
Markdown(response.text)

#### Prompting
- zero shot, enum mode, one-shot and few-shot - json
- chain-of-thought (COT)
- reason and act (ReAct)

In [None]:
''' Zero shot '''

# Low temperature and a 5-token output cap: only a short label is wanted.
zero_shot_config = genai.GenerationConfig(
    temperature=0.1, top_p=1, max_output_tokens=5)
model = genai.GenerativeModel(
    'gemini-1.5-flash-001', generation_config=zero_shot_config)

# No worked examples are included; the task statement is the whole prompt.
zero_shot_prompt = '''
Classify movie reviews as positive, neutral, negative
Review: I wish there were more movies like this masterpiece
Sentiment:
'''

response = model.generate_content(
    zero_shot_prompt, request_options=retry_policy)
print(response.text)

Sentiment: **Positive**


In [None]:
''' enum mode '''
import enum
# The three labels a review may be classified as. The class is passed as
# response_schema in this cell, so the member values are the allowed outputs.
class Sentiment(enum.Enum):
  positive = 'positive'
  neutral = 'neutral'
  negative = 'negative'

# Constrain the reply to the Sentiment enum through the text/x.enum MIME type.
enum_config = genai.GenerationConfig(
    response_mime_type='text/x.enum', response_schema=Sentiment)
model = genai.GenerativeModel(
    'gemini-1.5-flash-001', generation_config=enum_config)

# Reuses the zero-shot prompt; only the output constraint is different.
response = model.generate_content(
    zero_shot_prompt, request_options=retry_policy)
print(response.text)

positive


In [None]:
''' one-shot and few-shot '''
model = genai.GenerativeModel(
    'gemini-1.5-flash-latest',
    generation_config = genai.GenerationConfig(
        temperature=0.1,
        top_p=1,
        max_output_tokens=250 )
)
# Each EXAMPLE pairs one order with exactly one JSON answer. The answers are
# themselves valid JSON ("normal" is a quoted string), because the prompt asks
# the model for valid JSON and the examples are what it imitates.
few_shot_prompt = '''
parse a customer's order into valid json

EXAMPLE:
small order with item-1, item-2, and item-3
Json response:
{ "size": "small", "type": "normal", "items": ["item-1", "item-2", "item-3"] }

EXAMPLE:
large order with item-2, item-4, and item-5
Json response:
{ "size": "large", "type": "normal", "items": ["item-2", "item-4", "item-5"] }
Order:
'''

# The real order is sent as a second content part, right after the examples.
customer_order = "Give me large order with item-1, item-6"
response = model.generate_content([few_shot_prompt, customer_order],
              request_options=retry_policy)
print(response.text)

```json
{
  "size": "large",
  "type": "normal",
  "items": ["item-1", "item-6"]
}
```



In [None]:
''' json mode '''
import typing_extensions as typing

# Shape of the JSON object requested from the model; the class is passed as
# response_schema in this cell.
# NOTE(review): the few-shot cell calls the list "items" while this schema
# calls it "ingredients" - confirm which key downstream code expects.
class Order(typing.TypedDict):
  size: str
  ingredients: list[str]
  type: str

# JSON mode: the reply must be a JSON document matching the Order schema.
model = genai.GenerativeModel(
    'gemini-1.5-flash-latest',
    generation_config=genai.GenerationConfig(
        temperature=0.1,
        response_mime_type="application/json",
        response_schema=Order)
)
# Pass the shared retry policy, as every other model call in this notebook
# does, so a transient API error is retried instead of failing the cell.
response = model.generate_content(
    'Can I have a large order with item-1 and item-2',
    request_options=retry_policy)
print(response.text)

{"ingredients": ["item-1", "item-2"], "size": "large"}


In [None]:
''' Chain of thought '''
# Placeholder prompt: replace with the question to reason through step by step.
prompt = ''' ccontent here  '''

# No generation_config is passed here, so the model's defaults apply.
model = genai.GenerativeModel('gemini-1.5-flash-latest')
response = model.generate_content(
    prompt,
    request_options=retry_policy,
)
print(response.text)

In [None]:
''' ReAct - reason and act '''

model_instructions = '''

Solve a question answering with interleaving Thought, Action, Observation steps.
Thought can reason about current situation.
Observation is understanding relevant information from Action's output
Action can be one of the three types:
(1) <search>entity</search>, searches exact entity in wikipedia and return the
first paragraph if it exists. If not, it will return similar entities to search
and you can try information from those topics
(2) <lookup>keyword</lookup> return next sentence containing keyword in the
current context. This only does exact matches
(3) <finish>answer</finish> returns the answer and finishes the task.
'''

# Placeholders, like the other prompts in this notebook. `question` is what the
# model should solve; `observation` is the result of carrying out, by hand, the
# Action the model asks for in its first reply.
question = ''' question goes here '''
observation = ''' observation goes here '''

# Stop generation right before the model would write its own Observation: the
# Action is performed outside the model and its result is sent back instead.
config = genai.GenerationConfig(stop_sequences=['\nObservation'])

# A chat session keeps the Thought/Action/Observation history between turns.
# The notebook-wide `model` variable is deliberately not rebound here.
react_chat = genai.GenerativeModel('gemini-1.5-flash-latest').start_chat()

# Turn 1: instructions plus question; the reply holds the Thought and Action.
response = react_chat.send_message([model_instructions, question],
            generation_config=config, request_options=retry_policy)
print(response.text)

# Turn 2: feed the Observation back so the model can continue or finish.
response = react_chat.send_message(observation, generation_config=config,
            request_options=retry_policy)
Markdown(response.text)

#### Generating code

In [None]:
code_prompt = '''
write a Python function to calculate the factorial of a number.
no explanation, provide code only
'''

code_config = genai.GenerationConfig(
    temperature=1, top_p=1, max_output_tokens=1024)
model = genai.GenerativeModel(
    'gemini-1.5-flash-latest', generation_config=code_config)

# Rebinds the notebook-wide retry_policy: from this cell on only the predicate
# is set, and the backoff values given earlier in the notebook no longer apply.
retry_policy = dict(retry=retry.Retry(predicate=retry.if_transient_error))
response = model.generate_content(code_prompt, request_options=retry_policy)
Markdown(response.text)

```python
def factorial(n):
  if n == 0:
    return 1
  else:
    return n * factorial(n-1)
```


In [None]:
''' automatically run the generated code, and return the response '''
code_prompt = '''
calculate the sum of the first 14 prime numbers, only consider the odd primes,
and make sure they are counted all
'''

# The code_execution tool is enabled for this model instance.
model = genai.GenerativeModel('gemini-1.5-flash-latest', tools='code_execution')
response = model.generate_content(
    code_prompt,
    request_options=retry_policy,
)
Markdown(response.text)

In [None]:
# Dump every part of the first candidate, each followed by a separator line.
separator = '*-------------*'
for part in response.candidates[0].content.parts:
  print(part, separator, sep='\n')

text: "To calculate the sum of the first 14 odd prime numbers, I will first generate a list of prime numbers, filter out the even number 2, and then sum the first 14 remaining numbers.\n\n"

*-------------*
executable_code {
  language: PYTHON
  code: "\nimport sympy\n\ndef is_prime(n):\n    \"\"\"Checks if a number is prime.\"\"\"\n    return sympy.isprime(n)\n\nprimes = []\nnum = 2\ncount = 0\nwhile count < 15: # generate at least 14 odd primes, since we will remove 2 later\n    if is_prime(num):\n        primes.append(num)\n        count += 1\n    num += 1\n\nodd_primes = [p for p in primes if p != 2]\nsum_of_primes = sum(odd_primes[:14])  # Sum the first 14 odd primes\n\nprint(f\'{primes=}\')\nprint(f\'{odd_primes=}\')\nprint(f\'{sum_of_primes=}\')\n\n"
}

*-------------*
code_execution_result {
  outcome: OUTCOME_OK
  output: "primes=[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]\nodd_primes=[3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]\nsum_of_primes=326\n"
}

*

In [None]:
file_contents = !curl "https://github.com/google-gemini/cookbook/blob/main/quickstarts/Code_Execution.ipynb"
explain_prompt = f'''
Explain what this file does, very high level
...
{file_contents}
...
'''
model = genai.GenerativeModel('gemini-1.5-flash-latest')
response = model.generate_content(explain_prompt, request_options=retry_policy)
Markdown(response.text)

This file is a HTML representation of a GitHub page displaying a Jupyter Notebook file (`.ipynb`).  It includes the notebook's content rendered as HTML, along with the standard GitHub header, navigation, and footer.  The HTML heavily uses embedded JavaScript and React components to dynamically load and display the notebook content and interact with the GitHub interface.  Essentially, it's a web page designed to show a specific Jupyter Notebook file within the context of a GitHub repository.


In [None]:
file_contents = !curl https://raw.githubusercontent.com/magicmonty/bash-git-prompt/refs/heads/master/gitprompt.sh
explain_prompt = f'''
Explain what this file does, very high level
...
{file_contents}
...
'''
model = genai.GenerativeModel('gemini-1.5-flash-latest')
response = model.generate_content(explain_prompt, request_options=retry_policy)
Markdown(response.text)

This Bash script (`bash-git-prompt`) enhances the command prompt to display Git repository information (branch, status, etc.).  It does this by:

1. **Loading a Theme:** It selects a color theme for the prompt from a set of predefined themes or a custom user-defined theme.

2. **Fetching Git Status:** It runs a Git command to obtain the repository's status (branch, changes, etc.).  It handles the case where the status command may not be available, and attempts to locate a suitable alternative.

3. **Formatting the Prompt:** It formats the Git status information, incorporating the selected theme colors,  branch name, changes, and other configurable elements to create a visually informative prompt.  It also handles edge cases such as a detached HEAD state or no Git repository being present.

4. **Integrating with the Shell:** It integrates its prompt function into the shell's `PROMPT_COMMAND` variable to ensure the prompt is updated automatically after each command execution.  It smartly handles various bash versions.  Additionally, it handles various virtual environments and displays their information in the prompt.

5. **Providing Customization:**  It allows users to customize the prompt's appearance (colors, symbols, information displayed) via configuration files and environment variables.

In essence, it's a sophisticated prompt customization script tailored for Git users to provide real-time repository status directly in their shell.


#### Embeddings and similarity scores

In [None]:
''' cosine similarity '''
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(text1, text2):
  """Return the cosine similarity between the TF-IDF vectors of two texts.

  Both texts are vectorised together by one TfidfVectorizer (word unigrams and
  bigrams, English stop words removed, sublinear TF, L2-normalised), and the
  single similarity value between the two resulting rows is returned.
  """
  tfidf_options = dict(
      analyzer='word',
      ngram_range=(1, 2),
      stop_words='english',
      use_idf=True,
      sublinear_tf=True,
      norm='l2',
  )
  doc_vectors = TfidfVectorizer(**tfidf_options).fit_transform([text1, text2])
  # Row slices (not row indexes) keep both operands two-dimensional.
  first_doc, second_doc = doc_vectors[0:1], doc_vectors[1:2]
  return cosine_similarity(first_doc, second_doc)[0][0]

#### Classifying Embeddings with Keras and Gemini API

In [None]:
''' create embeddings

- retrieval query
- retrieval document
- semantic similarity
- classification
- clustering
- fact_verification
'''
from google.api_core import retry
from tqdm.rich import tqdm
tqdm.pandas()
@retry.Retry(timeout=300.0)
def embed_fn(text: str) -> list[float]:
  """Return the embedding of `text` from models/text-embedding-004.

  The request uses task_type='classification'. The call is wrapped in
  retry.Retry with a 300 second timeout.
  """
  return genai.embed_content(
      model='models/text-embedding-004',
      content=text,
      task_type='classification',
  )['embedding']

def create_embeddings(df):
  """Embed every entry of df['Text'] and store the results in df['Embeddings'].

  The frame passed in is modified in place and also returned. progress_apply
  is the tqdm-provided variant of apply that shows a progress bar.
  """
  embeddings = df['Text'].progress_apply(embed_fn)
  df['Embeddings'] = embeddings
  return df

#### Build a Classification Model

In [None]:
import keras
from keras import layers
def build_classification_model(input_size: int, num_classes: int) -> keras.Model:
  """Build a classifier with one hidden layer on top of embedding vectors.

  Args:
    input_size: length of one embedding vector; also the hidden layer width.
    num_classes: number of output classes.

  Returns:
    An uncompiled keras.Sequential model ending in a softmax over the classes.
  """
  embedding_inputs = layers.Input([input_size], name='embedding_inputs')
  hidden = layers.Dense(input_size, activation='relu', name='hidden')
  output_probs = layers.Dense(
      num_classes, activation='softmax', name='output_probs')
  return keras.Sequential([embedding_inputs, hidden, output_probs])

# NOTE(review): df_train is not created in any cell visible in this notebook -
# confirm it is loaded and embedded before this cell runs.
# Input width = length of one embedding; output width = number of class names.
embedding_size = len(df_train['Embeddings'].iloc[0])
num_classes = len(df_train['Class Name'].unique())
classifier = build_classification_model(embedding_size, num_classes)

classifier.summary()
# Sparse categorical cross-entropy is the loss, Adam the optimizer, and
# accuracy is tracked as a metric.
classifier.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
''' train the model '''
# np.stack is used below, but numpy is not imported by any earlier cell, so a
# fresh kernel would fail here with NameError without this import.
import numpy as np

num_epochs = 20
batch_size = 32
# NOTE(review): df_train / df_test are not created in the visible notebook -
# confirm they carry 'Encoded Label' and 'Embeddings' before running.
y_train = df_train['Encoded Label']
# Stack the per-row embedding lists into one 2-D array per split.
x_train = np.stack(df_train['Embeddings'])
y_val = df_test['Encoded Label']
x_val = np.stack(df_test['Embeddings'])
# Stop when training accuracy has not improved for 3 epochs. The monitored
# value is the training metric, not the validation one.
early_stop = keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)
history = classifier.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    callbacks=[early_stop],
    batch_size=batch_size,
    epochs=num_epochs
)
''' evaluate model performance '''
classifier.evaluate(x=x_val, y=y_val, return_dict=True)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9960 - loss: 0.0976 


{'accuracy': 0.9950000047683716, 'loss': 0.07911157608032227}

In [None]:
# NOTE(review): new_text is not assigned in any visible cell - confirm it is
# set before this cell runs.
embedded = embed_fn(new_text)
# predict works on batches, so wrap the single embedding in a batch of one and
# unpack the single row of class probabilities that comes back.
input_batched = np.array([embedded])
(result,) = classifier.predict(input_batched)

categories = df_test['Class Name'].cat.categories
for idx, category in enumerate(categories):
  share = result[idx]*100
  print(f" {category}: {share:0.2f}%")

#### Function Calling with Gemini

In [None]:
%load_ext sql
%sql sqlite:///sample.db

In [None]:
%%sql
CREATE TABLE IF NOT EXISTS products(
  product_id INTEGER PRIMARY KEY AUTOINCREMENT,
  product_name VARCHAR(255) NOT NULL,
  price DECIMAL(10, 2) NOT NULL ) ;

CREATE TABLE IF NOT EXISTS staff(
  staff_id INTEGER PRIMARY KEY AUTOINCREMENT,
  first_name VARCHAR(255) NOT NULL,
  last_name VARCHAR(255) NOT NULL ) ;

-- Each order links one staff member and one product to a customer.
-- The product foreign key has to name the products table created above.
-- IF NOT EXISTS keeps an orders table that is already present in sample.db,
-- so remove an old sample.db to pick up the corrected reference.
CREATE TABLE IF NOT EXISTS orders(
  order_id INTEGER PRIMARY KEY AUTOINCREMENT,
  customer_name VARCHAR(255) NOT NULL,
  staff_id INTEGER NOT NULL,
  product_id INTEGER NOT NULL,
  FOREIGN KEY (staff_id) REFERENCES staff (staff_id),
  FOREIGN KEY (product_id) REFERENCES products (product_id) ) ;

 * sqlite:///sample.db
Done.
Done.
Done.


[]

In [None]:
%%sql
INSERT INTO products (product_name, price)
VALUES
  ('laptop', 1200.00),
  ('iPhone', 799.00),
  ('usb charger', 25.00);

-- Three staff members.
INSERT INTO staff (first_name, last_name)
VALUES
  ('firstname1', 'lastname1'),
  ('firstname2', 'lastname2'),
  ('firstname3', 'lastname3');

-- Each order row is customer name, then staff_id, then product_id.
INSERT INTO orders (customer_name, staff_id, product_id)
VALUES
  ('cust1', 1, 1),
  ('cust2', 2, 2),
  ('cust3', 1, 3);

 * sqlite:///sample.db
3 rows affected.
3 rows affected.
3 rows affected.


[]

In [None]:
import sqlite3

# Open the SQLite file with the standard library driver. db_conn and cursor
# stay in the notebook namespace for the cells below.
db_file = 'sample.db'
db_conn = sqlite3.connect(db_file)
cursor = db_conn.cursor()

# sqlite_master is queried for the name of every table in the database.
tables = cursor.execute(
    "SELECT name from sqlite_master where type='table'; ").fetchall()
list_of_tables = [name for (name,) in tables]
list_of_tables

['products', 'sqlite_sequence', 'staff', 'orders']

In [None]:
# One row comes back per column of products; positions 1 and 2 of each row are
# the column name and its declared type.
cursor.execute('PRAGMA table_info(products);')
schema = cursor.fetchall()
[(column[1], column[2]) for column in schema]

[('product_id', 'INTEGER'),
 ('product_name', 'VARCHAR(255)'),
 ('price', 'DECIMAL(10, 2)')]

In [None]:
# Display every row of products (execute returns the cursor, so it chains).
cursor.execute('select * from products').fetchall()

In [None]:
''' create db tools '''
def list_tables() -> list[str]:
  """Retrieve the names of all tables in the database.

  Returns:
    The table names as a list of strings (empty when there are no tables).
  """
  # The docstring above is what a function-calling SDK can pass to the model
  # as this tool's description, so it is phrased for the model as well.
  print('DB Call: list_tables')
  cursor = db_conn.cursor()
  cursor.execute("SELECT name from sqlite_master where type='table'; ")
  return [row[0] for row in cursor.fetchall()]

def describe_table(table_name: str) -> list[tuple[str,str]]:
  """Look up the schema of one table.

  Args:
    table_name: name of the table to describe.

  Returns:
    One (column name, declared type) tuple per column, in column order; an
    empty list when there is no table with that name.
  """
  print('DB Call: describe_table')
  # Use a cursor of our own, as list_tables does, instead of depending on a
  # notebook-level cursor created in a different cell.
  cursor = db_conn.cursor()
  # pragma_table_info() is the table-valued form of PRAGMA table_info. Unlike
  # the PRAGMA statement it accepts a bound parameter, so the model-supplied
  # name is never spliced into the SQL text.
  cursor.execute(
      'SELECT name, type FROM pragma_table_info(?) ORDER BY cid',
      (table_name,))
  return cursor.fetchall()

def execute_query(sql: str) -> list[list[str]]:
  """Execute an SQL SELECT statement and return the resulting rows.

  Args:
    sql: the SQL text to run.

  Returns:
    Every row of the result, as returned by fetchall().
  """
  print('DB Call: execute_query')
  # NOTE(review): sql is run verbatim and nothing here enforces that it is a
  # SELECT; the system prompt only asks the model to issue SELECT queries.
  # Use a cursor of our own, as list_tables does, instead of depending on a
  # notebook-level cursor created in a different cell.
  cursor = db_conn.cursor()
  cursor.execute(sql)
  return cursor.fetchall()

# Try each tool once; the cell displays the three results as one tuple.
(
    list_tables(),
    describe_table('products'),
    execute_query('select * from products'),
)

In [None]:
instruction = '''
You are a helpful chatbot that can interact with an SQL database for a computer
store. You will take the users questions and turn them into SQL queries using
the tools available. Once you have the information you need, you will answer the
user's question using the data returned.
Use list_tables to see what tables are present, describe_table to understand
the schema, and execute_query to issue an SQL SELECT query.
'''

# The three Python functions are handed to the model as its tools.
db_tools = [list_tables, describe_table, execute_query]
model = genai.GenerativeModel(
    'models/gemini-1.5-flash-latest',
    system_instruction=instruction,
    tools=db_tools,
)

# Rebinds the notebook-wide retry_policy with only the predicate set.
retry_policy = dict(retry=retry.Retry(predicate=retry.if_transient_error))

# Automatic function calling is switched on for this chat session.
chat = model.start_chat(enable_automatic_function_calling=True)

r_ = chat.send_message(
    'what is the cheapest product',
    request_options=retry_policy,
)
print(r_.text)

DB Call: list_tables
DB Call: describe_tables
DB Call: execute_query
The cheapest product is the usb charger at $25.00.



In [None]:
# Follow-up sent through the same chat object as the previous question.
r_ = chat.send_message(
    'and how much it is?',
    request_options=retry_policy,
)
print(r_.text)

The cheapest product, the usb charger, costs $25.00.

