In [None]:
# Installing the Google SDK
# -q keeps pip's output quiet; -U upgrades google-generativeai when an older copy is already installed.

!pip install -q -U google-generativeai

In [None]:
# Imports

import textwrap
import json
import requests
import numpy as np
import pandas as pd
import google.generativeai as genai
from google.colab import userdata
from IPython.display import Markdown

In [None]:
# Initial config
# The API key is looked up through google.colab's `userdata` under the name
# 'GOOGLE_API_KEY' (so it is never written into the notebook itself) and then
# handed to the google.generativeai client used by every later cell.

API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=API_KEY)

In [None]:
# Available models
# Print the name of every model that advertises the 'embedContent' method.

for candidate in genai.list_models():
  if 'embedContent' not in candidate.supported_generation_methods:
    continue
  print(candidate.name)

models/embedding-001
models/text-embedding-004
models/gemini-embedding-exp-03-07
models/gemini-embedding-exp


In [None]:
# Import JSON

json_url = 'https://raw.githubusercontent.com/rdrgzbhnng/marketplace_ai/main/liquidity.json'
# The timeout stops the cell from hanging indefinitely if the host never answers.
response = requests.get(json_url, timeout=30)

# Fail loudly on an HTTP error status. Merely printing the status code would let
# the cell carry on and either hit a NameError on `json_data` or, on a re-run,
# silently rebuild the DataFrame from a stale `json_data`.
response.raise_for_status()
json_data = response.json()

# Convert it to a DataFrame
liquidity_data = pd.DataFrame(json_data)

# Explore data
liquidity_data.head()

Unnamed: 0,ID,From,To,Distance,Duration,Date,Type of truck,Weight,Length,Contact,Equipament,Cerificates,Pallets
0,A00001,ES-28001 Madrid,ES-01001 Vitoria-Gasteiz,354 km,4h 58m,09/05,Box,"0,1 t",2 m,+34 756 56 45,Tail Lift,,0
1,N00033,ES-28864 Ajalvir,ES-08820 Prat De Llobregat,592 km,8h 1m,09/05,Tautliner,"0,1 t",1 m,+34 741 82 46,,,0
2,M00056,ES-08001 Barcelona,ES-24420 Fabero,927 km,12h 56m,09/05,Tautliner,"0,1 t",7 m,+34 708 38 36,,ADR,1
3,J00024,ES-28830 San Fernando De Henares,ES-20303 Irun,465 km,6h 26m,10/05,Tautliner,"0,2 t","0,4 m",+34 768 66 86,,,0
4,X00053,ES-29300 Archidona,ES-41440 Lora del Río,178 km,2h 53m,09/05,Frigo,"0,4 t",1 m,+34 764 56 88,,,1


In [None]:
# Manipulating data

def build_offer_texts(offers):
  """Return a new frame holding the `Title` and `Text` to embed for each offer.

  `Title` is '"<From>"  to "<To>"' and `Text` is a single sentence made of the
  route, loading date, distance, duration and truck type. The spacing (two
  spaces between segments, one trailing space) is exactly what the previous
  row-wise implementation produced.

  The raw offer columns are dropped from the result, so a KeyError is raised
  if any of them is missing from `offers`. `offers` itself is not modified.
  """
  def labelled(prefix, column, suffix=' '):
    # Whole-column string concatenation instead of a row-by-row apply().
    # The final cast to str is kept from the original implementation.
    return (prefix + offers[column] + suffix).astype(str)

  title = labelled('"', 'From', '"') + ' ' + labelled(' to "', 'To', '"')

  text = 'General merchandise freight from ' + title + ' '
  for prefix, column in (('Loading date: ', 'Date'),
                         ('Distance: ', 'Distance'),
                         ('Duration: ', 'Duration'),
                         ('Type of truck: ', 'Type of truck')):
    text = text + ' ' + labelled(prefix, column)

  # Column names are spelled exactly as they appear in the source data.
  raw_columns = ['ID', 'From', 'To', 'Date', 'Distance', 'Duration', 'Type of truck',
                 'Contact', 'Weight', 'Length', 'Equipament', 'Cerificates', 'Pallets']
  return offers.drop(columns=raw_columns).assign(Title=title, Text=text)

# This rebinds `liquidity_data` to the reduced frame. Re-running this cell
# therefore needs the loading cell to be run again first, because the raw
# columns it reads are gone afterwards.
liquidity_data = build_offer_texts(liquidity_data)

In [None]:
# Embedding model and the helper that embeds one document (title + text)

model = "models/embedding-001"

def embed_fn(title, text):
  """Embed `text` as a retrieval document labelled with `title`.

  Calls genai.embed_content with the module-level `model` and returns the
  value stored under the "embedding" key of the response.
  """
  response = genai.embed_content(
      model=model,
      content=text,
      task_type="retrieval_document",
      title=title,
  )
  return response["embedding"]

# Preview the first rows of the frame that the next cell embeds.
liquidity_data.head()

Unnamed: 0,Title,Text
0,"""ES-28001 Madrid"" to ""ES-01001 Vitoria-Gasteiz""","General merchandise freight from ""ES-28001 Mad..."
1,"""ES-28864 Ajalvir"" to ""ES-08820 Prat De Llob...","General merchandise freight from ""ES-28864 Aja..."
2,"""ES-08001 Barcelona"" to ""ES-24420 Fabero""","General merchandise freight from ""ES-08001 Bar..."
3,"""ES-28830 San Fernando De Henares"" to ""ES-20...","General merchandise freight from ""ES-28830 Sa..."
4,"""ES-29300 Archidona"" to ""ES-41440 Lora del ...","General merchandise freight from ""ES-29300 Ar..."


In [None]:
# Embedding JSON

def _embed_offer(offer):
  """Embed a single DataFrame row from its 'Title' and 'Text' fields."""
  return embed_fn(offer['Title'], offer['Text'])

# One embed_fn call per row; the returned vectors are stored in a new column.
liquidity_data['Embeddings'] = liquidity_data.apply(_embed_offer, axis=1)

liquidity_data.head()

Unnamed: 0,Title,Text,Embeddings
0,"""ES-28001 Madrid"" to ""ES-01001 Vitoria-Gasteiz""","General merchandise freight from ""ES-28001 Mad...","[0.024121914, -0.011058849, -0.0761652, -0.022..."
1,"""ES-28864 Ajalvir"" to ""ES-08820 Prat De Llob...","General merchandise freight from ""ES-28864 Aja...","[0.0006247966, -0.014735748, -0.053620886, -0...."
2,"""ES-08001 Barcelona"" to ""ES-24420 Fabero""","General merchandise freight from ""ES-08001 Bar...","[0.018859513, -0.0224191, -0.061320875, -0.033..."
3,"""ES-28830 San Fernando De Henares"" to ""ES-20...","General merchandise freight from ""ES-28830 Sa...","[0.0111035835, -0.004670841, -0.08075084, -0.0..."
4,"""ES-29300 Archidona"" to ""ES-41440 Lora del ...","General merchandise freight from ""ES-29300 Ar...","[0.047270477, 0.015737392, -0.08134027, 0.0030..."


In [None]:
# Score every document in the dataframe against the query with a dot product
# and return the text of the highest-scoring one

def find_best_offer(query, dataframe):
  """Return the 'Text' of the row whose embedding best matches `query`.

  Args:
    query: Free-text search string; embedded with task_type "retrieval_query"
      using the module-level `model`.
    dataframe: Frame with an 'Embeddings' column (one vector per row) and a
      'Text' column.

  Returns:
    The 'Text' value of the row with the largest dot product between its
    embedding and the query embedding.

  Raises:
    ValueError: If `dataframe` has no rows.
  """
  # Check before calling the API: an empty frame would otherwise cost a
  # request and then fail inside np.stack with an unrelated message.
  if len(dataframe) == 0:
    raise ValueError("Cannot search for an offer in an empty dataframe")

  query_embedding = genai.embed_content(model=model,
                                        content=query,
                                        task_type="retrieval_query")["embedding"]

  # Stack the per-row vectors into one 2-D array so a single dot product
  # scores every document at once.
  document_embeddings = np.stack(dataframe['Embeddings'])
  scores = np.dot(document_embeddings, query_embedding)
  best_position = np.argmax(scores)

  # argmax yields a position, not an index label, hence iloc.
  return dataframe.iloc[best_position]['Text']

In [None]:
# RUN 1st QUERY
# find_best_offer embeds the query itself, so no separate embed_content call
# is made here. It also reads the `model` defined next to embed_fn, which keeps
# the query embedding on the same model as the document embeddings.

query = "Barcelona"

recommended_offer = find_best_offer(query, liquidity_data)
recommended_offer

'General merchandise freight from "ES-08001 Barcelona"  to "ES-46001 Valencia"  Loading date: 10/05  Distance: 350 km  Duration: 4h 55m  Type of truck: Tautliner '

In [None]:
# RUN 2nd QUERY: a bare place name
query = "Vilanova i la Geltrú"

recommended_offer = find_best_offer(query, liquidity_data)
recommended_offer

'General merchandise freight from "ES-46001 Valencia"  to "ES-08800  Vilanova i la Geltrú"  Loading date: 09/05  Distance: 302 km  Duration: 4h 19m  Type of truck: Tautliner '

In [None]:
# RUN 3rd QUERY: an explicit origin and destination
query = "FROM Ejea de los Caballeros TO Azuqueca de Henares"

recommended_offer = find_best_offer(query, liquidity_data)
recommended_offer

'General merchandise freight from "ES-50600  Ejea de los Caballeros"  to "ES-19200  Azuqueca de Henares"  Loading date: 10/05  Distance: 308 km  Duration: 5h 59m  Type of truck: Tautliner '

In [None]:
# RUN 4th QUERY: phrased like the "Type of truck: ..." segment of the embedded text
query = "Type of truck: Frigo"

recommended_offer = find_best_offer(query, liquidity_data)
recommended_offer

'General merchandise freight from "ES-29300  Archidona"  to "ES-41440  Lora del Río"  Loading date: 09/05  Distance: 178 km  Duration: 2h 53m  Type of truck: Frigo '

In [None]:
# RUN 5th QUERY: a natural-language request naming an origin and a truck type
query = "I'd like a freight From Oviedo to my Box truck"

recommended_offer = find_best_offer(query, liquidity_data)
recommended_offer

'General merchandise freight from "ES-33001 Oviedo"  to "ES-31001 Pamplona"  Loading date: 10/05  Distance: 449 km  Duration: 6h 40m  Type of truck: Box '

NEXT STEPS:

1. Add an input prompt so that queries can be typed in
2. List several matching offers instead of only one
3. Be able to sum the weights of offers in order to create "groupages"
4. Request all the "details" of an offer from its ID
