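# Introduction

This notebook demonstrates OpenAI function calling: a chat-based "realtor" assistant narrows a housing dataset by asking the notebook to run pandas filter functions on the user's behalf.
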

# Dependencies

In [None]:
!pip3 install pandas==2.0.3 openai==0.28.1 termcolor==2.3.0 tenacity==8.2.3 tiktoken==0.5.1

# Imports

In [2]:
import json

import pandas as pd
import openai
import tiktoken

from tenacity import retry, stop_after_attempt, wait_random_exponential
from termcolor import colored

# Data Handling

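Data source: the Kaggle "Housing Prices Dataset" (https://www.kaggle.com/datasets/yasserh/housing-prices-dataset/). The code below expects it as `housing_data.csv` in the working directory.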

In [14]:
def load_housing_data(preprocess=True, path="housing_data.csv"):
    """Load the housing dataset from a CSV file, optionally encoding text columns as integers.

    Args:
        preprocess: When True, the yes/no columns are converted to 1/0 and
            ``furnishingstatus`` is replaced by its position in
            ``["unfurnished", "semi-furnished", "furnished"]`` (0, 1, 2).
            When False, the frame is returned exactly as read.
        path: Location of the CSV file. Defaults to ``housing_data.csv`` in the
            current working directory.

    Returns:
        pd.DataFrame: the (optionally encoded) housing data.

    Raises:
        ValueError: if ``preprocess`` is True and ``furnishingstatus`` contains a
            value that is not one of the three known labels.
    """
    furn = ["unfurnished", "semi-furnished", "furnished"]
    binary_cols = ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea"]

    hd = pd.read_csv(path)
    if preprocess:
        # Only the literal "no" maps to 0; every other value (including "yes") maps to 1.
        hd[binary_cols] = hd[binary_cols].ne("no").astype("int64")
        # list.index raises ValueError on an unexpected label rather than silently encoding it.
        hd["furnishingstatus"] = hd["furnishingstatus"].map(furn.index)
    return hd

In [57]:
# Load the data exactly as stored in the CSV (no encoding) and inspect column types
housing_data = load_housing_data(preprocess=False)
print(housing_data.dtypes)
# Last expression of the cell: rendered as a table of the first five rows
housing_data.head()

price                int64
area                 int64
bedrooms             int64
bathrooms            int64
stories              int64
mainroad            object
guestroom           object
basement            object
hotwaterheating     object
airconditioning     object
parking              int64
prefarea            object
furnishingstatus    object
dtype: object


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [58]:
# Reload with preprocessing enabled: the text columns are now integer-encoded
housing_data = load_housing_data(preprocess=True)
print(housing_data.dtypes)
# Last expression of the cell: rendered as a table of the first five rows
housing_data.head()

price               int64
area                int64
bedrooms            int64
bathrooms           int64
stories             int64
mainroad            int64
guestroom           int64
basement            int64
hotwaterheating     int64
airconditioning     int64
parking             int64
prefarea            int64
furnishingstatus    int64
dtype: object


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,2
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,2
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,2
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,2


# OpenAI

## API Key

In [11]:
# Read the OpenAI API key from the file ../api_key.
try:
    # The context manager closes the file handle; strip() drops surrounding whitespace
    # (e.g. a trailing newline) so it is not sent as part of the key.
    with open("../api_key", "r") as key_file:
        openai.api_key = key_file.read().strip()
    print("API Key loaded")
except FileNotFoundError:
    # Best-effort: report the problem and let the notebook continue.
    print("Couldn't find the API key file")

API Key loaded


## Functions

In [100]:
def filter_lte(housing_data: pd.DataFrame,
               filter_name: str,
               filter_value: int):
    """Keep the rows whose ``filter_name`` column is less than or equal to ``filter_value``.

    Returns:
        A ``(filtered_frame, row_count)`` tuple. The frame passed in is left untouched.
    """
    at_most = housing_data[filter_name].le(filter_value)
    matches = housing_data.loc[at_most]
    return matches, matches.shape[0]


def filter_gte(housing_data: pd.DataFrame,
               filter_name: str,
               filter_value: int):
    """Keep the rows whose ``filter_name`` column is greater than or equal to ``filter_value``.

    Returns:
        A ``(filtered_frame, row_count)`` tuple. The frame passed in is left untouched.
    """
    at_least = housing_data[filter_name].ge(filter_value)
    matches = housing_data.loc[at_least]
    return matches, matches.shape[0]


def filter_equal(housing_data: pd.DataFrame,
                 filter_name: str,
                 filter_value: int):
    """Keep the rows whose ``filter_name`` column equals ``filter_value``.

    Returns:
        A ``(filtered_frame, row_count)`` tuple. The frame passed in is left untouched.
    """
    is_match = housing_data[filter_name].eq(filter_value)
    matches = housing_data.loc[is_match]
    return matches, matches.shape[0]


# Lookup table from function name to the local callable that implements it.
FUNCTION_DICT = dict(
    filter_lte=filter_lte,
    filter_gte=filter_gte,
    filter_equal=filter_equal,
)

## Function Specifications

In [104]:
# Columns of the housing dataset that a filter may target. Listing them as an
# ``enum`` in the schema tells the model which column names exist.
_FILTERABLE_COLUMNS = [
    "price", "area", "bedrooms", "bathrooms", "stories",
    "mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning",
    "parking", "prefarea", "furnishingstatus",
]


def _filter_spec(name: str, description: str, value_description: str) -> dict:
    """Build the function-calling spec for one filter; all filters share the same parameter schema."""
    return {
        "name": name,
        "description": description,
        "parameters": {
            "type": "object",
            "properties": {
                "filter_name": {
                    "type": "string",
                    # Fresh copy per spec so the specs do not share one mutable list.
                    "enum": list(_FILTERABLE_COLUMNS),
                    "description": "The characteristic to filter by.",
                },
                "filter_value": {
                    "type": "integer",
                    "description": value_description,
                },
            },
            "required": ["filter_name", "filter_value"],
        },
    }


# Function specifications passed as the `functions` argument of the chat completion call.
FUNCTION_SPECS = [
    _filter_spec(
        "filter_lte",
        "Return the number of houses for which the specified filter is less than or equal to the filter value.",
        "The value of the characteristic.",
    ),
    _filter_spec(
        "filter_gte",
        "Return the number of houses for which the specified filter is greater than or equal to the filter value.",
        "The value of the characteristic.",
    ),
    _filter_spec(
        "filter_equal",
        "Return the number of houses which match given value.",
        "The value to match.",
    ),
]

## API Calls

In [144]:
class Realtor:
    """Chat-driven assistant that narrows a housing list through OpenAI function calling.

    The message history is kept in ``self.convo`` and the current property list in
    ``self.housing_data``. Each successful function call replaces
    ``self.housing_data`` with the filtered frame, so filters accumulate over the
    course of a conversation.
    """

    def __init__(self, system_content: str, function_specs: list, functions: dict, model: str = "gpt-3.5-turbo", debug=False):
        """Load the housing data and seed the conversation with the system prompt.

        Args:
            system_content: System prompt template; formatted with the number of
                loaded properties as its single positional ``{}`` field.
            function_specs: Function specifications sent with every completion request.
            functions: Mapping from function name to the callable implementing it.
            model: Name of the chat model to use.
            debug: When True, function calls and function results are printed too.
        """
        self.model = model
        self.function_specs = function_specs
        self.functions = functions

        self.housing_data = load_housing_data()
        self.convo: list = [{"role": "system", "content": system_content.format(len(self.housing_data))}]
        self.debug = debug
        # Console colour used for each kind of printed message.
        self.colours = {
            "function_call": "red",
            "assistant": "blue",
            "function_response": "green"
        }

    @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
    def call_completion_api(self):
        """Send the conversation so far to the chat completion API and return the reply message.

        Retried with random exponential backoff (1-60 seconds), for at most 6 attempts.
        """
        completion = openai.ChatCompletion.create(
            model=self.model,
            messages=self.convo,
            functions=self.function_specs
        )

        return completion.choices[0]["message"]

    def converse(self):
        """Run the interactive loop: read user input, resolve function calls, print replies.

        The loop ends when the user enters ``q`` (surrounding whitespace is ignored).
        """
        # Get the initial welcome from ChatGPT
        assistant_message = self.call_completion_api()
        self.remember(assistant_message)

        while True:
            # Get user input and add it to the convo (or quit on 'q')
            user_input = input("> ")
            if user_input.strip() == "q":
                break
            self.remember({"role": "user", "content": user_input})

            # Get ChatGPT response to user input
            assistant_message = self.call_completion_api()

            # Handle function calls until ChatGPT is happy
            while assistant_message.get("function_call"):
                # Record the function call
                self.remember(assistant_message)

                # Run the requested function and record its result (or error report)
                function_result = self.call_function(assistant_message["function_call"])
                self.remember(function_result)

                # Send convo updated with function result to ChatGPT
                assistant_message = self.call_completion_api()

            # Remember initial response to user input or ChatGPT response to function result
            self.remember(assistant_message)

    def remember(self, data):
        """Append a message to the conversation history and print it when appropriate.

        Assistant text is always printed. Function calls and function results are
        printed only in debug mode. User messages are stored without printing.

        Args:
            data: An assistant message object returned by the API, or a plain dict
                for user and function messages.
        """
        colour: str = ""
        print_message = ""
        convo_message: dict

        if data["role"] == "assistant":
            convo_message = data.to_dict_recursive()
            if data.get("function_call"):
                # function call
                if self.debug:
                    colour = self.colours["function_call"]
                    print_message = data.function_call.to_dict_recursive()
            else:
                # assistant message; fall back to "" so a missing or None content
                # prints nothing instead of failing
                colour = self.colours["assistant"]
                print_message = data.get("content") or ""
        elif data["role"] == "user":
            # user content
            convo_message = data
        else:
            # function response
            convo_message = data
            if self.debug:
                colour = self.colours["function_response"]
                print_message = data

        self.convo.append(convo_message)
        if print_message:
            print(colored(print_message, colour))

    @staticmethod
    def _function_error(function_name, message):
        """Build a function-role message that reports a failed call back to the model."""
        return {"role": "function", "name": function_name, "content": f"Error: {message}"}

    def call_function(self, function_call):
        """Execute the function requested by the model and return a function-role message.

        On success ``self.housing_data`` is replaced by the filtered frame and the
        message content is the number of remaining properties. If the request is
        invalid (unknown function, malformed arguments, unknown column or
        non-numeric value), the data is left unchanged and the message content
        describes the error so the model can correct its call.

        Args:
            function_call: The ``function_call`` part of an assistant message, with
                ``name`` and JSON-encoded ``arguments`` entries.

        Returns:
            dict: ``{"role": "function", "name": ..., "content": ...}``.
        """
        # Extract the function call info
        function_name = function_call["name"]
        function_to_call = self.functions.get(function_name)
        if function_to_call is None:
            return self._function_error(
                function_name,
                f"unknown function '{function_name}'. Available functions: {', '.join(self.functions)}."
            )

        try:
            function_args = json.loads(function_call["arguments"])
        except (TypeError, ValueError) as err:
            # json.JSONDecodeError is a ValueError; TypeError covers non-string arguments
            return self._function_error(function_name, f"arguments are not valid JSON ({err}).")
        if not isinstance(function_args, dict):
            return self._function_error(function_name, "arguments must be a JSON object.")

        filter_name = function_args.get("filter_name")
        filter_value = function_args.get("filter_value")

        if not isinstance(filter_name, str) or filter_name not in self.housing_data.columns:
            return self._function_error(
                function_name,
                f"unknown filter_name {filter_name!r}. Valid names: {', '.join(self.housing_data.columns)}."
            )
        # bool is a subclass of int, so reject it explicitly
        if isinstance(filter_value, bool) or not isinstance(filter_value, (int, float)):
            return self._function_error(function_name, f"filter_value must be a number, got {filter_value!r}.")

        self.housing_data, num_properties = function_to_call(
            housing_data=self.housing_data,
            filter_name=filter_name,
            filter_value=filter_value
        )

        return {"role": "function", "name": function_name, "content": str(num_properties)}

In [145]:
# System prompt template. The single {} placeholder is filled with the number of
# properties loaded when the Realtor is constructed.
REALTOR_SYSTEM_CONTENT = """
You are a helpful realtor with a list initially containing {} properties.
Greet the customer and offer to help narrow this selection based on the customer's requirements.
For each property, we have the following characteristics:
    price (asking price in USD),
    area (total floor area in metres squared),
    bedrooms (number of bedrooms),
    bathrooms (number of bathrooms),
    stories (number of stories),
    mainroad (1 if the property is on a main road, otherwise 0),
    guestroom (1 if the property has a guestroom, otherwise 0),
    basement (1 if the property has a basement, otherwise 0),
    hotwaterheating (1 if the property has a water heating system, otherwise 0),
    airconditioning (1 if the property has air conditioning, otherwise 0),
    parking (number of parking spaces),
    prefarea (1 if the property is in a trendy neighbourhood, otherwise 0),
    furnishingstatus (0 if unfurnished, 1 if semi-furnished, 2 if fully furnished)
Help the user to narrow the list.
When there are fewer than 10, advise the user to call the office to arrange viewings.
"""

# Example opening request to type at the prompt:
#   I'm looking for a house. The budget is 5000000; we are a two car family; we need somewhere without stairs.

# Start an interactive session in debug mode (function calls and results are echoed); enter "q" to quit.
realtor = Realtor(
    system_content=REALTOR_SYSTEM_CONTENT,
    function_specs=FUNCTION_SPECS,
    functions=FUNCTION_DICT,
    debug=True,
)
realtor.converse()

[34mHello! How can I assist you today?[0m


>  I'm looking for a house. The budget is 5000000; we are a two car family; we need somewhere without stairs.


[31m{'name': 'filter_lte', 'arguments': '{\n  "filter_name": "price",\n  "filter_value": 5000000\n}'}[0m
[32m{'role': 'function', 'name': 'filter_lte', 'content': '350'}[0m
[31m{'name': 'filter_gte', 'arguments': '{\n  "filter_name": "parking",\n  "filter_value": 2\n}'}[0m
[32m{'role': 'function', 'name': 'filter_gte', 'content': '54'}[0m
[31m{'name': 'filter_match', 'arguments': '{\n  "filter_name": "stories",\n  "filter_value": 1\n}'}[0m
[32m{'role': 'function', 'name': 'filter_match', 'content': '29'}[0m
[34mBased on your requirements, I have narrowed down the list to 29 properties that are within your budget, have at least two parking spaces, and are single-story houses. 

Would you like me to provide more details about these properties?[0m


>  must have aircon and hot showers


[31m{'name': 'filter_match', 'arguments': '{\n  "filter_name": "airconditioning",\n  "filter_value": 1\n}'}[0m
[32m{'role': 'function', 'name': 'filter_match', 'content': '8'}[0m
[31m{'name': 'filter_match', 'arguments': '{\n  "filter_name": "hotwaterheating",\n  "filter_value": 1\n}'}[0m
[32m{'role': 'function', 'name': 'filter_match', 'content': '0'}[0m
[34mOut of the 29 properties that meet your budget, parking requirements, and are single-story houses, only 8 of them have air conditioning. However, none of them have a hot water heating system.

Since the number of properties matching all your requirements is less than 10, I recommend calling our office to arrange viewings for these houses. Our team will be able to provide you with more specific details about each property and help you make a decision.

Please let me know if there's anything else I can assist you with.[0m


>  q


In [143]:
# Dump the recorded conversation history, one message per line
for message in realtor.convo:
    print(message)

{'role': 'system', 'content': "\nYou are a helpful realtor with a list initially containing 545 properties.\nGreet the customer and offer to help narrow this selection based on the customer's requirements.\nFor each property, we have the following characteristics:\n    price (asking price in USD),\n    area (total floor area in metres squared),\n    bedrooms (number of bedrooms),\n    bathrooms (number of bathrooms),\n    stories (number of stories),\n    mainroad (1 if the property is on a main road, otherwise 0),\n    guestroom (1 if the property has a guestroom, otherwise 0),\n    basement (1 if the property has a basement, otherwise 0),\n    hotwaterheating (1 if the property has a water heating system, otherwise 0),\n    airconditioning (1 if the property has air conditioning, otherwise 0),\n    parking (number of parking spaces),\n    prefarea (1 if the property is in a trendy neighbourhood, otherwise 0),\n    furnishingstatus (0 if unfurnished, 1 if semi-furnished, 2 if fully fu

In [8]:
def count_tokens(sentence, model="gpt-3.5-turbo"):
    """Return the number of tokens ``sentence`` encodes to under the tokenizer for ``model``.

    Args:
        sentence: Text to tokenize.
        model: Model name used to look up the tiktoken encoding.

    Returns:
        int: the token count.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(sentence))


# Quick check: count the tokens in a sample user request and print the result
sentence = "I'm looking for a house. The budget is 5000000."
c = count_tokens(sentence)
print(c)

15
