In [66]:
from rdflib import Graph, Namespace, Literal, BNode, URIRef
import os
import getpass
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.schema import Document
from sklearn.cluster import KMeans
import json

In [58]:
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

In [30]:
# Load the graph
g = Graph()
g.parse("data/dunbar/bacnet.ttl", format="turtle")

<Graph identifier=N7f354840f7bc4cf58b9d6b6afbbe75b0 (<class 'rdflib.graph.Graph'>)>

In [33]:
# Load jsonld file
data = json.load(open("data/dunbar/bacnet.jsonld"))

In [50]:
# Create the documents to load into the vector store
documents = []

# Loop through the jsonld file and create a document for each
for i in range(0, len(data)):
    # Check if its a bacnet device or point
    type = data[i]["@type"][0]
    if type == "http://data.ashrae.org/bacnet/2016#bacnet_Point":
        device_name = data[i]['http://data.ashrae.org/bacnet/2016#device_name'][0]['@value']
        point_name = data[i]['http://data.ashrae.org/bacnet/2016#object_name'][0]['@value']
        present_value = data[i]['http://data.ashrae.org/bacnet/2016#present_value'][0]['@value']
        unit = data[i]['http://data.ashrae.org/bacnet/2016#object_units'][0]['@value']

        # Create the document
        content = device_name + " " + point_name + " " + present_value + " " + unit
        documents.append(Document(page_content=content, metadata={"type": "bacnet_point", }))

In [62]:
# Load the documents into the vector store
db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [65]:
vectorstore_data = db.get(include=["embeddings", "metadatas"])
embs = vectorstore_data["embeddings"]

In [72]:
# use a predefined number of clusters
num_clusters = 100

# Perform k-means clustering
kmeans = KMeans(n_clusters=num_clusters, random_state=0)
cluster_assignments = kmeans.fit_predict(embs)
cluster_assignments

  super()._check_params_vs_input(X, default_n_init=10)


array([24, 11, 49, ..., 23,  8,  0], dtype=int32)

In [73]:
# Use the cluster assignments to create a dictionary of clusters
clusters = {}
for i in range(0, len(cluster_assignments)):
    cluster = cluster_assignments[i]
    if cluster not in clusters:
        clusters[cluster] = []
    clusters[cluster].append(documents[i].page_content)

clusters

{24: ['VAV-D2-28 7A37VavOccupM active ',
  'VAV-D2-45 3A42VavOccupM active ',
  'VAV-D2-50 3A37VavOccupM active ',
  'VAV-D2-6 7A18VavOccupM active ',
  'VAV-D2-21 7A42VavOccupM active ',
  'VAV-D2-37 3A14VavOccupM active ',
  'VAV-D2-24 7A31VavOccupM active ',
  'VAV-D2-56 3A34VavOccupM active ',
  'VAV-D2-53 3A31VavOccupM active ',
  'VAV-D2-41 3A9VavOccupM active ',
  'VAV-D2-44 3A44VavOccupM active ',
  'VAV-D2-27 7A38VavOccupM active ',
  'VAV-D2-18 7A25VavOccupM active ',
  'VAV-D2-48 3A40VavOccupM active ',
  'VAV-D2-22 7A40VavOccupM active ',
  'VAV-D2-46 3A41CwClose active ',
  'VAV-D2-4 7A4VavOccupM active ',
  'VAV-D2-38 3A13VavOccupM active ',
  'VAV-D2-17 7A44VavOccupM active ',
  'VAV-D2-44 EF-33-S inactive ',
  'VAV-D2-44 EF-33-LOW-SPEED active ',
  'VAV-D2-3 7A19VavOccupM active ',
  'VAV-D2-10 7A7VavOccupM active ',
  'VAV-D2-2 7A20VavOccupM active ',
  'VAV-D2-32 3A3VavOccupM active ',
  'VAV-D2-26 7A35VavOccupM active ',
  'VAV-D2-55 3A35VavOccupM active ',
  'VAV-D2

In [77]:
import matplotlib.pyplot as plt
 
#filter rows of original data
filtered_label0 = embs[label == 0]
 
#plotting the results
plt.scatter(filtered_label0[:,0] , filtered_label0[:,1])
plt.show()

AttributeError: 'list' object has no attribute 'shape'