To convert natural language voice into a SPARQL query using Python, you need to integrate various components, including speech recognition, natural language understanding, and query generation. Here's a detailed step-by-step guide with example codes for each stage:

Step 1: Speech Recognition
Start by transcribing the spoken words into text using speech recognition. The SpeechRecognition library is commonly used for this task.

In [11]:
import speech_recognition as sr

def recognize_speech():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Speak:")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
        print("You said:", text)
        return text
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio.")
    except sr.RequestError as e:
        print("Could not request results from speech recognition service; {0}".format(e))

# Call the function to get the recognized text
recognized_text = recognize_speech()


Speak:
You said: the properties about Apollo 7


Step 2: Natural Language Understanding (NLU)
Perform natural language understanding to extract intent and entities from the recognized text. Libraries like spaCy or NLTK can be used for this purpose.

In [12]:
import spacy

def extract_intent_entities(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    
    # Extract intent
    intent = None
    for token in doc:
        if token.pos_ == "VERB":
            intent = token.lemma_
            break
    
    # Extract entities
    entities = []
    for entity in doc.ents:
        entities.append((entity.text, entity.label_))
    
    return intent, entities

# Call the function to extract intent and entities
intent, entities = extract_intent_entities(recognized_text)


Step 3: Query Generation
Based on the extracted intent and entities, generate a SPARQL query. Define query templates and fill in the entities as necessary.

In [13]:
def generate_sparql_query(intent, entities):
    if intent == "find":
        # Query template for finding information
        entity = None
        for ent_text, ent_type in entities:
            if ent_type == "PERSON" or ent_type == "ORG":
                entity = ent_text
                break
        if entity:
            sparql_query = f"SELECT ?property ?value WHERE {{ <{entity}> ?property ?value }}"
        else:
            sparql_query = "No entity found for the query."
    elif intent == "count":
        # Query template for counting entities
        entity_type = None
        for ent_text, ent_type in entities:
            if ent_type == "PERSON" or ent_type == "ORG":
                entity_type = ent_type.lower()
                break
        if entity_type:
            sparql_query = f"SELECT (COUNT(?entity) AS ?count) WHERE {{ ?entity rdf:type dbpedia:{entity_type} }}"
        else:
            sparql_query = "No entity type found for the query."
    else:
        sparql_query = "Intent not supported for the query."

    return sparql_query

# Call the function to generate the SPARQL query
sparql_query = generate_sparql_query(intent, entities)


In [9]:
sparql_query = generate_sparql_query(intent, entities)

In [14]:
sparql_query

'Intent not supported for the query.'

Putting it all together:

In [15]:
import speech_recognition as sr
import spacy

def recognize_speech():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Speak:")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
        print("You said:", text)
        return text


SyntaxError: incomplete input (3212650815.py, line 13)

In [None]:
import requests
import json
import time
while True:
    url0 = "https://thingsboard.cs.cf.ac.uk/api/auth/login"
    headers = {"Content-Type": "application/json"}

    payload = {
        "username": "SuhasAbacwsLivingLab@cardiff.ac.uk",
        "password": "SuhasDevmane"
    }

    response = requests.post(url0, json=payload, headers=headers)

    if response.status_code == 200:
        # Authentication successful
        token = response.json().get("token")
        print("JWT_Token:", token)
    else:
        print("Authentication failed. Status Code:", response.status_code)
        print("Response:", response.text)

    access_token = token  
    keys_to_fetch = [
        "UV_light", "Loudness", "PM1.0Atmospheric", "PM2.5Atmospheric", "PM10Atmospheric", 
        "PIR_Status", "tVOC_Concentration", "CO2eq_Concentration", "MQ2_sensor_voltage", 
        "MQ2_Rs_ratio", "MQ2_Rs/R0_Ratio", "MQ3_sensor_voltage", "MQ3_Rs_ratio", 
        "MQ3_Rs/R0_Ratio", "HCHO_ppm", "Air_Quality", "Light_value", "Visible_light", 
        "IR_light", "MQ5_sensor_voltage", "MQ5_Rs_ratio", "MQ5_Rs/R0_Ratio", 
        "MQ9_sensor_voltage", "MQ9_Rs_ratio", "MQ9_Rs/R0_Ratio", "O2_22.10_N2_gas", 
        "C2H5CH_gas", "VOC_gas", "CO_gas", "Co2", "Temperature", "Humidity", "Luminance"
    ]

    units = {
        "UV_light": "UV Index",
        "Loudness": "dB",
        "PM1.0Atmospheric": "µg/m³",
        "PM2.5Atmospheric": "µg/m³",
        "PM10Atmospheric": "µg/m³",
        "PIR_Status": "",  # Assuming it's a status indicator with no specific unit
        "tVOC_Concentration": "ppb",
        "CO2eq_Concentration": "ppm",
        "MQ2_sensor_voltage": "Volts",
        "MQ2_Rs_ratio": "",
        "MQ2_Rs/R0_Ratio": "",
        "MQ3_sensor_voltage": "Volts",
        "MQ3_Rs_ratio": "",
        "MQ3_Rs/R0_Ratio": "",
        "HCHO_ppm": "ppm",
        "Air_Quality": "",  # Assuming it's a qualitative measure with no specific unit
        "Light_value": "Lux",
        "Visible_light": "Lux",
        "IR_light": "Lux",
        "MQ5_sensor_voltage": "Volts",
        "MQ5_Rs_ratio": "",
        "MQ5_Rs/R0_Ratio": "",
        "MQ9_sensor_voltage": "Volts",
        "MQ9_Rs_ratio": "",
        "MQ9_Rs/R0_Ratio": "",
        "O2_22.10_N2_gas": "",  # Assuming it's a gas measurement with no specific unit
        "C2H5CH_gas": "",  # Assuming it's a gas measurement with no specific unit
        "VOC_gas": "",  # Assuming it's a gas measurement with no specific unit
        "CO_gas": "",  # Assuming it's a gas measurement with no specific unit
        "Co2": "",  # Assuming it's a gas measurement with no specific unit
        "Temperature": "°C",
        "Humidity": "%",
        "Luminance": "cd/m²"
    }

    pairs = [
            ('70ad22a0-b82c-11ed-b196-bb47e24272bc','node_5.01'),
            ('75d29440-b82c-11ed-b196-bb47e24272bc','node_5.02'),
            ('7b717ba0-b82c-11ed-b196-bb47e24272bc','node_5.03'),
            ('a673eb80-b82c-11ed-b196-bb47e24272bc','node_5.04'),
            ('83456b70-b82c-11ed-b196-bb47e24272bc','node_5.05'),
            ('b96d6720-b82c-11ed-b196-bb47e24272bc','node_5.06'),
            ('be98a520-b82c-11ed-b196-bb47e24272bc','node_5.07'),
            ('c3110de0-b82c-11ed-b196-bb47e24272bc','node_5.08'),
            ('c950f030-b82c-11ed-b196-bb47e24272bc','node_5.09'),
            ('cfddba00-b82c-11ed-b196-bb47e24272bc','node_5.10'),
            ('278505c0-0f7a-11ee-bf90-a16a1a9e1e0a','node_5.11'),
            ('d9576a90-b82c-11ed-b196-bb47e24272bc','node_5.12'),
            ('de18ea40-b82c-11ed-b196-bb47e24272bc','node_5.13'),
            ('f57a1560-7cf3-11ee-94bc-d389020903a3','node_5.14'),
            ('508d1b60-57eb-11ee-8714-19d56ba0c4fd','node_5.15'),
            ('86c63bd0-57f0-11ee-8714-19d56ba0c4fd','node_5.16'),
            ('3efd82d0-7cf4-11ee-94bc-d389020903a3','node_5.17'),
            ('f583bc50-57e6-11ee-8714-19d56ba0c4fd','node_5.18'),
            ('9458c560-0f75-11ee-bf90-a16a1a9e1e0a','node_5.19'),
            ('cbc851c0-57ee-11ee-8714-19d56ba0c4fd','node_5.20'),
            ('2ae959b0-53c6-11ee-8714-19d56ba0c4fd','node_5.21'),
            ('351b0eb0-57ef-11ee-8714-19d56ba0c4fd','node_5.22'),
            ('0f96bed0-b82d-11ed-b196-bb47e24272bc','node_5.23'),
            ('13e642d0-b82d-11ed-b196-bb47e24272bc','node_5.24'),
            ('18a159e0-b82d-11ed-b196-bb47e24272bc','node_5.25'),
            ('fef50770-57f1-11ee-8714-19d56ba0c4fd','node_5.26'),
            ('2a5d9a90-b82d-11ed-b196-bb47e24272bc','node_5.27'),
            ('99c6a3b0-b82b-11ed-b196-bb47e24272bc','node_5.28'),
            ('51f2d170-57e1-11ee-8714-19d56ba0c4fd','node_5.29'),
            ('9c563630-0f75-11ee-bf90-a16a1a9e1e0a','node_5.30'),
            ('391303e0-b82d-11ed-b196-bb47e24272bc','node_5.31'),
            ('3d3a3f60-b82d-11ed-b196-bb47e24272bc','node_5.32'),
            ('a83a46c0-7cf4-11ee-94bc-d389020903a3','node_5.33'),
            ('4665a8e0-b82d-11ed-b196-bb47e24272bc','node_5.34')
        
    ]

    # Keep track of the last received timestamp
    last_ts = None

    # Run the code for each pair
    for device_id, node_name in pairs:
        url1 = f"https://thingsboard.cs.cf.ac.uk/api/plugins/telemetry/DEVICE/{device_id}/values/timeseries?keys={','.join(keys_to_fetch)}"
        headers = {
            'Content-Type': 'application/json',
            'X-Authorization': f'Bearer {access_token}'
        }

        response = requests.get(url1, headers=headers)

        if response.status_code == 200:
            data = response.json()

            # Remove the extra brackets and format the data
            formatted_data = {}
            for key in data:
                if key in units:
                    try:
                        value = float(data[key][0]["value"])
                    except ValueError:
                        value = data[key][0]["value"]  # If not numeric, keep the original value

                    formatted_data[key.lower()] = {
                        "value": value,
                        "units": units[key]
                    }

            # Check if the current timestamp is different from the last one
            current_ts = data[key][0]["ts"]
            if current_ts != last_ts:
                print(f"Received data for {device_id}:{node_name}:")
                print(json.dumps(formatted_data, indent=2))

                # ... (your code for sending data)

                try:
                    # time.sleep(1)
                    url2 = f'http://visualiser:80/api/devices/{node_name}/data'
                    response = requests.put(url2, headers=headers, data=json.dumps(formatted_data))

                    if response.status_code == 200:
                        json_data = response.json()
                        # Process the JSON data as needed
                        print(json_data)

                        # Update the last timestamp
                        last_ts = current_ts
                    else:
                        print(f"Error: {response.status_code}")
                        print(response.text)

                except requests.exceptions.RequestException as e:
                    print(f"Request error: {e}")
            else:
                print(f"Received data with the same timestamp for {device_id}:{node_name}. Skipping...")
        else:
            print(f"Failed to get data for {device_id}:{node_name}. Status Code: {response.status_code}")
            print(response.text)
    time.sleep(5)
