In [None]:
import cv2 as cv # OpenCV computer vision library
import numpy as np # Scientific computing library 
import networkx as nx
import matplotlib.pyplot as plt
import json
import gtts
import os
from playsound import playsound
import speech_recognition as sr
from fuzzywuzzy import fuzz

### This function is used to detect all the objects in the scene

In [None]:
def detectAllObjects():
    """Grab one webcam frame and return the labels of the objects of interest seen in it.

    The frame is read from camera index 0 and passed through the TensorFlow
    SSD network loaded from ``frozen_inference_graph.pb`` /
    ``ssd_inception_v2_coco_2017_11_17.pbtxt`` (paths relative to the working
    directory).

    Returns:
        list[str]: one label per detection whose score is above 0.3 and whose
        class is in the set of supported objects. The same label appears once
        per detection, so duplicates are possible. An empty list is returned
        when no frame could be read from the camera.
    """
    #All the objects present in COCO dataset
    classes = ["background", "person", "bicycle", "car", "motorcycle",
      "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
      "unknown", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
      "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "unknown", "backpack",
      "umbrella", "unknown", "unknown", "handbag", "tie", "suitcase", "frisbee", "skis",
      "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
      "surfboard", "tennis racket", "bottle", "unknown", "wine glass", "cup", "fork", "knife",
      "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
      "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "unknown", "dining table",
      "unknown", "unknown", "toilet", "unknown", "tv", "laptop", "mouse", "remote", "keyboard",
      "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "unknown",
      "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" ]

    # Only these labels are reported back to the caller
    wanted = {'banana', 'person', 'knife', 'fork', 'bottle', 'cup',
              'apple', 'cell phone', 'laptop', 'book'}

    # Detections at or below this score are ignored
    min_score = 0.3

    #using trained model, which can be replaced by any object detection model
    pb  = 'frozen_inference_graph.pb'
    pbt = 'ssd_inception_v2_coco_2017_11_17.pbtxt'

    # Read the neural network before opening the camera, so a missing model
    # file cannot leave the camera device open
    cvNet = cv.dnn.readNetFromTensorflow(pb, pbt)

    # Reading one frame from the webcam; the device is always released,
    # even if the read itself raises
    cam = cv.VideoCapture(0)
    try:
        ret_val, img = cam.read()
    finally:
        cam.release()
        cv.destroyAllWindows()

    # No frame (camera missing, busy or unplugged): nothing can be detected
    if not ret_val or img is None:
        return []

    cvNet.setInput(cv.dnn.blobFromImage(img, size=(300, 300), swapRB=True, crop=False))

    # Running object detection
    cvOut = cvNet.forward()

    # Each row read below carries the class id at index 1 and the score at index 2
    detected_list = list()
    for detection in cvOut[0, 0, :, :]:
        score = float(detection[2])
        if score <= min_score:
            continue
        ids = int(detection[1])
        # Guard against ids outside the label table instead of raising IndexError
        if 0 <= ids < len(classes) and classes[ids] in wanted:
            detected_list.append(classes[ids])

    return detected_list

### This function contains a simple sentence generation logic to speak about objects in the scene

In [None]:
def speakingObjectsInScene(detectedObjects):
    """Build an English sentence listing the given objects.

    Each object is prefixed with "a" or "an" depending on whether its first
    letter is a vowel (case-insensitive). Every sentence ends with a period.

    Args:
        detectedObjects: sequence of object names (strings).

    Returns:
        str: e.g. "I can see a cup, an apple and a book.", or
        "I can see nothing." when the sequence is empty.
    """
    vowels = ('a', 'e', 'i', 'o', 'u')

    def _with_article(name):
        # name[:1] (rather than name[0]) keeps an empty label from raising
        article = "an" if name[:1].lower() in vowels else "a"
        return article + " " + name

    phrases = [_with_article(name) for name in detectedObjects]

    if not phrases:
        return "I can see nothing."
    if len(phrases) == 1:
        return "I can see " + phrases[0] + "."
    # "x, y and z." - commas between all but the last two items
    return "I can see " + ", ".join(phrases[:-1]) + " and " + phrases[-1] + "."

### This function is responsible for detecting the risks in the scene and generating a sentence

In [None]:
def displayRisksInScene(detectedObjects):
    """Describe the risks associated with the detected objects.

    Uses the module-level knowledge graph ``G``. For every detected object
    that has an edge to a neighbour of the "risks" node, a phrase
    "<object> <edge value> <risk>" is produced. A spill warning is added when
    both a neighbour of "electronic" and a neighbour of "consuming liquids"
    are among the detected objects.

    Args:
        detectedObjects: sequence of detected object labels. Labels that are
            not nodes of ``G`` are ignored; repeated labels yield each risk
            phrase only once.

    Returns:
        str: the risk sentence, or "I do not see any risks." when nothing
        applies.
    """
    risk_nodes = set(nx.all_neighbors(G, "risks"))

    risk_phrases = list()
    for obj in detectedObjects:
        # nx.neighbors raises for labels that are not in the knowledge graph
        if obj not in G:
            continue
        for neighbour in nx.neighbors(G, obj):
            if neighbour not in risk_nodes:
                continue
            phrase = obj + " " + G.get_edge_data(obj, neighbour)["value"] + " " + neighbour
            # the same object may be detected several times in one frame
            if phrase not in risk_phrases:
                risk_phrases.append(phrase)

    if len(risk_phrases) > 1:
        sentence = ", ".join(risk_phrases[:-1]) + " and " + risk_phrases[-1] + "."
    elif risk_phrases:
        sentence = risk_phrases[0] + "."
    else:
        sentence = ""

    electronicsPresent = any(
        node in detectedObjects for node in nx.all_neighbors(G, "electronic")
    )
    consumingLiqPresent = any(
        node in detectedObjects for node in nx.all_neighbors(G, "consuming liquids")
    )

    if electronicsPresent and consumingLiqPresent:
        if sentence:
            sentence = sentence + " Also, I can see electronic objects, careful not to spill anything."
        else:
            sentence = "I can see electronic objects, careful not to spill anything."
    elif not sentence:
        sentence = "I do not see any risks."

    return sentence

### This function is responsible for detecting the edible items in the scene and generating a sentence

In [None]:
def displayEdiblesInScene(detectedObjects):
    """Return a sentence listing the detected objects that neighbour the "edible" node of ``G``.

    Objects are listed in the order the graph yields the neighbours of
    "edible"; an object detected several times is listed once per detection.
    The sentence itself is produced by ``speakingObjectsInScene``.
    """
    edibleList = [
        node
        for node in nx.all_neighbors(G, "edible")
        for seen in detectedObjects
        if node == seen
    ]
    return speakingObjectsInScene(edibleList)

### This function is responsible for detecting the drinkable items in the scene and generating a sentence

In [None]:
def displayDrinkablesInScene(detectedObjects):
    """Return a sentence listing the detected objects that neighbour the "consuming liquids" node of ``G``.

    Objects are listed in the order the graph yields the neighbours of
    "consuming liquids"; an object detected several times is listed once per
    detection. The sentence itself is produced by ``speakingObjectsInScene``.
    """
    drinkableList = [
        node
        for node in nx.all_neighbors(G, "consuming liquids")
        for seen in detectedObjects
        if node == seen
    ]
    return speakingObjectsInScene(drinkableList)

### This function is responsible for detecting the location and generating a sentence

In [None]:
def displayLocation(detectedObjects):
    """Guess the user's location from the detected objects.

    Looks up the ``probable_location`` attribute of each detected object in
    the module-level knowledge graph ``G`` and collects the distinct values,
    skipping the placeholder "any".

    Args:
        detectedObjects: sequence of detected object labels. Labels that are
            not nodes of ``G`` or carry no ``probable_location`` are ignored.

    Returns:
        str: a sentence naming the candidate locations joined by " or ", or a
        fallback sentence when no specific location can be derived.
    """
    possibleLocationList = list()
    for obj in detectedObjects:
        # labels absent from the graph would otherwise raise KeyError
        if obj not in G:
            continue
        pLoc = G.nodes[obj].get('probable_location')
        if pLoc is None or pLoc == "any" or pLoc in possibleLocationList:
            continue
        possibleLocationList.append(pLoc)

    # Covers both "nothing detected" and "only location-neutral objects detected";
    # the latter used to yield a sentence ending in "you could be in "
    if not possibleLocationList:
        return "I cannot see any thing nearby to make a logical decision"

    return (
        "I am not entirely sure but based on the things I can see, you could be in "
        + " or ".join(possibleLocationList)
    )

### This function decides the function to call based on the question asked

In [None]:
def functionCaller(functionName):
    """Run the scene query named by ``functionName`` and speak its answer.

    Args:
        functionName: one of "allObjectsInScene", "displayRisksInScene",
            "displayEdiblesInScene", "displayDrinkablesInScene",
            "displayLocation", or "Sorry I am Unable to understand". Any
            other value is ignored.

    The camera is only used when a scene query is actually going to run, so
    an unrecognised request no longer pays for a capture and an inference.
    """
    if functionName == "Sorry I am Unable to understand":
        say("Sorry I could not understand")
        return

    # Maps the names stored in the keyword table to the sentence builders
    handlers = {
        "allObjectsInScene": speakingObjectsInScene,
        "displayRisksInScene": displayRisksInScene,
        "displayEdiblesInScene": displayEdiblesInScene,
        "displayDrinkablesInScene": displayDrinkablesInScene,
        "displayLocation": displayLocation,
    }

    handler = handlers.get(functionName)
    if handler is None:
        return

    detectedObjects = detectAllObjects()
    say(handler(detectedObjects))

### This function is for converting Text to Speech

In [None]:
def say(speech):
    """Print ``speech`` and speak it aloud.

    The text is synthesised with gTTS into ``audio.mp3`` in the working
    directory, played, and the file is removed afterwards - also when
    synthesis or playback fails, so no stale file is left behind.

    Args:
        speech: the text to speak.
    """
    audio_path = "audio.mp3"
    print("Luna: ",speech)
    tts = gtts.gTTS(speech)
    try:
        tts.save(audio_path)
        playsound(audio_path)
    finally:
        # save() may have failed before creating the file
        if os.path.exists(audio_path):
            os.remove(audio_path)

### This function is for converting Speech to Text

In [None]:
def speechToText():
    """Listen on the default microphone and return the recognised text in lower case.

    Returns:
        str: the recognised utterance, lower-cased.
        "thank you" when the audio could not be understood (the main loop
        treats that phrase as the end of the conversation).
        "" when the recognition service could not be reached, so callers
        always receive a string rather than ``None``.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)

    # recognize speech using Google Speech Recognition
    try:
        convertedSpeechToText = r.recognize_google(audio)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return "thank you"
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        return ""

    print("User: " + convertedSpeechToText)
    return convertedSpeechToText.lower()

### Activation function

In [None]:
 def activation(greeting="Hello Rishil"):
     """Block until an activation word is heard, then speak a greeting.

     Keeps calling ``speechToText`` until its result is one of the entries of
     the module-level ``activationWordsList``.

     Args:
         greeting: the sentence spoken once the assistant is activated.
     """
     activationWord = ""
     while activationWord not in activationWordsList:
         activationWord = speechToText()
     say(greeting)

### Constructing the knowledge Graph

In [None]:
#declaring file name
file = "Knowledge.json"

#seed for the layout so the drawing is the same on every run
LAYOUT_SEED = 42

#declaring a Directional Graph
G = nx.DiGraph()

#reading json file
with open(file, "r", encoding="utf-8") as x:
    data = json.load(x)

relationship = data["relationship"]
nodes = data["nodes"]
keywords = data["keywords"]
activationWordsList = data["activation"]

#each node entry is read as [name, type, probable location],
#each relationship entry as [source, target, edge label]
edgeList = list(relationship)
nodesList = list(nodes)

#adding nodes
for i in nodesList:
    G.add_node(i[0], type=i[1], probable_location=i[2])

#adding edges
edge_labels = dict()
for i in edgeList:
    G.add_edge(i[0], i[1], value=i[2])
    edge_labels[(i[0], i[1])] = i[2]

#assigning colors according to node type; built from the final node set so
#the list always has exactly one entry per drawn node, even when an edge
#introduces a node that is not declared in "nodes" or a node is declared twice
nodeColorList = list()
for n in G.nodes:
    nodeType = G.nodes[n].get('type')
    if nodeType == 'object':
        nodeColorList.append('green')
    elif nodeType == 'property':
        nodeColorList.append('cyan')
    else:
        nodeColorList.append('red')

#declaring type of graph
pos = nx.spring_layout(G, seed=LAYOUT_SEED)

#drawing methods
nx.draw(G, pos, edge_color='black', width=1, linewidths=10,node_size=250, node_color=nodeColorList, alpha=0.9,with_labels=True)
nx.draw_networkx_edge_labels(G,pos,edge_labels,label_pos=0.5)

plt.show()

### Initiating the voice assistant and understanding the questions

In [None]:
# Conversation loop: wait for an activation word, then answer questions until
# the user says "thank you" (which is also what speechToText returns when the
# audio could not be understood).
while True:
    activation()
    convertedSpeechToText = ""
    while convertedSpeechToText != "thank you":
        # An empty utterance triggers one more listen before matching
        if convertedSpeechToText == "":
            convertedSpeechToText = speechToText()
        if convertedSpeechToText == "thank you":
            break

        # Pick the keyword phrase closest to what was said; the first phrase
        # with the highest ratio wins
        bestmatch = 0
        for phrase, handlerName in keywords.items():
            score = fuzz.ratio(convertedSpeechToText, phrase)
            if score > bestmatch:
                bestmatch = score
                functionName = handlerName

        # Anything below 60 is treated as not understood
        if bestmatch < 60:
            functionName = "Sorry I am Unable to understand"

        functionCaller(functionName)
        convertedSpeechToText = speechToText()