### GERALD, Conversational AI.


In [1]:
'''
GERALD, Conversational AI.

An open-source and open-domain conversation AI model that utilizes a sequential ensemble 
of retrieval- and generation-based systems to intelligently react to user queries while
maintaining structural and contextual relevance.
'''

# -*- coding: utf-8 -*-
# importing all required libraries
import tensorflow as tf
import keras
from keytotext import pipeline
import time 
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
from nltk.tokenize import word_tokenize
import pyaudio
import pyttsx3
import speech_recognition as sr
import msvcrt as m
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.decomposition import PCA
import pickle as pk

# Shared model objects: loaded once here and reused by every cell below.

# Sentence-BERT encoder that turns a user query into an embedding vector.
model = SentenceTransformer('bert-base-nli-mean-tokens')

# T5 tokenizer and conditional-generation model, both from the "t5-large"
# checkpoint; used to rewrite retrieved answers into the final reply.
tokenizer, model_2 = (
    T5Tokenizer.from_pretrained("t5-large"),
    T5ForConditionalGeneration.from_pretrained("t5-large"),
)

  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 42


In [2]:
# Load the retrieval data that was prepared offline.

# Question/answer pairs. Only the two text columns are kept (any index
# columns saved with the CSV are dropped), and answers are forced to str so
# they can be concatenated into prompts later on.
df = pd.read_csv('q_a_pairs.csv')
df['Answer'] = df['Answer'].astype(str)
df = df[['Question', 'Answer']]

# Embeddings of the stored questions.
# NOTE(review): presumably one PCA-reduced row per row of `df` - confirm
# against the script that produced q_pca_embed.txt.
q_pca = np.loadtxt('q_pca_embed.txt')

# Fitted PCA model used to project user input into the same space as q_pca.
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: unpickling can execute arbitrary code - only load pca.pkl from a
# trusted source.
with open("pca.pkl", 'rb') as pca_file:
    pca = pk.load(pca_file)

# Text Input model

In [15]:
# Text chat loop: the user types a query, GERALD prints a reply.
# Type 'quit' to exit.


def generate_reply(query, threshold=0.90, top_k=3):
    """Return the reply to ``query`` as a string.

    Retrieval: the query is encoded with the sentence encoder, projected with
    the fitted PCA and compared by cosine similarity against the stored
    question embeddings in ``q_pca``.

    Generation: the retrieved question/answer pairs are passed to T5 with a
    'summarize: ' prompt.

    * Layer 1 - if the best similarity exceeds ``threshold``, only the top
      pair is summarised.
    * Layer 2 - otherwise each of the ``top_k`` best pairs is summarised
      (capped at 25 tokens each) and the concatenation of those summaries is
      summarised once more (capped at 60 tokens).

    Parameters
    ----------
    query : str
        The user's input text.
    threshold : float, optional
        Cosine similarity above which the top match alone is used.
    top_k : int, optional
        Number of retrieved pairs blended when no match is that close.

    Returns
    -------
    str
        The decoded T5 output.
    """

    def t5_summarize(prompt, max_length):
        # Encode the prompt, generate, and decode the first returned sequence.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model_2.generate(input_ids, max_length=max_length)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    def qa_prompt(row):
        # Prompt built from the stored question and its answer.
        return 'summarize: ' + df['Question'].iloc[row] + ' ' + df['Answer'].iloc[row]

    target = model.encode(query).reshape(1, -1)  # single-row 2-D array for sklearn
    target = pca.transform(target)               # same reduced space as q_pca
    # cosine_similarity returns one column here; flatten it to a 1-D score vector.
    match = cosine_similarity(q_pca, target).ravel()
    max_index = np.argsort(match)[::-1][:top_k]  # best matches first

    best = max_index[0]
    if match[best] > threshold:
        # LAYER 1: confident match, rewrite just the top pair.
        # The cap is 1.5x the answer's length in characters (not tokens);
        # int() is used because generate() documents max_length as an integer.
        cap = int(1.5 * len(df['Answer'].iloc[best]))
        return t5_summarize(qa_prompt(best), cap)

    # LAYER 2: no confident match, blend the top matches.
    ret = 'summarize: '
    for i in max_index:
        ret += ' ' + t5_summarize(qa_prompt(i), 25)
    return t5_summarize(ret, 60)


inp = input()

# Keep answering until the user types 'quit' (case and surrounding whitespace
# are ignored so 'Quit ' also exits).
while inp.strip().lower() != 'quit':
    print(generate_reply(inp))
    print('\n \n')
    inp = input()
    

Who is the author of Harry Potter
harry potter was first published by bloomsbury in 1913.

 

quit


### Speech to text function

In [5]:
# Speech-to-text: one recognizer shared by every call to hear().
r = sr.Recognizer()


def hear():
    """Listen on the microphone and return the recognised text.

    Prompts with 'speak now', records one utterance through
    ``sr.Microphone()`` and sends it to Google's recogniser. If the audio
    cannot be understood (``sr.UnknownValueError``) the user is asked to
    repeat instead of the exception ending the conversation.

    Returns
    -------
    str
        The transcript returned by ``r.recognize_google``.

    Raises
    ------
    sr.RequestError
        Not caught here: raised by the library when the recognition request
        itself fails; retrying in a loop would not help.
    """
    while True:
        print('speak now')
        with sr.Microphone() as source:
            audio = r.listen(source)
        # Recognition runs after the `with` block so the microphone is
        # released while waiting on the recognition call.
        try:
            return r.recognize_google(audio)
        except sr.UnknownValueError:
            print('Sorry, I did not catch that.')

### Text to Speech function

In [6]:
# Text-to-speech engine shared by speak().
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # speaking rate (pyttsx3 documents this as words per minute)
voices = engine.getProperty('voices')

# Switch to the second installed voice when there is one; with a single voice
# installed, keep the engine default instead of failing with IndexError.
# NOTE(review): the original comment called voices[1] the male voice. Which
# voice sits at index 1 depends on the voices installed on the machine - confirm.
if len(voices) > 1:
    engine.setProperty('voice', voices[1].id)

# Speak a piece of text aloud through the shared pyttsx3 engine.
def speak(text):
    """Queue ``text`` on the TTS engine and run the engine until it is done.

    Parameters
    ----------
    text : str
        The sentence to be spoken.

    Returns
    -------
    None
    """
    engine.say(text)
    engine.runAndWait()

# Speech - Text Model

### Run this block of code if you wish to provide voice input and receive audio output.

In [None]:
# Voice chat loop: the user speaks a query, GERALD prints and speaks a reply.
# Say 'quit' to exit.


def generate_voice_reply(query, threshold=0.90, top_k=3):
    """Return the sentence to be spoken in reply to ``query``.

    The query is encoded with the sentence encoder, projected with the fitted
    PCA and compared by cosine similarity against the stored question
    embeddings in ``q_pca``. The retrieved question/answer pairs are then
    passed to T5 with a 'summarize: ' prompt:

    * if the best similarity exceeds ``threshold``, only the top pair is
      summarised;
    * otherwise each of the ``top_k`` best pairs is summarised (capped at 25
      tokens each) and the concatenation of those summaries is summarised
      once more (capped at 60 tokens).

    Parameters
    ----------
    query : str
        The recognised user utterance.
    threshold : float, optional
        Cosine similarity above which the top match alone is used.
    top_k : int, optional
        Number of retrieved pairs blended when no match is that close.

    Returns
    -------
    str
        The decoded T5 output.
    """

    def t5_summarize(prompt, max_length):
        # Encode the prompt, generate, and decode the first returned sequence.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model_2.generate(input_ids, max_length=max_length)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    def qa_prompt(row):
        # Prompt built from the stored question and its answer.
        return 'summarize: ' + df['Question'].iloc[row] + ' ' + df['Answer'].iloc[row]

    target = model.encode(query).reshape(1, -1)  # single-row 2-D array for sklearn
    target = pca.transform(target)               # same reduced space as q_pca
    # cosine_similarity returns one column here; flatten it to a 1-D score vector.
    match = cosine_similarity(q_pca, target).ravel()
    max_index = np.argsort(match)[::-1][:top_k]  # best matches first

    best = max_index[0]
    if match[best] > threshold:
        # Confident match: rewrite just the top pair. The cap is 1.5x the
        # answer's length in characters (not tokens); int() is used because
        # generate() documents max_length as an integer.
        cap = int(1.5 * len(df['Answer'].iloc[best]))
        return t5_summarize(qa_prompt(best), cap)

    # No confident match: summarise each retrieved pair, then blend them.
    ret = 'summarize: '
    for i in max_index:
        ret += ' ' + t5_summarize(qa_prompt(i), 25)
    return t5_summarize(ret, 60)


print('Press enter to begin\n')
input()
inp = hear()  # first utterance from the microphone

# Keep answering until the user says 'quit'. Case and surrounding whitespace
# are ignored because the recogniser's capitalisation is not under our control.
while inp.strip().lower() != 'quit':
    print(inp)  # echo what was recognised
    sent = generate_voice_reply(inp)
    print(sent)
    speak(sent)  # read the reply aloud
    print('Press enter to speak again\n')
    input()
    inp = hear()