In [1]:
# required libraries

import numpy as np
import nltk
import string
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# for importing and reading corpus

# Read the whole corpus inside a context manager so the file handle is
# closed as soon as the text is in memory (the original never closed it).
# NOTE(review): no explicit encoding is given, so the platform default is used
# and undecodable bytes are dropped by errors='ignore' — confirm this is intended.
with open('data.txt', 'r', errors='ignore') as f:
    rawdata = f.read()
rawdata = rawdata.lower()    # converts text of datafile into lowercase
nltk.download('punkt')       # tokenizer models used by sent_tokenize / word_tokenize
nltk.download('wordnet')     # lexical database used by WordNetLemmatizer
sent_tokens = nltk.sent_tokenize(rawdata)   # converts data to list of sentences
word_tokens = nltk.word_tokenize(rawdata)   # converts data to list of words

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shikh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\shikh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Display the first three entries of sent_tokens (the cell shows the value of
# its last expression, so this must stay a bare expression).
sent_tokens[:3]

['gaganyaan is an indian crewed orbital spacecraft intended to be the formative spacecraft of the indian human spaceflight programme.',
 'the spacecraft is being designed to carry three people, and a planned upgraded version will be equipped with rendezvous and docking capability.',
 "in its maiden crewed mission, indian space research organisation (isro)'s largely autonomous 5.3 t (12,000 lb) capsule will orbit the earth at 400 km (250 mi) altitude for up to seven days with a two or three-person crew on board."]

In [4]:
# Display the first five entries of word_tokens (the cell shows the value of
# its last expression, so this must stay a bare expression).
word_tokens[:5]

['gaganyaan', 'is', 'an', 'indian', 'crewed']

In [5]:
# for text preprocessing

# Shared lemmatizer instance, created once and reused by LemTokens.
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Lemmatize each token with the shared WordNet lemmatizer.

    Args:
        tokens: iterable of word strings.

    Returns:
        A list holding ``lemmer.lemmatize(token)`` for every token, in the
        same order as the input.
    """
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas
# Translation table mapping the code point of every character in
# string.punctuation to None, so str.translate() deletes those characters.
remove_punct_dict = str.maketrans('', '', string.punctuation)

def LemNormalize(text):
    """Lowercase ``text``, delete punctuation, tokenize it and lemmatize the tokens.

    Args:
        text: the string to normalize.

    Returns:
        The list produced by ``LemTokens`` for the word tokens of the
        lowercased, punctuation-free text.
    """
    lowered = text.lower()
    without_punct = lowered.translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

In [6]:
# definition of greet function

greet_inputs = ("hi", "hello", "hey", "greetings", "hola", "wassup", "sup", "what's up")
greet_responses = ["hey", "hi", "nods", "hi there!", "hello", "I am glad that you are talking to me!"]

def greet(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole words, so "this" does not
    match "hi". Punctuation attached to the ends of a word is ignored
    ("Hi!" matches "hi"), and multi-word entries of ``greet_inputs`` such as
    "what's up" are matched as phrases. The original word-by-word lookup
    could never match a multi-word entry.

    Args:
        sentence: the user's message.

    Returns:
        A random element of ``greet_responses`` when a greeting is found,
        otherwise ``None``.
    """
    # Strip punctuation from the ends of each word only; interior characters
    # (the apostrophe in "what's") are kept so phrase entries still match.
    words = [w.strip(string.punctuation) for w in sentence.lower().split()]
    # Pad with spaces so a substring test only succeeds on whole-word boundaries.
    padded = " " + " ".join(w for w in words if w) + " "
    for greeting in greet_inputs:
        if " " + greeting + " " in padded:
            return random.choice(greet_responses)
    return None

In [7]:
# for response generation

def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query and the corpus sentences are vectorized together with TF-IDF
    (tokenized by ``LemNormalize``), and the sentence with the highest cosine
    similarity to the query, other than the query itself, is returned.

    The query has to be the last document that is vectorized. Existing
    callers append it to the module-level ``sent_tokens`` before calling; if
    the last entry of ``sent_tokens`` is not the query, it is added to a
    temporary copy here so the function also works when called on its own
    (``sent_tokens`` itself is never modified).

    Args:
        user_response: the user's message.

    Returns:
        The best-matching sentence, or an apology string when there is
        nothing to compare against or the best similarity is 0.
    """
    not_understood = "I am sorry! I'm not able to understand you friend"

    if sent_tokens and sent_tokens[-1] == user_response:
        documents = sent_tokens
    else:
        documents = sent_tokens + [user_response]

    # With only the query present there is no second-best entry to pick;
    # indexing [-2] below would raise IndexError.
    if len(documents) < 2:
        return not_understood

    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(documents)
    # Similarity of the query (last row) against every document, query included.
    vals = cosine_similarity(tfidf[-1], tfidf)
    # The top-ranked entry is expected to be the query matching itself, so the
    # second-highest entry is taken as the best corpus match.
    idx = vals.argsort()[0][-2]
    req_tfidf = vals[0][idx]
    if req_tfidf == 0:
        return not_understood
    return documents[idx]

In [8]:
# Interactive chat loop: reads a line, answers it, and stops on
# 'bye', 'thanks' or 'thank you'.
flag = True
print("BOT: My name is Goofy. Let's have a coversation! If you want to exit, just type bye")
while flag:
    # Normalize case and surrounding whitespace so "Bye " still ends the chat.
    user_response = input().lower().strip()
    if user_response == 'bye':
        flag = False
        print("BOT: It was my pleasure in assisting you. Goodbye!")
    elif user_response in ('thank you', 'thanks'):
        flag = False
        print("BOT: You are welcome, friend!")
    else:
        # Call greet() once: it picks a random reply, so calling it a second
        # time for printing would discard the value that was tested.
        greeting = greet(user_response)
        if greeting is not None:
            print("BOT: " + greeting)
        else:
            # response() expects the query to be the last entry of sent_tokens.
            sent_tokens.append(user_response)
            try:
                word_tokens = word_tokens + nltk.word_tokenize(user_response)
                # NOTE(review): final_words is not read anywhere in the visible
                # code — kept in case a later cell uses it.
                final_words = list(set(word_tokens))
                reply = response(user_response)
                print("BOT: ", end="")
                print(reply)
            finally:
                # Always drop the query that was appended above. pop() removes
                # exactly that last entry, whereas remove() deletes the first
                # equal element, which may be a genuine corpus sentence.
                sent_tokens.pop()

BOT: My name is Goofy. Let's have a coversation! If you want to exit, just type bye
hey
BOT: hey
hi
BOT: hi there!
mission
BOT: 



"india's first manned mission gaganyaan may take 1 astronaut".
contents
BOT: [14]


contents
1	history
1.1	funding and infrastructure
2	description
2.1	crew module
2.2	service module
3	development
3.1	test flights
3.2	test flight profile
4	testing
4.1	crew module atmospheric re-entry experiment
4.2	pad abort test
4.3	long duration hot test - vikas engine
4.4	service module propulsion system
5	vyommitra
6	see also
7	references
history
preliminary studies and technological development of gaganyaan started in 2006 under the generic name "orbital vehicle".
crew module
BOT: the crew module is mated to the service module, and together they constitute 8.2 t (18,000 lb) orbital module.
development
BOT: development
following two non-crewed orbital flight demonstrations of the spacecraft, a crewed gaganyaan is slated to be launched on the gslv mk iii launcher no earlier than 2023.
spacecraft
BOT: gaganyaan is an indian crewed orbital spacecraft intended to be the formative spacecraft of the indi