<a href="https://colab.research.google.com/github/shivanktyagi001/NLP/blob/main/NLP_based_ChatBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
# Fetch the NLTK data packages this notebook relies on (tokenizer models and
# the stopword corpus) before any text processing runs.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)
# WordNet backs the lemmatizer; kept as the cell's final expression so the
# download status is still displayed as the cell output.
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression


In [None]:
# Example utterances grouped by the intent they should be classified as.
# Insertion order of the dict (and of each list) fixes the row order of the
# flattened training set below.
_utterances_by_intent = {
    "greet": [
        "hello",
        "hi",
        "hello there",
        "hi there",
        "hey",
        "good morning",
    ],
    "weather": [
        "what's the weather today",
        "what's the temperature",
        "how's the weather today",
        "is it raining",
        "tell me temperature",
    ],
    "open_web": [
        "open google",
        "open facebook",
        "open youtube",
        "go to google",
    ],
    "exit": [
        "bye",
        "goodbye",
        "exit",
    ],
}

# Flat list of (text, intent) pairs consumed by the rest of the notebook.
training_data = [
    (utterance, intent_name)
    for intent_name, utterances in _utterances_by_intent.items()
    for utterance in utterances
]


In [None]:
# Split the (text, intent) pairs into two parallel lists: the raw sentences
# to vectorize and the intent label for each one, in the same order.
sentences = [utterance for utterance, _ in training_data]
labels = [intent_name for _, intent_name in training_data]

In [None]:
def preprocess_text(documents):
    """Normalise raw sentences into space-joined, lemmatised keyword strings.

    Each document is lower-cased, tokenised with ``word_tokenize``, stripped
    of English stopwords, lemmatised as a verb, and finally reduced to the
    tokens that are purely alphabetic.

    Args:
        documents: Iterable of raw text strings, one entry per sentence.
            Pass a list even for a single sentence; a bare string would be
            iterated character by character.

    Returns:
        A list with one cleaned string per input document, in input order.
        A document whose tokens are all stopwords or non-alphabetic yields
        an empty string.
    """
    # Built once per call rather than once per token/document: set membership
    # is a constant-time lookup, and the lemmatizer holds no per-document state.
    english_stopwords = set(stopwords.words("english"))
    wnet = WordNetLemmatizer()

    cleaned_documents = []
    for doc in documents:
        tokens = word_tokenize(doc.lower())

        # Lemmatise with the verb part-of-speech tag ("v").
        lemmas = [
            wnet.lemmatize(token, "v")
            for token in tokens
            if token not in english_stopwords
        ]

        # isalpha() rejects every token that contains a non-letter, which
        # covers punctuation and digits, so no separate punctuation pass is
        # needed.
        final_tokens = [lemma for lemma in lemmas if lemma.isalpha()]

        cleaned_documents.append(" ".join(final_tokens))
    return cleaned_documents

In [None]:
# Run the cleaning pipeline over every training sentence; the result keeps
# the same order as `sentences`, so it stays aligned with `labels`.
cleaned_data = preprocess_text(sentences)

In [None]:
# Display the cleaned sentences to check what preprocessing kept.
cleaned_data

['hello',
 'hi',
 'hello',
 'hi',
 'hey',
 'good morning',
 'weather today',
 'temperature',
 'weather today',
 'rain',
 'tell temperature',
 'open google',
 'open facebook',
 'open youtube',
 'go google',
 'bye',
 'goodbye',
 'exit']

In [None]:
# Learn the vocabulary and IDF weights from the cleaned sentences and encode
# those same sentences as the TF-IDF feature matrix X in one step.
vectorize = TfidfVectorizer()
X = vectorize.fit_transform(cleaned_data)

In [None]:
# Show the terms the fitted vectorizer learned from the cleaned sentences.
print("vocabulary:",vectorize.get_feature_names_out())

vocabulary: ['bye' 'exit' 'facebook' 'go' 'good' 'goodbye' 'google' 'hello' 'hey' 'hi'
 'morning' 'open' 'rain' 'tell' 'temperature' 'today' 'weather' 'youtube']


In [None]:
# Convert the TF-IDF matrix to a dense array so every weight is printed,
# with the heading on its own line above it.
print("TF_IDF MAtrix:", X.toarray(), sep="\n")

TF_IDF MAtrix:
[[0.         0.         0.         0.         0.         0.
  0.         1.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         1.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         1.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         1.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         1.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.   

In [None]:
# Train a logistic-regression intent classifier (default settings) on the
# TF-IDF features X, using the intent names in `labels` as targets.
model = LogisticRegression()
model.fit(X,labels)

In [None]:
# Classify a single free-text message with the trained intent model.
user_msg = "how to know that there will be conlict or not?"
processed = preprocess_text([user_msg])
user_vector = vectorize.transform(processed)

# A message that shares no word with the training vocabulary is encoded as an
# all-zero row. The classifier then has no evidence from the text and its
# answer is decided by the intercepts alone, so an unrelated question would
# still be reported as a known intent. Report an explicit fallback label
# instead of presenting that as a real prediction.
if user_vector.nnz == 0:
    prediction = ["unknown"]
else:
    prediction = model.predict(user_vector)
print("Prediction is: :",prediction)

Prediction is: : ['greet']
