In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from nltk.corpus import stopwords
# Fetch the NLTK resources the preprocessing step relies on.
nltk.download('punkt')
# Newer NLTK releases load the word tokenizer from 'punkt_tab' rather than the
# pickled 'punkt' models, so fetch it as well to keep nltk.word_tokenize
# working on a fresh environment.
nltk.download('punkt_tab')
# Kept last so the cell still displays the result of the stopwords download.
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Labelled training utterances, grouped by the intent they express.
_EXAMPLES_BY_INTENT = {
    "greeting": ["hi", "hello", "how are you", "what's up"],
    "farewell": ["bye", "goodbye", "see you later"],
    "joke": ["tell me a joke", "make me laugh", "joke, please"],
    # Add more examples here...
}

# Flatten into (message, intent) pairs; dicts preserve insertion order, so the
# pairs come out grouped by intent in the order written above.
dataset = [
    (message, intent)
    for intent, messages in _EXAMPLES_BY_INTENT.items()
    for message in messages
]


In [None]:
def _english_stop_words():
    """Return NLTK's English stopwords as a frozenset, loading them only once.

    The corpus is read on the first call; later calls reuse the set stored on
    the function object instead of rebuilding it for every message.
    """
    try:
        return _english_stop_words.cache
    except AttributeError:
        _english_stop_words.cache = frozenset(stopwords.words("english"))
        return _english_stop_words.cache


def preprocess_text(text):
    """Normalise a message into a space-separated string of content words.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``; tokens
    that are not purely alphanumeric (punctuation, contractions such as
    "'s") or that are English stopwords are dropped.

    Args:
        text: The raw message string.

    Returns:
        The surviving tokens joined by single spaces. This is an empty string
        when no token survives the filter.
    """
    # NOTE(review): short messages made only of stopwords (e.g. "how are you")
    # presumably reduce to "" here — confirm that is acceptable for training.
    stop_words = _english_stop_words()

    tokens = nltk.word_tokenize(text.lower())
    kept = [token for token in tokens if token.isalnum() and token not in stop_words]

    return " ".join(kept)

# Split the labelled pairs into parallel lists: cleaned messages as features,
# intent names as targets.
X_train = [preprocess_text(message) for message, _ in dataset]
y_train = [intent for _, intent in dataset]


In [None]:
# TF-IDF features feeding a linear-kernel SVM.
tfidf_vectorizer = TfidfVectorizer()
# probability=True enables probability estimates, which rely on an internal
# shuffled cross-validation; pin random_state so those estimates are
# reproducible from run to run.
svm_classifier = SVC(kernel='linear', probability=True, random_state=42)
intent_classifier = make_pipeline(tfidf_vectorizer, svm_classifier)

# Fit the SVM model on the training data
intent_classifier.fit(X_train, y_train)


In [None]:
def get_intent(text):
    """Classify a raw message and return the predicted intent label.

    The message is cleaned with ``preprocess_text`` and passed to the fitted
    ``intent_classifier`` as a single-element batch; the first (only)
    prediction is returned.
    """
    batch = [preprocess_text(text)]
    return intent_classifier.predict(batch)[0]

def chat():
    """Run an interactive console chat loop until the user types 'bye'.

    Each line read with ``input`` is stripped of surrounding whitespace. The
    exit command 'bye' (case-insensitive) ends the loop. Blank lines get the
    fallback reply without being classified, because the classifier would
    otherwise return one of its trained intents for an empty message. Any
    other line is classified with ``get_intent`` and answered with the canned
    reply for that intent, or the fallback reply for an unrecognised label.
    """
    replies = {
        "greeting": "Bot: Hello! How can I help you?",
        "farewell": "Bot: Goodbye! Have a great day!",
        "joke": "Bot: Why don't scientists trust atoms? Because they make up everything!",
    }
    fallback = "Bot: Sorry, I don't understand that."

    print("Bot: Hi! I'm a simple chatbot. Type 'bye' to exit.")

    while True:
        # Strip so that inputs like " bye " still exit the loop.
        user_input = input("You: ").strip()
        if user_input.lower() == 'bye':
            print("Bot: Goodbye!")
            break

        # Nothing to classify: answer with the fallback instead of a guess.
        if not user_input:
            print(fallback)
            continue

        print(replies.get(get_intent(user_input), fallback))


In [None]:
# Start the interactive chat loop when this code runs as the main program
# (this is the case when the cell is executed in the notebook, as the
# transcript below the cell shows).
if __name__ == "__main__":
    chat()


Bot: Hi! I'm a simple chatbot. Type 'bye' to exit.
You: Hi
Bot: Hello! How can I help you?
You: joke
Bot: Why don't scientists trust atoms? Because they make up everything!
You: bye
Bot: Goodbye!


In [None]:
import spacy

In [None]:
# Load the 'en_core_web_sm' spaCy pipeline; the returned object is called on
# raw text in the following cell.
another_lib = spacy.load('en_core_web_sm')
# Last expression of the cell: displays the names of the pipeline's components.
another_lib.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
# Run the loaded spaCy pipeline over the joke text; the result is iterated
# token by token in the next cell.
data = another_lib("Why don't scientists trust atoms? Because they make up everything")

In [None]:
# Print one line per token: the token text, a readable description of its
# part-of-speech tag, and its lemma.
for token in data:
    pos_description = spacy.explain(token.pos_)
    print(f"{token} | {pos_description} | {token.lemma_}")

Why | subordinating conjunction | why
do | auxiliary | do
n't | particle | not
scientists | noun | scientist
trust | noun | trust
atoms | noun | atom
? | punctuation | ?
Because | subordinating conjunction | because
they | pronoun | they
make | verb | make
up | adposition | up
everything | pronoun | everything
