In [36]:
import pandas as pd
import numpy as np
import random
import nltk

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.sentiment import SentimentIntensityAnalyzer

nltk.download("vader_lexicon")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [37]:
df = pd.read_csv("Conversation.csv")

df = df[["question", "answer"]]
df.dropna(inplace=True)

df.head()


Unnamed: 0,question,answer
0,"hi, how are you doing?",i'm fine. how about yourself?
1,i'm fine. how about yourself?,i'm pretty good. thanks for asking.
2,i'm pretty good. thanks for asking.,no problem. so how have you been?
3,no problem. so how have you been?,i've been great. what about you?
4,i've been great. what about you?,i've been good. i'm in school right now.


In [38]:
answers = df["answer"].unique()
answer_to_id = {ans: i for i, ans in enumerate(answers)}
id_to_answer = {i: ans for ans, i in answer_to_id.items()}

df["label"] = df["answer"].map(answer_to_id)


In [39]:
X_train, X_test, y_train, y_test = train_test_split(
    df["question"],
    df["label"],
    test_size=0.2,
    random_state=42
)


In [40]:
vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words="english")
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_vec, y_train)


In [41]:
y_pred = clf.predict(X_test_vec)
print("Model Accuracy:", accuracy_score(y_test, y_pred))


Model Accuracy: 0.008053691275167786


In [42]:
embedder = SentenceTransformer("all-MiniLM-L6-v2")
sentiment = SentimentIntensityAnalyzer()

question_embeddings = embedder.encode(df["question"].tolist())


Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [43]:
GREETINGS = ["hi", "hello", "hey", "hii"]
GOODBYES = ["bye", "exit", "quit"]


In [44]:
def chatbot_reply(user_text):
    text = user_text.lower().strip()

    # 1Ô∏è‚É£ Simple rules
    if text in GREETINGS:
        return random.choice(["hey üëã", "hello üòä", "hi there"])

    if text in GOODBYES:
        return random.choice(["bye üëã", "take care üí´", "see you"])

    # 2Ô∏è‚É£ ML Model Prediction (TRAINED)
    vec = vectorizer.transform([user_text])
    probs = clf.predict_proba(vec)[0]
    confidence = np.max(probs)
    pred_label = np.argmax(probs)

    if confidence >= 0.55:
        return id_to_answer[pred_label]

    # 3Ô∏è‚É£ Semantic fallback (dataset-based)
    user_emb = embedder.encode([user_text])
    sims = cosine_similarity(user_emb, question_embeddings)
    best_idx = sims.argmax()

    if sims[0][best_idx] >= 0.5:
        return df.iloc[best_idx]["answer"]

    # 4Ô∏è‚É£ Sentiment fallback (last resort)
    sent = sentiment.polarity_scores(user_text)["compound"]

    if sent <= -0.3:
        return random.choice([
            "yeah that sounds rough",
            "i get why that feels bad",
            "that‚Äôs not great honestly"
        ])

    if sent >= 0.3:
        return random.choice([
            "okay that‚Äôs nice",
            "sounds good üòÑ",
            "love that"
        ])

    return random.choice([
        "hmm okay",
        "tell me more",
        "yeah, i‚Äôm listening"
    ])


In [45]:
print("Chatbot ready üí¨ (type 'exit' to stop)\n")

while True:
    user = input("You: ")

    if user.lower() == "exit":
        print("Bot: bye üëã")
        break

    print("Bot:", chatbot_reply(user))


Chatbot ready üí¨ (type 'exit' to stop)

You: hi
Bot: hello üòä
You: how are you
Bot: i'm doing great. what about you?
You: im great
Bot: i've been good. i'm in school right now.
You: acha?
Bot: yeah, i‚Äôm listening
You: ok i am a bit sad
Bot: tell me more
You: nothing
Bot: there must be something on tv!
You: no
Bot: how do you know?
You: i saw
Bot: i didn't do anything.
You: i know
Bot: but you're doing 75.
You: leave
Bot: but we just got here.
You: bye
Bot: take care üí´
You: stop
Bot: no, you didn't. you slowed down, but you didn't come to a full stop.
You: exit
Bot: bye üëã
