In [57]:
import nltk
import random
import numpy as np
import pandas as pd
import string
import datetime
import schedule
import time

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from torch.optim import AdamW


In [21]:
data = {
    "intent": [
        "greeting", "greeting",
        "goodbye", "goodbye",
        "thanks", "thanks",
        "loan_info", "loan_info",
        "support", "support"
    ],
    "text": [
        "hello", "hi",
        "bye", "see you",
        "thank you", "thanks a lot",
        "how can i apply for loan", "loan eligibility",
        "i need help", "customer support"
    ]
}

responses = {
    "greeting": ["Hello! How can I help you?", "Hi there!"],
    "goodbye": ["Goodbye!", "Have a nice day!"],
    "thanks": ["You're welcome!", "Happy to help!"],
    "loan_info": ["You can apply for a loan via our website."],
    "support": ["Please describe your issue. Our support team will assist you."]
}

df = pd.DataFrame(data)


In [22]:
def preprocess(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text


In [23]:
df['clean_text'] = df['text'].apply(preprocess)

In [24]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['clean_text'])
y = df['intent']

In [25]:
model = LogisticRegression()
model.fit(X, y)

In [26]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

MODEL_NAME = "bert-base-uncased"

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(set(df['intent']))
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [27]:

optimizer = AdamW(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()

X_texts = df['clean_text'].tolist()
y_labels = pd.factorize(df['intent'])[0]

for epoch in range(3):
    model.train()
    inputs = bert_encode(X_texts)
    labels = torch.tensor(y_labels).to(device)

    inputs = {k: v.to(device) for k, v in inputs.items()}

    outputs = model(**inputs, labels=labels)
    loss = outputs.loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1} | Loss: {loss.item():.4f}")


Epoch 1 | Loss: 1.6268
Epoch 2 | Loss: 1.5920
Epoch 3 | Loss: 1.5595


In [28]:
intent_labels = list(pd.unique(df['intent']))

def predict_intent_bert(text):
    model.eval()
    inputs = bert_encode([text])
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.softmax(outputs.logits, dim=1)
    confidence, pred = torch.max(probs, dim=1)

    return intent_labels[pred.item()], confidence.item()


In [29]:
conversation_context = []

MAX_CONTEXT = 3

def update_context(user_text, intent):
    conversation_context.append((user_text, intent))
    if len(conversation_context) > MAX_CONTEXT:
        conversation_context.pop(0)


In [30]:
def contextual_input(user_text):
    context_text = " ".join([u for u, _ in conversation_context])
    return context_text + " " + user_text

In [31]:
def chatbot_response_transformer(user_text):
    enriched_input = contextual_input(user_text)

    intent, confidence = predict_intent_bert(enriched_input)

    update_context(user_text, intent)

    if confidence < 0.5:
        return "Could you please clarify that?"

    return random.choice(responses[intent])


In [32]:
def bert_encode(texts, max_len=64):
    return tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_len,
        return_tensors="pt"
    )

In [33]:
def chatbot_response(user_input):
    user_input = preprocess(user_input)
    vector = vectorizer.transform([user_input])

    intent = model.predict(vector)[0]
    return random.choice(responses[intent])

In [35]:
print("Chatbot is running (type 'exit' to stop)")

while True:
    user = input("You: ")

    if user.lower() == "exit":
        print("Bot: Goodbye!")
        break

    print("Bot:", chatbot_response_transformer(user))

Chatbot is running (type 'exit' to stop)
You: hello
Bot: Could you please clarify that?
You: goodbye
Bot: Could you please clarify that?
You: i need help
Bot: Could you please clarify that?
You: exit
Bot: Goodbye!


In [36]:
def chatbot_response_advanced(user_input):
    user_input = preprocess(user_input)
    vector = vectorizer.transform([user_input])

    probabilities = model.predict_proba(vector)
    confidence = np.max(probabilities)

    if confidence < 0.4:
        return "I'm not sure I understood that. Can you rephrase?"

    intent = model.classes_[np.argmax(probabilities)]
    return random.choice(responses[intent])


In [37]:
def add_new_intent(text, intent):
    df.loc[len(df)] = [intent, text, preprocess(text)]

In [38]:
pip install nltk scikit-learn pandas numpy schedule pyttsx3


Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting pyttsx3
  Downloading pyttsx3-2.99-py3-none-any.whl.metadata (6.2 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Downloading pyttsx3-2.99-py3-none-any.whl (32 kB)
Installing collected packages: pyttsx3, schedule
Successfully installed pyttsx3-2.99 schedule-1.2.2


In [39]:
data = {
    "intent": [
        "greeting", "greeting",
        "time_query", "time_query",
        "set_reminder", "set_reminder",
        "schedule_meeting", "schedule_meeting",
        "goodbye", "goodbye"
    ],
    "text": [
        "hello", "hi",
        "what is the time", "tell me current time",
        "remind me to study", "set a reminder",
        "schedule a meeting", "add event to calendar",
        "bye", "exit"
    ]
}

responses = {
    "greeting": ["Hello! How can I help you today?"],
    "time_query": ["The current time is {time}"],
    "set_reminder": ["Reminder has been set successfully."],
    "schedule_meeting": ["Your meeting has been scheduled."],
    "goodbye": ["Goodbye! Have a productive day."]
}

df = pd.DataFrame(data)


In [40]:
def preprocess(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text


In [41]:
df["clean_text"] = df["text"].apply(preprocess)

In [42]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["clean_text"])
y = df["intent"]

In [43]:
intent_model = LogisticRegression()
intent_model.fit(X, y)

In [44]:
conversation_memory = []

def update_memory(user_text, intent):
    conversation_memory.append((user_text, intent))
    if len(conversation_memory) > 5:
        conversation_memory.pop(0)


In [45]:
def get_time():
    return datetime.datetime.now().strftime("%H:%M:%S")


In [46]:
def reminder_task(message):
    print(" REMINDER:", message)

def set_reminder(message, seconds=10):
    schedule.every(seconds).seconds.do(reminder_task, message)


In [47]:
calendar = []

def schedule_meeting(title, date):
    calendar.append({"title": title, "date": date})


In [48]:
def assistant_response(user_input):
    clean = preprocess(user_input)
    vector = vectorizer.transform([clean])

    intent = intent_model.predict(vector)[0]
    update_memory(user_input, intent)

    if intent == "time_query":
        return responses[intent][0].format(time=get_time())

    elif intent == "set_reminder":
        set_reminder("This is your reminder!", 15)
        return responses[intent][0]

    elif intent == "schedule_meeting":
        schedule_meeting("Team Meeting", "Tomorrow 10 AM")
        return responses[intent][0]

    elif intent == "goodbye":
        return responses[intent][0]

    return random.choice(responses[intent])


In [49]:
def learn_new_phrase(text, intent):
    df.loc[len(df)] = [intent, text, preprocess(text)]

    X_new = vectorizer.fit_transform(df["clean_text"])
    y_new = df["intent"]

    intent_model.fit(X_new, y_new)


In [54]:
!apt-get install espeak-ng
import pyttsx3

engine = pyttsx3.init()

def speak(text):
    engine.say(text)
    engine.runAndWait()


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0
The following NEW packages will be installed:
  espeak-ng espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0
0 upgraded, 5 newly installed, 0 to remove and 41 not upgraded.
Need to get 4,526 kB of archives.
After this operation, 11.9 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpcaudio0 amd64 1.1-6build2 [8,956 B]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libsonic0 amd64 0.2.0-11build1 [10.3 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 espeak-ng-data amd64 1.50+dfsg-10ubuntu0.1 [3,956 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libespeak-ng1 amd64 1.50+dfsg-10ubuntu0.1 [207 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 espeak-ng amd64 1.50+dfsg-1

In [56]:
import schedule
import time # ensure time is also imported if it wasn't, although the error specifically mentioned schedule

print(" AI Virtual Assistant Started (type 'exit' to stop)")

while True:
    user = input("You: ")

    if user.lower() == "exit":
        print("Assistant: Goodbye!")
        break

    response = assistant_response(user)
    print("Assistant:", response)
    speak(response)

    schedule.run_pending()
    time.sleep(1)


 AI Virtual Assistant Started (type 'exit' to stop)
You: Hello
Assistant: Hello! How can I help you today?
You: YES SURE
Assistant: Hello! How can I help you today?
You: EXIT
Assistant: Goodbye!
