In [1]:
# Install required libraries
!pip install kagglehub pandas numpy scikit-learn nltk

# Import necessary modules
import kagglehub
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
nltk.download('punkt')




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [2]:
# Download customer-support dataset from Kaggle
path1 = kagglehub.dataset_download("thoughtvector/customer-support-on-twitter")
print("Dataset 1 Path:", path1)

# Download simple chatbot dialogs dataset
path2 = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot")
print("Dataset 2 Path:", path2)


Downloading from https://www.kaggle.com/api/v1/datasets/download/thoughtvector/customer-support-on-twitter?dataset_version_number=10...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 169M/169M [00:01<00:00, 147MB/s]

Extracting files...





Dataset 1 Path: /root/.cache/kagglehub/datasets/thoughtvector/customer-support-on-twitter/versions/10
Downloading from https://www.kaggle.com/api/v1/datasets/download/grafstor/simple-dialogs-for-chatbot?dataset_version_number=2...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 59.3k/59.3k [00:00<00:00, 36.8MB/s]

Extracting files...
Dataset 2 Path: /root/.cache/kagglehub/datasets/grafstor/simple-dialogs-for-chatbot/versions/2





In [4]:
import os

# Find CSV/JSON files in the folders
files1 = [f for f in os.listdir(path1) if f.endswith(('.csv', '.json'))]
files2 = [f for f in os.listdir(path2) if f.endswith(('.csv', '.json'))]
print("Files in Dataset 1:", files1)
print("Files in Dataset 2:", files2)

# Load one file from each (adjust filename based on printed list)
df1 = pd.read_csv(os.path.join(path1, files1[0]))

# Only load df2 if there are files in the directory
df2 = None
if files2:
    df2 = pd.read_csv(os.path.join(path2, files2[0]))


print("Dataset 1 shape:", df1.shape)
if df2 is not None:
    print("Dataset 2 shape:", df2.shape)
else:
    print("Dataset 2 could not be loaded as no CSV or JSON files were found.")

df1.head()

Files in Dataset 1: ['sample.csv']
Files in Dataset 2: []
Dataset 1 shape: (93, 7)
Dataset 2 could not be loaded as no CSV or JSON files were found.


Unnamed: 0,tweet_id,author_id,inbound,created_at,text,response_tweet_id,in_response_to_tweet_id
0,119237,105834,True,Wed Oct 11 06:55:44 +0000 2017,@AppleSupport causing the reply to be disregar...,119236.0,
1,119238,ChaseSupport,False,Wed Oct 11 13:25:49 +0000 2017,@105835 Your business means a lot to us. Pleas...,,119239.0
2,119239,105835,True,Wed Oct 11 13:00:09 +0000 2017,@76328 I really hope you all change but I'm su...,119238.0,
3,119240,VirginTrains,False,Tue Oct 10 15:16:08 +0000 2017,@105836 LiveChat is online at the moment - htt...,119241.0,119242.0
4,119241,105836,True,Tue Oct 10 15:17:21 +0000 2017,@VirginTrains see attached error message. I've...,119243.0,119240.0


In [6]:
# Let's assume the main columns are 'text' and 'response'
# You can rename them if dataset columns differ

df = df1[['text', 'response_tweet_id']].rename(columns={'text': 'question', 'response_tweet_id': 'response'})
df.dropna(inplace=True)
df = df.head(1000)  # sample 1000 pairs for quick training

# Text preprocessing
df['question'] = df['question'].str.lower()
df['response'] = df['response'].str.lower()

print(df.sample(5))

                                             question response
65  @105854 i am sorry for the poor experience. ca...   119305
83  @105857 we'd like to help if we can. when did ...   119324
44  @spotifycares problem has come back again toda...   119286
90  @105861 hey sara, sorry to hear of the issues ...   119333
68  @105855 hi thomas, if a colleague believes the...   119309


In [7]:
# Convert all questions into TF-IDF vectors
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['question'])

def chatbot_response(user_input):
    user_input = user_input.lower()
    user_vec = vectorizer.transform([user_input])
    similarity = cosine_similarity(user_vec, X)
    idx = similarity.argmax()
    if similarity.max() < 0.2:
        return "I'm sorry, I didn‚Äôt quite get that. Could you rephrase?"
    return df.iloc[idx]['response']


In [8]:
print("ü§ñ Chatbot is ready! Type 'bye' to exit.")

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "exit", "quit"]:
        print("Bot: Bye! Have a great day üòä")
        break
    print("Bot:", chatbot_response(user_input))


ü§ñ Chatbot is ready! Type 'bye' to exit.
You: BYE
Bot: Bye! Have a great day üòä


In [9]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.1/10.1 MB[0m [31m75.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m101.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


In [10]:
import streamlit as st

st.title("üí¨ Customer Support Chatbot")

user_input = st.text_input("You:", "")
if user_input:
    response = chatbot_response(user_input)
    st.write("ü§ñ Bot:", response)


2025-10-24 04:32:16.648 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-10-24 04:32:16.657 Session state does not function when running a script without `streamlit run`


In [11]:
!streamlit run app.py


Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py


In [12]:
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = re.sub(r'[^a-zA-Z ]', '', text.lower())
    tokens = [lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]
    return ' '.join(tokens)

df['question'] = df['question'].apply(clean_text)
df['response'] = df['response'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [13]:
def chatbot_response(user_input):
    user_input = user_input.lower()
    if any(g in user_input for g in ["hi", "hello", "hey"]):
        return "Hey there! How can I assist you today?"
    if "bye" in user_input:
        return "Goodbye! Have a wonderful day üí´"

    user_vec = vectorizer.transform([user_input])
    similarity = cosine_similarity(user_vec, X)
    idx = similarity.argmax()
    if similarity.max() < 0.25:
        return "I'm not sure I understand. Could you rephrase that?"
    return df.iloc[idx]['response']


In [14]:
test_questions = df['question'][:20]
correct = 0
for q in test_questions:
    if chatbot_response(q) in df['response'].values:
        correct += 1
print("Chatbot Accuracy:", correct/len(test_questions))


Chatbot Accuracy: 0.75


In [15]:
custom_data = {
    "question": [
        "where is my order",
        "how can i return a product",
        "what are your working hours",
        "how can i contact support"
    ],
    "response": [
        "You can track your order using the tracking ID in your email.",
        "To return a product, visit your orders page and select ‚ÄòReturn‚Äô.",
        "Our support team is available 24/7!",
        "You can reach support via email at help@futureinterns.in."
    ]
}

custom_df = pd.DataFrame(custom_data)
df = pd.concat([df, custom_df], ignore_index=True)
X = vectorizer.fit_transform(df['question'])


In [16]:
import streamlit as st

st.set_page_config(page_title="Customer Support Chatbot", page_icon="ü§ñ")

st.title("üí¨ Customer Support Chatbot")
st.markdown("Your friendly AI assistant from Future Interns!")

user_input = st.text_input("Type your message here üëá", "")
if user_input:
    response = chatbot_response(user_input)
    st.chat_message("user").write(user_input)
    st.chat_message("assistant").write(response)




In [19]:
!pip install requests geopy
import datetime, requests
from geopy.geocoders import Nominatim




In [24]:
!pip install pytz




In [20]:
def get_current_time():
    now = datetime.datetime.now()
    return now.strftime("It's %I:%M %p right now.")

def get_current_date():
    today = datetime.date.today()
    return today.strftime("Today's date is %B %d, %Y.")

def get_current_day():
    today = datetime.date.today()
    return today.strftime("Today is %A.")

def get_weather(city="Bangalore"):
    try:
        api_key = "https://wttr.in/{}?format=3".format(city)
        response = requests.get(api_key)
        if response.status_code == 200:
            return f"Here's the weather: {response.text}"
        else:
            return "Sorry, I couldn‚Äôt fetch the weather right now."
    except:
        return "I‚Äôm having trouble checking the weather at the moment."

def get_president(country="usa"):
    if country.lower() == "usa":
        return "The President of the United States is Joe Biden."
    elif country.lower() == "india":
        return "The President of India is Droupadi Murmu."
    else:
        return f"Sorry, I don‚Äôt have information about {country.title()}."


In [25]:
import pytz

def get_current_time():
    india_tz = pytz.timezone("Asia/Kolkata")
    now = datetime.datetime.now(india_tz)
    return now.strftime("It's %I:%M %p right now in India.")


In [21]:
def chatbot_response(user_input):
    user_input = user_input.lower()

    # Greetings
    if any(g in user_input for g in ["hi", "hello", "hey"]):
        return "Hey there! How can I help you today?"

    # Exit
    if any(x in user_input for x in ["bye", "exit", "quit"]):
        return "Goodbye! Have a wonderful day üí´"

    # Time / Date / Day
    if "time" in user_input:
        return get_current_time()
    if "date" in user_input:
        return get_current_date()
    if "day" in user_input:
        return get_current_day()

    # Weather
    if "weather" in user_input:
        words = user_input.split()
        city = None
        for i, word in enumerate(words):
            if word == "in" and i + 1 < len(words):
                city = words[i + 1]
        return get_weather(city if city else "Bangalore")

    # President / Prime Minister
    if "president" in user_input:
        if "usa" in user_input or "america" in user_input:
            return get_president("usa")
        elif "india" in user_input:
            return get_president("india")
        else:
            return get_president()

    # General small talk
    if "how are you" in user_input:
        return "I'm great! Thanks for asking ‚Äî how are you doing today?"
    if "your name" in user_input:
        return "I'm your Customer Support Assistant, powered by Future Interns ü§ñ"

    # Fallback to FAQ / dataset similarity
    user_vec = vectorizer.transform([user_input])
    similarity = cosine_similarity(user_vec, X)
    idx = similarity.argmax()
    if similarity.max() < 0.25:
        return "Hmm, I‚Äôm not sure about that. Could you try rephrasing?"
    return df.iloc[idx]['response']


In [26]:
print("ü§ñ Smart Chatbot ready! Type 'bye' to exit.")

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "exit", "quit"]:
        print("Bot: Bye! üëã")
        break
    print("Bot:", chatbot_response(user_input))


ü§ñ Smart Chatbot ready! Type 'bye' to exit.
You: HEYY
Bot: Hey there! How can I help you today?
You: WHAT IS THE TIME NOW?
Bot: It's 10:15 AM right now in India.
You: WHAT DAY ISIT?
Bot: Today is Friday.
You: HOW'S THE WEATHER?
Bot: Here's the weather: Bangalore: ‚õÖÔ∏è  +72¬∞F

You: OKAY,THANKYOUUUUU. BYE
Bot: Goodbye! Have a wonderful day üí´
You: BYE
Bot: Bye! üëã
