# Design a Chatbot

In [3]:
#!pip install pandas nltk flask transformers
import pandas as pd


In [5]:
# Path to the Titanic dataset's CSV file on Kaggle
# I assumes the training dataset as a main dataset and split it into train and test datasets later on in this notebook;
file_path = '/kaggle/input/titanic-machine-learning-from-disaster/train.csv'

In [6]:
# Load the CSV into a pandas DataFrame
titanic_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
titanic_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [7]:
# Check the data columns
print(titanic_data.columns)

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


In [8]:
# Basic User Input Understanding
# Use NLP techniques to parse the user's query and match it to the table's columns.

import nltk
nltk.download('punkt')

# Define function to extract relevant words from user input
def preprocess_input(user_input):
    words = nltk.word_tokenize(user_input.lower())  # Tokenize user input
    return words

# Basic column matching from the user input
def match_column(user_input):
    columns = titanic_data.columns
    words = preprocess_input(user_input)

    for word in words:
        if word in columns:
            return word
    return None


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
# Define Question and Answering Logic
def respond_to_query(user_input):
    if 'survived' in preprocess_input(user_input):
        survivors = titanic_data['Survived'].sum()
        return f'There were {survivors} survivors.'
    
    # Find average age
    if 'age' in preprocess_input(user_input) and 'average' in preprocess_input(user_input):
        average_age = titanic_data['Age'].mean()
        return f'The average age of passengers is {average_age:.2f}.'
    
    return "Sorry, I didn't understand that."


In [17]:
# Run the Chatbot
# Chatbot loop
print("Hello, I am the Titanic chatbot. Ask me about the Titanic dataset.")
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    response = respond_to_query(user_input)
    print(f"Bot: {response}")


Hello, I am the Titanic chatbot. Ask me about the Titanic dataset.


You:  how many passenger ID?


Bot: Sorry, I didn't understand that.


KeyboardInterrupt: Interrupted by user

# Handling More Complex Questions with NLP Models

Use a pre-trained language model from HuggingFace’s transformers library (like BERT) to parse more complex user questions. This will improve the bot’s understanding of natural language.

In [None]:
from transformers import pipeline

# Load a pre-trained model for question answering
qa_pipeline = pipeline("question-answering")

def advanced_response(user_input):
    # You can define a context string from the Titanic dataset.
    # For instance, you can convert a subset of the dataset to text.
    context = titanic_data.to_string()

    # Use the pre-trained model to get an answer
    result = qa_pipeline(question=user_input, context=context)
    return result['answer']

# Now the chatbot can use the advanced model
print("Ask more complex questions now!")
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    response = advanced_response(user_input)
    print(f"Bot: {response}")


No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Ask more complex questions now!


You:  how many passengers are in the ship?


In [None]:
# Optionally Deploy the Chatbot with Flask (to create a web-based chatbot)

from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/chat", methods=["POST"])
def chat():
    user_input = request.json.get("message")
    response = respond_to_query(user_input)  # Use simple or advanced response
    return jsonify({"response": response})

if __name__ == "__main__":
    app.run(debug=True)
