In [2]:
# Ensure TensorFlow 2.x is installed
# !pip install tensorflow

# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.models import Model
import numpy as np
import requests

In [3]:
questions = [
    "Hi",
    "Hello",
    "Hey",
    "Howdy",
    "What's up?",
    "How are you?",
    "How's it going?",
    "Good morning",
    "Good afternoon",
    "Good evening",
    "Bye",
    "Goodbye",
    "See you later",
    "Farewell",
    "Catch you later",
    "I'm done",
    "Take care",
]

answers = [
    "Hello! How can I assist you?",
    "Hi there! How can I assist you?",
    "Hey! How's it going?",
    "Howdy! What can I do for you?",
    "Not much, how about you?",
    "I'm doing great, thanks for asking! How can I assist you?",
    "All good here! How can I help?",
    "Good morning! How can I assist you?",
    "Good afternoon! How can I assist you?",
    "Good evening! How can I assist you?",
    "Bye! Have a great day!",
    "Goodbye! It was nice talking to you.",
    "See you later! Take care.",
    "Farewell! Hope to see you again soon.",
    "Catch you later! Have a good one.",
    "It was nice talking to you. Goodbye!",
    "Take care! Let me know if you need anything else.",
]

In [4]:
# Example dataset: A small set of question-answer pairs
#questions = ['How are you?', 'What is your name?', 'What is TensorFlow?', 'Do you like Python?']
#answers = ['I am fine.', 'I am a bot.', 'TensorFlow is a machine learning library.', 'Yes, Python is great!']

# Tokenize the sentences (this is a very simplified approach)
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(questions + answers)
sequences_questions = tokenizer.texts_to_sequences(questions)
sequences_answers = tokenizer.texts_to_sequences(answers)

# Pad the sequences
max_length = max(max(len(seq) for seq in sequences_questions), max(len(seq) for seq in sequences_answers))
padded_questions = tf.keras.preprocessing.sequence.pad_sequences(sequences_questions, maxlen=max_length, padding='post')
padded_answers = tf.keras.preprocessing.sequence.pad_sequences(sequences_answers, maxlen=max_length, padding='post')

# Vocabulary size
vocab_size = len(tokenizer.word_index) + 1


In [5]:
# Define model parameters
embedding_dim = 256
units = 1024

# Define the encoder model
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(units, return_state=True)(encoder_embedding)
encoder_states = [state_h, state_c]

# Define the decoder model
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(units, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the seq2seq model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 embedding (Embedding)       (None, None, 256)            15872     ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, None, 256)            15872     ['input_2[0][0]']             
                                                                                              

In [6]:
# Prepare decoder input data that just contains the start token and the rest is all 0
# It's a simplified approach, in practice, you would use teacher forcing or another technique
decoder_input_data = np.zeros_like(padded_answers)
decoder_input_data[:, 0] = 1  # Assuming 1 is the start token

# Train the model
model.fit([padded_questions, decoder_input_data], np.expand_dims(padded_answers, -1), batch_size=2, epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a51901c8280>

In [11]:
# Implement the chat function
def preprocess_input_text(input_text):
    sequence = tokenizer.texts_to_sequences([input_text])
    padded_sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=max_length, padding='post')
    return padded_sequence

def generate_response(input_sequence):
    response_sequence = np.zeros((1, max_length))
    response_sequence[0, 0] = 1  # start token
    for i in range(1, max_length):
        prediction = model.predict([input_sequence, response_sequence]).argmax(axis=2)
        response_sequence[0, i] = prediction[0, i-1]
        if prediction[0, i-1] == 2:  # end token
            break
    return response_sequence

def sequence_to_text(sequence):
    return ' '.join(tokenizer.index_word.get(i, '') for i in sequence if i > 2)

def clean_input(user_input):
    cleaned_input = user_input.strip().lower() # Remove leading/trailing spaces and convert to lowercase
    cleaned_input = ''.join(char for char in cleaned_input if char.isalnum() or char.isspace()) # Remove punctuation
    return cleaned_input

def chat_with_bot(input_text):
    input_sequence = preprocess_input_text(input_text)
    response_sequence = generate_response(input_sequence)
    response_text = sequence_to_text(response_sequence[0])
    return response_text

# Function to get weather information using OpenWeatherMap API
def get_weather(location):
    api_key = ""
    base_url = "http://api.openweathermap.org/data/2.5/weather?"
    try:
        city_name, state_code = location.split(", ")
        query = f"{city_name},{state_code},US"
    except ValueError:
        query = f"{location},US"
    complete_url = base_url + "appid=" + api_key + "&q=" + query + "&units=imperial"
    response = requests.get(complete_url)
    weather_data = response.json()
    if weather_data['cod'] == 200:
        main = weather_data['main']
        temperature = main['temp']
        humidity = main['humidity']
        weather_description = weather_data['weather'][0]['description']
        return (f"Temperature: {temperature}°F\n"
                f"Humidity: {humidity}%\n"
                f"Description: {weather_description.capitalize()}")
    else:
        return "Sorry, I couldn't find the weather for that location."

In [13]:
# Introduce the chatbot and its capabilities
print("Hi! My name is WeatherBot, but I go by 🌤🤖.\nI can assist you with basic greetings, answer how I'm doing, say goodbye, and provide current weather information for any city in the US.\nTo get a weather report, just type 'Weather: [city name], [state code].\nFor cities with unique names, simply typing 'Weather: [city name]' works too!")

# Main loop to handle chat input/output
while True:
    user_input = input("> ")
    if clean_input(user_input) in ["bye", "goodbye", "im done"]:
        print("🌤🤖: Goodbye! Have a great day!")
        break
    elif user_input.lower().startswith("weather: "):
        city = user_input.split(" ", 1)[1]
        weather_info = get_weather(city)
        print("🌤🤖:", weather_info)
    else:
        response = chat_with_bot(user_input)
        print("🌤🤖:", response)

Hi! My name is WeatherBot, but I go by 🌤🤖.
I can assist you with basic greetings, answer how I'm doing, say goodbye, and provide current weather information for any city in the US.
To get a weather report, just type 'Weather: [city name], [state code].
For cities with unique names, simply typing 'Weather: [city name]' works too!
> Hi!
🌤🤖: hello
> Howdy!
🌤🤖: howdy
> How are you?
🌤🤖: i'm doing great thanks for asking
> Weather: Charlottesville
🌤🤖: Temperature: 49.59°F
Humidity: 94%
Description: Moderate rain
> Weather: Culpeper, VA
🌤🤖: Temperature: 49.08°F
Humidity: 97%
Description: Moderate rain
> Bye!
🌤🤖: Goodbye! Have a great day!
