In [4]:
# upload dataset
from google.colab import files
import json
uploaded = files.upload()
import io
filename = list(uploaded.keys())[0]
with io.open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)

Saving Mikro_LSTM_Dataset.json to Mikro_LSTM_Dataset (1).json


In [5]:
# Importing the modules and libraries
import nltk
import numpy as np
import json
import pickle
import random

from nltk.stem import WordNetLemmatizer
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Embedding,LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.optimizers import Adam

# Download NLTK resources
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [8]:
# Load Dataset
with open(filename,'r') as file:
    data=json.load(file)

In [9]:
lemmatizer = WordNetLemmatizer()
sentences, labels = [], []
classes = []
ignore_chars = ['?', '!', ',', '.']

In [10]:
# Process dataset
for intent in data['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words = [lemmatizer.lemmatize(w.lower()) for w in word_list if w not in ignore_chars]
        sentences.append(" ".join(words))
        labels.append(intent['tag'])
    if intent['tag'] not in classes:
        classes.append(intent['tag'])

classes = sorted(set(classes))

In [11]:
# Tokenize sentences
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
word_index = tokenizer.word_index

In [12]:
# Pad sequences to the same length
max_length = max(len(seq) for seq in sequences)
x_train = pad_sequences(sequences, maxlen=max_length, padding='post')


In [13]:
# Convert labels to numerical form
label_dict = {label: index for index, label in enumerate(classes)}
y_train = np.array([label_dict[label] for label in labels])

In [14]:
# Save tokenizer and classes
pickle.dump(tokenizer, open('tokenizer.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

In [15]:
# Build LSTM(Long-Short Term Memory) model with Embedding (Keras 3 Fix)
model = Sequential([
    Embedding(input_dim=len(word_index) + 1, output_dim=128, mask_zero=True),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(64)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(classes), activation='softmax')
])

In [16]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

In [17]:
# Train model
model.fit(x_train, y_train, epochs=100, batch_size=8, verbose=1)

Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 36ms/step - accuracy: 0.0278 - loss: 3.5266
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.1017 - loss: 3.5132
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0682 - loss: 3.4838
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.0899 - loss: 3.3980
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.1522 - loss: 3.1809
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.1877 - loss: 2.9950
Epoch 7/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.2119 - loss: 2.6048
Epoch 8/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.4181 - loss: 2.2036
Epoch 9/100
[1m14/14[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7c5661146b90>

In [18]:
model.save('chatbot_model.keras')  # Save in Keras 3 format

In [19]:
def chatbot_response(text):
    words = [lemmatizer.lemmatize(w.lower()) for w in nltk.word_tokenize(text)]
    seq = tokenizer.texts_to_sequences([" ".join(words)])
    padded_seq = pad_sequences(seq, maxlen=max_length,padding='post')

    prediction = model.predict(padded_seq,verbose=0)[0]
    tag = classes[np.argmax(prediction)]

    for intent in data['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return "I'm not sure about that. Can you ask something else?"

In [20]:
if __name__ == "__main__":
    print("Chatbot is ready! Type 'exit' to stop.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Thank you for using Airbus chatbot tool. Goodbye!")
            break
        print("Bot: ",chatbot_response(user_input))

Chatbot is ready! Type 'exit' to stop.
You: exit
Thank you for using Airbus chatbot tool. Goodbye!


In [21]:
!pip install streamlit --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [36]:
%%writefile app.py
import streamlit as st
import nltk
import numpy as np
import json
import pickle
import random

from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

nltk.download('punkt')
nltk.download('wordnet')

# Initialize tools
lemmatizer = WordNetLemmatizer()

# Load model and preprocessing objects
model = load_model('chatbot_model.keras')
tokenizer = pickle.load(open('tokenizer.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))

# Load intents
with open('Mikro_LSTM_Dataset.json', 'r') as file:
    data = json.load(file)

# Max input length
max_length = model.input_shape[1]

# Chatbot response function
def chatbot_response(text):
    words = [lemmatizer.lemmatize(w.lower()) for w in nltk.word_tokenize(text)]
    seq = tokenizer.texts_to_sequences([" ".join(words)])
    padded_seq = pad_sequences(seq, maxlen=max_length, padding='post')
    prediction = model.predict(padded_seq, verbose=0)[0]
    tag = classes[np.argmax(prediction)]

    for intent in data['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return "I'm not sure about that. Can you ask something else?"

# Page setup
st.set_page_config(layout="wide", page_title="Mikro's Chatbot", page_icon="🤖")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Sidebar settings
with st.sidebar:
    st.title("⚙️")
    st.markdown("Welcome to the **Mikro's Chatbot**. Ask questions about defense, anatomy, or any trained topic.")
    if st.button("Clear Chat"):
        st.session_state.messages = []
        st.rerun()

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Main layout
col1, col2, col3 = st.columns([3, 0.2, 1])




# Left side: chat
with col1:

  st.markdown("""
    <div style="padding-top: 100px;">
        <h2>🤖 Mikro's Chatbot</h2>
        <h4>Ask me anything related to your dataset!</h4>
    </div>
""", unsafe_allow_html=True)
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display past messages
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# Chat input
prompt = st.chat_input("Type your message...")

if prompt:
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    response = chatbot_response(prompt)

    # Add bot message
    st.session_state.messages.append({"role": "assistant", "content": response})
    with st.chat_message("assistant"):
        st.markdown(response)



# Right side: info panel
with col3:
    st.markdown("### 📌 Info Panel")
    st.markdown("- Supports Airbus, Defense & Human Anatomy topics")
    st.markdown("- Uses trained Keras model")
    st.markdown("- Powered by Streamlit UI")
    st.markdown("- Built for learning & demo")

Overwriting app.py


In [23]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
from google.colab import files
config = files.upload()
print(config)

Saving config.toml to config (1).toml
{'config (1).toml': b'[theme]\r\nprimaryColor = "#4CAF50"\r\nbackgroundColor = "#F9F9F9"\r\nsecondaryBackgroundColor = "#E8EAF6"\r\ntextColor = "#262730"\r\nfont = "sans serif"'}


In [34]:
import streamlit as st
import toml

# Load config
with open('config.toml', 'r') as f:
    config = toml.load(f)
# Apply theme settings from config
st.markdown(
    f"""
    <style>
        .stApp {{
            background-color: {config['theme']['backgroundColor']};
            color: {config['theme']['textColor']};
            font-family: {config['theme']['font']};
        }}
        .stButton>button {{
            background-color: {config['theme']['primaryColor']};
        }}
    </style>
    """,
    unsafe_allow_html=True
)

2025-04-22 12:28:43.177 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [37]:
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.74.204.34:8501[0m
[0m
[1G[0K⠧[1G[0Kyour url is: https://calm-candles-grab.loca.lt
2025-04-22 12:33:39.489781: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745325219.530498    6151 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745325219.542715    6151 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alrea