In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive
drive.mount('/content/drive')

# Define the path to your dataset in Google Colab
dataset_path = "/content/drive/MyDrive/mojo_ai/mojo_ai_code/mojo_Dataset"

# Upload the Mojo_AI_code folder (zip it if needed)
# You can comment out the following lines if you've already uploaded the dataset manually.
code_path = "/content/drive/MyDrive/mojo_ai/mojo_ai_code"

# Initialize empty lists to store text and labels
texts = []
labels = []

# Loop through each folder (examples, proposals, user, workshops)
folders = ["examples", "proposals", "user", "workshops"]

for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Loop through each file in the folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append the content to the texts list and the folder name to the labels list
            texts.append(content)
            labels.append(folder)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences for fixed input size
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])  # Convert to NumPy array

# Build a basic NLP model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=50, input_length=max_seq_length),
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dense(len(label_mapping), activation='softmax')
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, numerical_labels, epochs=10, batch_size=32)

# Save the model
model.save('/content/drive/MyDrive/mojo_ai/mojo_ai_code/mojo_ai_model.h5')

# User interaction and reinforcement learning
user_folder_path = os.path.join(dataset_path, "user")
data_file_path = os.path.join(user_folder_path, "data")

# Simulate user interaction (replace this with your actual user interaction mechanism)
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Store the user question and answer
user_data = [f"{user_question}\t{user_answer}\n"]

# Save the updated user data
with open(data_file_path, 'w', encoding='utf-8') as user_data_file:
    user_data_file.writelines(user_data)

# Now, you can use this updated user data for reinforcement learning.
# Load the model
loaded_model = tf.keras.models.load_model('/content/mojo_ai_model.h5')

# Tokenize the new user data
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Convert the label to numerical format
user_label = label_mapping["user"]

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, np.array([user_label]), epochs=5, batch_size=1)


In [4]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive
drive.mount('/content/drive')

# Define the path to your dataset in Google Colab
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"

# Initialize empty lists to store text and labels
texts = []
labels = []

# Loop through each folder (examples, proposals, user, workshops)
folders = ["examples", "proposals", "user", "workshops"]

for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Loop through each file in the folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append the content to the texts list and the folder name to the labels list
            texts.append(content)
            labels.append(folder)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences for fixed input size
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

# Build a CNN-LSTM model
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 50

model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_seq_length))
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model.add(tf.keras.layers.LSTM(50))
model.add(tf.keras.layers.Dense(len(label_mapping), activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, numerical_labels, epochs=10, batch_size=32)

# Save the model
model.save('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5')

# Simulate user interaction and continue training (if needed)
# user_question = "How does Mojo handle interop with Python?"
# user_answer = "Mojo uses a Python API for seamless interop."

# user_sequences = tokenizer.texts_to_sequences([user_question])
# user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')
# user_label = label_mapping["user"]

# model.fit(user_padded_sequences, np.array([user_label]), epochs=5, batch_size=1)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


KeyError: ignored

In [11]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive
drive.mount('/content/drive')

# Define the path to your dataset in Google Colab
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"

# Initialize empty lists to store text and labels
texts = []
labels = []

# Loop through each folder (examples, proposals, user, workshops)
folders = ["examples", "proposals", "user", "workshops"]

for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Loop through each file in the folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append the content to the texts list and the folder name to the labels list
            texts.append(content)
            labels.append(folder)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences for fixed input size
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

# Build a CNN-LSTM model
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 50  # Replace with your sequence length

# Load the model
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5')

# Simulate user interaction (replace with actual user interaction mechanism)
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Tokenize the new user data (assuming tokenizer exists)
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Ensure 'user' is mapped in label_mapping
if "user" not in label_mapping:
    label_mapping["user"] = len(label_mapping)

# Map 'user' label to a numerical value
user_label = np.array([label_mapping["user"]])

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, user_label, epochs=5, batch_size=1)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/5


InvalidArgumentError: ignored

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive
drive.mount('/content/drive')

# Define the path to your dataset in Google Colab
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"

# Initialize empty lists to store text and labels
texts = []
labels = []

# Loop through each folder (examples, proposals, user, workshops)
folders = ["examples", "proposals", "user", "workshops"]

for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Loop through each file in the folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read the content of the file
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append the content to the texts list and the folder name to the labels list
            texts.append(content)
            labels.append(folder)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences for fixed input size
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

# Build a CNN-LSTM model
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 50

# Simulate user interaction (replace with actual user interaction mechanism)
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Tokenize the new user data
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Ensure 'user' is mapped in label_mapping
if "user" not in label_mapping:
    label_mapping["user"] = len(label_mapping)
else:  # Map 'user' label to a numerical value
    print(label_mapping)

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, np.array([label_mapping["user"]]), epochs=5, batch_size=1)

# Save the updated model
loaded_model.save('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_retrained.h5')

# Store user question and response in data.txt file
data_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/data.txt'
with open(data_path, 'a') as file:
    file.write(f"{user_question}\t{user_answer}\n")


Mounted at /content/drive


NameError: ignored

In [5]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define paths
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"
data_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'

# Initialize lists for text and labels
texts = []
labels = []

# List of folders
folders = ["examples", "proposals", "user", "workshops"]

# Read data from folders
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Read files in each folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read file content
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append content to texts and folder name to labels
            texts.append(content)
            labels.append(folder)

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences and pad sequences
sequences = tokenizer.texts_to_sequences(texts)
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

# Load the model
loaded_model = tf.keras.models.load_model(model_path)

# Simulate user interaction
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Tokenize user data
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Ensure 'user' is mapped in label_mapping
if "user" not in label_mapping:
    label_mapping["user"] = len(label_mapping)
    num_classes = len(label_mapping)  # Update the number of classes
else:  # Map 'user' label to a numerical value
    print(label_mapping)

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, np.array([label_mapping["user"]]), epochs=5, batch_size=1)

# Save the updated model
loaded_model.save('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_retrained.h5')

# Store user question and response in data.txt file
with open(data_path, 'a') as file:
    file.write(f"{user_question}\t{user_answer}\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/5


InvalidArgumentError: ignored

In [6]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define paths
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"
data_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'

# Initialize lists for text and labels
texts = []
labels = []

# List of folders
folders = ["examples", "proposals", "user", "workshops"]

# Read data from folders
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Read files in each folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read file content
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append content to texts and folder name to labels
            texts.append(content)
            labels.append(folder)

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences and pad sequences
sequences = tokenizer.texts_to_sequences(texts)
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

# Load the model
loaded_model = tf.keras.models.load_model(model_path)

# Simulate user interaction
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Tokenize user data
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Ensure 'user' is mapped in label_mapping
if "user" not in label_mapping:
    label_mapping["user"] = len(label_mapping)

# Compile the model with categorical cross-entropy loss
loaded_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                     optimizer=tf.keras.optimizers.Adam(),
                     metrics=['accuracy'])

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, np.array([label_mapping["user"]]), epochs=5, batch_size=1)

# Save the updated model
loaded_model.save('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_retrained.h5')

# Store user question and response in data.txt file
with open(data_path, 'a') as file:
    file.write(f"{user_question}\t{user_answer}\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/5


ValueError: ignored

In [14]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from google.colab import drive
import pickle
# Mount Google Drive
drive.mount('/content/drive')

# Define paths
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"
data_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'

# Initialize lists for text and labels
texts = []
labels = []

# List of folders
folders = ["examples", "proposals", "user", "workshops"]

# Read data from folders
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Read files in each folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read file content
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append content to texts and folder name to labels
            texts.append(content)
            labels.append(folder)

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
# Convert text to sequences and pad sequences
sequences = tokenizer.texts_to_sequences(texts)
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')
# Save variables to a file using pickle
data_to_save = {
    'texts': texts,
    'max_seq_length': max_seq_length,
    'padded_sequences': padded_sequences
}

file_path_to_save = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/content_of_ds.pkl"
with open(file_path_to_save, 'wb') as f:
    pickle.dump(data_to_save, f)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the trained model
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'
loaded_model = load_model(model_path)

# Function to preprocess user input
def preprocess_input(user_input, tokenizer, max_length):
    # Tokenize the user input
    sequence = tokenizer.texts_to_sequences([user_input])
    # Pad sequences
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    return padded_sequence

# Load tokenizer and max sequence length
# You might need to adapt this based on how you saved these values earlier
tokenizer_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/content_of_ds.pkl'
max_length = 10000  # Update this with your actual max sequence length

# Load tokenizer (you need to implement this loading part if you saved it in a pickle file)
# tokenizer = load_tokenizer(tokenizer_path)

# Get user input
user_question = input("Enter your question: ")

# Preprocess user input
user_padded_sequence = preprocess_input(user_question, tokenizer, max_length)

# Generate the model's response
predicted_probabilities = loaded_model.predict(user_padded_sequence)
predicted_label = np.argmax(predicted_probabilities)  # Get the class with the highest probability

# Display the response to the user
response = label_mapping_inverse[predicted_label]  # Assuming you have a label mapping
print("Model's response:", response)

# Store the question and response into a file
data_file_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
with open(data_file_path, 'a') as file:
    file.write(f"Question: {user_question}\n")
    file.write(f"Response: {response}\n")
    file.write("-" * 20 + "\n")


Enter your question: what is mojo


ValueError: ignored

In [18]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the trained model
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'
loaded_model = load_model(model_path)

# Load tokenizer and max sequence length
tokenizer_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/content_of_ds.pkl'
max_length = 10000  # Update this with your actual max sequence length

# Load tokenizer
tokenizer = Tokenizer()
with open(tokenizer_path, 'rb') as f:
    tokenizer = pickle.load(f)

# Function to preprocess user input
def preprocess_input(user_input, tokenizer, max_length):
    # Tokenize the user input
    sequence = tokenizer.texts_to_sequences([user_input])
    # Pad sequences
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    return padded_sequence

# Get user input
user_question = input("Enter your question: ")

# Preprocess user input
user_padded_sequence = preprocess_input(user_question, tokenizer, max_length)

# Generate the model's response
predicted_probabilities = loaded_model.predict(user_padded_sequence)
predicted_label = np.argmax(predicted_probabilities)  # Get the class with the highest probability

# Define your label mapping (Replace this with your actual label mapping)
label_mapping = {0: 'label_1', 1: 'label_2', 2: 'label_3'}  # Example label mapping

# Display the response to the user
predicted_class = label_mapping[predicted_label]
print("Model's predicted class:", predicted_class)

# Store the question and response into a file
data_file_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
with open(data_file_path, 'a') as file:
    file.write(f"Question: {user_question}\n")
    file.write(f"Predicted Response: {predicted_class}\n")
    file.write("-" * 20 + "\n")


Enter your question: Can you provide an example of using loops in Mojo?


AttributeError: ignored

In [20]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import os
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define paths
dataset_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset"
code_path = "/content/drive/MyDrive/mojoai/Mojo_AI_code"
data_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_Dataset/User/data.txt'
model_path = '/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5'

# Initialize lists for text and labels
texts = []
labels = []

# List of folders
folders = ["examples", "proposals", "user", "workshops"]

# Read data from folders
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)

    # Read files in each folder
    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)

            # Read file content
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            # Append content to texts and folder name to labels
            texts.append(content)
            labels.append(folder)

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert text to sequences and pad sequences
sequences = tokenizer.texts_to_sequences(texts)
max_seq_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# Convert labels to numerical format
label_mapping = {label: index for index, label in enumerate(set(labels))}
numerical_labels = np.array([label_mapping[label] for label in labels])

max_seq_length = 100  # Replace with your sequence length

# Load the model
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/mojoai/Mojo_AI_code/mojo_ai_model_cnn_lstm.h5')

# Simulate user interaction (replace with actual user interaction mechanism)
user_question = "How does Mojo handle interop with Python?"
user_answer = "Mojo uses a Python API for seamless interop."

# Tokenize the new user data (assuming tokenizer exists)
user_sequences = tokenizer.texts_to_sequences([user_question])
user_padded_sequences = pad_sequences(user_sequences, maxlen=max_seq_length, padding='post')

# Ensure 'user' is mapped in label_mapping
if "user" not in label_mapping:
    label_mapping["user"] = len(label_mapping)

# Map 'user' label to a numerical value
user_label = np.array([label_mapping["user"]])

# Continue training with the new user data
loaded_model.fit(user_padded_sequences, user_label, epochs=5, batch_size=1)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/5


ValueError: ignored

# New Section