In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import os

database_url = os.getenv("DATABASE_URL")
print(database_url)

localhost:5432/mydb


In [4]:
import json
import nltk
import numpy as np
import random
import pickle
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import SnowballStemmer

# Cargar datos de entrenamiento
with open('data/intents.json', encoding='iso-8859-1') as file:
    data = json.load(file)

# Obtener todas las palabras y etiquetas
all_words = []
tags = []
patterns = []

for intent in data['intents']:
    tag = intent['tag']
    tags.append(tag)
    
    for pattern in intent['patterns']:
        # Convertir a minúsculas y tokenizar
        words = nltk.word_tokenize(pattern.lower())
        all_words.extend(words)
        patterns.append((words, tag))

# Stemming y eliminación de duplicados
stemmer = SnowballStemmer('spanish')
all_words = [stemmer.stem(word) for word in all_words]
all_words = sorted(list(set(all_words)))

tags = sorted(list(set(tags)))

# Preparar datos de entrenamiento
training = []
output = np.zeros((len(patterns), len(tags)))

for i, pattern in enumerate(patterns):
    bag = []
    pattern_words = pattern[0]
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    
    for word in all_words:
        if word in pattern_words:
            bag.append(1)
        else:
            bag.append(0)
    
    output[i][tags.index(pattern[1])] = 1
    
    training.append(bag)

training = np.array(training)
output = torch.from_numpy(output).float()

# Definir modelo
class ChatbotModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate):
        super(ChatbotModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, x):
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = nn.functional.softmax(x, dim=1)
        return x

model = ChatbotModel(len(training[0]), 128, len(output[0]), 0.5)

# Entrenar modelo
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

for epoch in range(100):
    inputs = torch.from_numpy(training).float()
    outputs = model(inputs)
    
    loss = criterion(outputs, output)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Guardar modelo en un archivo pkl
with open('models/chatbot_model.pkl', 'wb') as file:
    pickle.dump(model, file)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import pickle
import numpy as np
from nltk.stem import SnowballStemmer

# Load the model from the file
with open('models/chatbot_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Load the words and tags from the data file
with open('data/intents.json', encoding='iso-8859-1') as file:
    data = json.load(file)

stemmer = SnowballStemmer('spanish')
words = []
tags = []
patterns = []

for intent in data['intents']:
    tag = intent['tag']
    tags.append(tag)
    
    for pattern in intent['patterns']:
        # Tokenize and stem the words in the pattern
        tokens = nltk.word_tokenize(pattern.lower())
        stemmed_tokens = [stemmer.stem(token) for token in tokens]
        words.extend(stemmed_tokens)
        patterns.append((stemmed_tokens, tag))

words = sorted(list(set(words)))
tags = sorted(list(set(tags)))

In [6]:
def preprocess(input_text, words):
    # Tokenize the input text
    tokens = nltk.word_tokenize(input_text.lower())

    # Stem the tokens
    stemmer = SnowballStemmer('spanish')
    stemmed_tokens = [stemmer.stem(token) for token in tokens]

    # Create a bag of words vector
    input_bag = torch.zeros(len(words))
    for word in stemmed_tokens:
        if word in words:
            input_bag[words.index(word)] = 1

    return input_bag

In [7]:
def generate_response(input_text):
    # Preprocess the input text
    input_bag = preprocess(input_text, words)
    #input_bag = preprocess_input(input_text)
    #print(input_bag)

    # Convert input to a PyTorch tensor
    #input_tensor = torch.tensor(input_bag).float().unsqueeze(0)
    #input_tensor = torch.tensor(input_bag, dtype=torch.float).clone().detach().unsqueeze(0)
    input_tensor = input_bag.clone().detach().unsqueeze(0)

    # Use the model to predict the output
    output = model(input_tensor)

    # Convert the output to a numpy array
    output_np = output.detach().numpy()

    # Find the tag with the highest probability
    tag_index = np.argmax(output_np)

    if output_np[0][tag_index] < 0.5:
        return "Lo siento, no te he entendido. ¿Podrías reformular la pregunta?"

    tag = tags[tag_index]

    # Choose a random response from the tag's list of responses
    responses = []
    for intent in data['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']

    return random.choice(responses)

In [8]:
generate_response('Hola')

'Hola, ¿cómo puedo ayudarte hoy?'

In [9]:
generate_response('Adiós')

'Adiós'

In [10]:
generate_response('me siento triste')

'Entiendo que puede ser difícil lidiar con problemas personales. ¿Quieres hablar de ello?'

In [11]:
generate_response('tuve problemas en el trabajo')

'Parece que estás teniendo problemas en tu carrera. ¿Te gustaría hablar de ello?'

In [12]:
generate_response('me voy a mater')

'Mi consejo es que escuches tu corazón y hagas lo que sientas que es correcto para ti. ¿Hay algo específico que te preocupa?'

In [13]:
generate_response('si pierde blooming pierde mi familia')

'Lo siento mucho por lo que estás pasando. ¿Quieres hablar de tus problemas en el trabajo?'

In [None]:
generate_response('josue ')