# Test - TUYA 
## DS AI - Pablo Andres

### Librerías

In [29]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import os

from datetime import datetime

from dotenv import load_dotenv

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
import warnings

warnings.filterwarnings("ignore")

## Environment Variables

In [2]:
# Load environment variables from the .env file one directory above the
# current working directory; later cells read them through os.getenv.
load_dotenv('../.env')

True

In [3]:
# Model identifier passed to the tokenizer/model loaders further down;
# None when the variable is not present in the environment.
MODEL_ID = os.environ.get('MODEL_ID')

# Current working directory, used as the base for the relative Data paths.
cd = os.getcwd()

## Questions

### Question 1:
    - ¿Cuáles son los nombres de las tarjetas que tiene disponibles Tuya S.A.?

### Question 2:
    - ¿Cuáles son los valores de la tasa de interés y póliza del producto Credicompras?

## Code

## Web Scraping

In [4]:
def scrape_credit_cards(url, headers, timeout = 30):
    """Scrape the card names listed on a web page.

    Fetches ``url`` and collects the text of every ``<h2>`` element that
    contains "Tarjeta" and does not contain "crédito" (case-insensitive).

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Alphabetically sorted list of unique heading texts, or an empty list
        when the request fails or the status code is not 200.
    """
    try:
        # Without a timeout, requests.get can block forever on a stalled server.
        response = requests.get(url, headers = headers, timeout = timeout)
    except requests.RequestException as error:
        # Treat network failures like a bad status code: report and return empty.
        print(f"Failed to retrieve the page. Error: {error}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    # Extract each heading's text once instead of three times per element.
    heading_texts = (element.get_text(strip=True) for element in soup.find_all("h2"))
    credit_cards = {
        text for text in heading_texts
        if "Tarjeta" in text and "crédito" not in text.lower()
    }
    return sorted(credit_cards)
    

def scrape_credicompras(url, headers, timeout = 30):
    """Scrape interest-rate and insurance-policy paragraphs from a web page.

    Fetches ``url`` and scans every ``<p>`` element once. Paragraphs whose
    text contains "Tasa Efectiva Anual" are collected as interest rates;
    paragraphs containing "Póliza colectiva" or "seguro deudores" are
    collected as insurance policy details.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``{"Credicompras": {"Interest Rates": [...], "Insurance Policy": [...]}}``
        with each list de-duplicated and sorted, or an empty dict when the
        request fails or the status code is not 200.
    """
    try:
        # Without a timeout, requests.get can block forever on a stalled server.
        response = requests.get(url, headers = headers, timeout = timeout)
    except requests.RequestException as error:
        # Treat network failures like a bad status code: report and return empty.
        print(f"❌ Failed to retrieve the page. Error: {error}")
        return {}

    if response.status_code != 200:
        print(f"❌ Failed to retrieve the page. Status code: {response.status_code}")
        return {}

    soup = BeautifulSoup(response.text, "html.parser")
    interest_rates = set()
    policy_details = set()
    # Single pass over the paragraphs; each text is extracted only once.
    for paragraph in soup.find_all("p"):
        text = paragraph.get_text(strip = True)
        if "Tasa Efectiva Anual" in text:
            interest_rates.add(text)
        if "Póliza colectiva" in text or "seguro deudores" in text:
            policy_details.add(text)

    # sorted() keeps the saved output stable between runs; a bare list(set(...))
    # yields an arbitrary order.
    return {
        "Credicompras": {
            "Interest Rates": sorted(interest_rates),
            "Insurance Policy": sorted(policy_details)
        }
    }

### Saving Scraped Data

In [31]:
def save_output(data, filename, location = 'Scraped_Data', extension = 'json'):
    """Write ``data`` to a timestamped file under ``../Data/<location>``.

    The file is named ``<filename>_<YYYYmmdd_HHMMSS>.<extension>`` and the
    target directory is created when it does not exist yet.

    Args:
        data: Object to persist. For 'json' it must be JSON-serialisable.
            For 'txt', a dict is written as ``key: value`` lines, a list as
            one item per line, and anything else via ``str(data)``.
        filename: Base name of the file, without timestamp or extension.
        location: Sub-folder of ``../Data`` (relative to ``cd``) to write into.
        extension: Output format, either 'json' or 'txt'.

    Raises:
        ValueError: If ``extension`` is neither 'json' nor 'txt'.
    """
    # Reject unknown formats up front instead of silently writing nothing.
    if extension not in ('json', 'txt'):
        raise ValueError(f"Unsupported extension '{extension}'; use 'json' or 'txt'.")

    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename_with_time = f"{filename}_{current_time}"

    output_dir = os.path.join(cd, f'../Data/{location}')
    # A fresh checkout may not have the Data sub-folders yet.
    os.makedirs(output_dir, exist_ok = True)
    output_path = os.path.join(output_dir, f'{filename_with_time}.{extension}')

    with open(output_path, 'w', encoding='utf-8') as file:
        if extension == 'json':
            json.dump(data, file, indent=4, ensure_ascii=False)
        elif isinstance(data, dict):
            for key, value in data.items():
                file.write(f"{key}: {value}\n")
        elif isinstance(data, list):
            for item in data:
                file.write(f"{item}\n")
        else:
            file.write(str(data))

    print(f"✅ Data saved in '{filename_with_time}.{extension}'.")

### Set URLs to Scrape

In [6]:
# Both pages live on the same site, so the host is spelled out only once.
_TUYA_BASE_URL = "https://www.tuya.com.co"

url_scrape_credit_cards = f"{_TUYA_BASE_URL}/tarjetas-de-credito"
url_scrape_credicompras = f"{_TUYA_BASE_URL}/credicompras"

### Mining Data from the Website

In [7]:
# Both requests send the same User-Agent header.
request_headers = {'User-Agent': 'Mozilla/5.0'}

scrape_credit_cards_data = scrape_credit_cards(url = url_scrape_credit_cards, headers = request_headers)
scrape_credicompras_data = scrape_credicompras(url = url_scrape_credicompras, headers = request_headers)

In [21]:
# Persist both scraping results with save_output's default location and format.
save_output(data = scrape_credit_cards_data, filename = "credit_cards")
save_output(data = scrape_credicompras_data, filename = "credicompras")

✅ Data saved in 'credit_cards.json'.
✅ Data saved in 'credicompras.json'.


## LLM

### Setting Model and Tokenizer 

In [9]:
# Fail early with a clear message instead of passing None to from_pretrained.
if not MODEL_ID:
    raise ValueError("MODEL_ID is not set; define it in the environment or the .env file.")

model_id    = MODEL_ID
tokenizer   = AutoTokenizer.from_pretrained(model_id)
model       = AutoModelForCausalLM.from_pretrained(model_id)
# Keep the result under its own name: assigning it to `pipeline` would shadow
# the imported transformers factory and break this cell when it is re-run.
text_generation_pipeline = pipeline("text-generation", model = model, tokenizer = tokenizer)
local_llm   = HuggingFacePipeline(pipeline = text_generation_pipeline)

Device set to use cpu


### Upload Data to Use

In [11]:
def upload_contexts(filename):
    """Load a scraped JSON context from ``../Data/Scraped_Data``.

    ``<filename>.json`` is used when it exists. Otherwise the newest
    ``<filename>_<YYYYmmdd_HHMMSS>.json`` file is loaded, because save_output
    appends that timestamp to every file it writes.

    Args:
        filename: Base name of the context file, without timestamp or extension.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If neither the exact file nor a timestamped
            variant exists.
    """
    data_dir = os.path.join(cd, '../Data/Scraped_Data')
    path = os.path.join(data_dir, f'{filename}.json')

    if not os.path.isfile(path):
        prefix, suffix = f'{filename}_', '.json'
        candidates = []
        for name in (os.listdir(data_dir) if os.path.isdir(data_dir) else []):
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            stamp = name[len(prefix):-len(suffix)]
            # Accept only the "%Y%m%d_%H%M%S" shape so that other files sharing
            # the prefix are not picked up by mistake.
            if len(stamp) == 15 and stamp[8] == '_' and (stamp[:8] + stamp[9:]).isdigit():
                candidates.append(name)
        if not candidates:
            raise FileNotFoundError(
                f"No '{filename}.json' or timestamped '{filename}_*.json' file found in '{data_dir}'."
            )
        # Fixed-width timestamps sort chronologically, so max() is the newest file.
        path = os.path.join(data_dir, max(candidates))

    with open(path, 'r', encoding = 'utf-8') as file:
        return json.load(file)

In [12]:
# Read back the scraped data that serves as context for each question.
context_credit_cards = upload_contexts(filename = "credit_cards")
context_credicompras = upload_contexts(filename = "credicompras")

In [22]:
def ask_llama(question, context):
    """Ask the module-level ``local_llm`` a question grounded in ``context``.

    The prompt places ``context`` under an introductory line, followed by an
    instruction to answer clearly and concisely, and finally ``question``.

    Args:
        question: Question text appended at the end of the prompt.
        context: Knowledge to ground the answer; interpolated with its
            default string formatting.

    Returns:
        Whatever ``local_llm`` returns for the assembled prompt.
    """
    prompt_sections = [
        "Basado en la siguiente información:",
        f"{context}",
        "Responde la siguiente pregunta de forma clara y concisa:",
        f"{question}",
    ]
    # Sections are separated by a blank line.
    return local_llm("\n\n".join(prompt_sections))

In [None]:
# Questions sent to the model, in the order they are answered below.
question_1, question_2 = (
    "¿Cuáles son los nombres de las tarjetas que tiene disponibles Tuya S.A.?",
    "¿Cuáles son los valores de la tasa de interés y póliza del producto Credicompras?",
)

### Getting Answers

In [None]:
# Each question is answered against the context scraped for its own topic.
answer_1 = ask_llama(question = question_1, context = context_credit_cards)
answer_2 = ask_llama(question = question_2, context = context_credicompras)

### Saving Answers

In [32]:
# Store both answers as plain-text files in the Output folder.
save_output(data = answer_1, filename = 'answer_1', location = 'Output', extension = 'txt')
save_output(data = answer_2, filename = 'answer_2', location = 'Output', extension = 'txt')

✅ Data saved in 'answer_1_20250302_224942.txt'.
✅ Data saved in 'answer_2_20250302_224943.txt'.
