In [None]:
import pickle
import numpy as np
from langchain_openai import OpenAIEmbeddings

# TODO: change private function names
# TODO: change function calls to keyword form (arg=arg)

class Retrieve_internal:
    """Retrieve the stored documents that best match a text query.

    Documents are read from ``data/scrapData.pkl`` and their precomputed
    embeddings from ``data/128Embedding.npy``. A query is embedded with
    ``OpenAIEmbeddings`` and documents are ranked by dot product against it.
    """

    def __init__(self, openAi_key: str) -> None:
        """Load the documents and embeddings and create the embedding client.

        Args:
            openAi_key: API key forwarded to ``OpenAIEmbeddings``.
        """
        # NOTE: pickle.load can execute arbitrary code; only load a trusted file.
        with open("data/scrapData.pkl", "rb") as file:
            self.internal_docs = pickle.load(file)
        # .npy is a binary format, so the file must be opened in binary mode.
        with open("data/128Embedding.npy", "rb") as file:
            self.internal_docs_embeddings = np.load(file)
        self.embeddings_model = OpenAIEmbeddings(openai_api_key=openAi_key)

    def retrieve_internal(self, input: str, max_length: int) -> str:
        """Return the best-matching documents joined by newlines.

        Documents are appended in descending dot-product order until the
        result holds at least ``max_length`` characters or every document has
        been used, so the result may exceed ``max_length`` by part of the last
        document.

        Args:
            input: Query text to embed.
            max_length: Character count at which to stop appending documents.

        Returns:
            The selected documents, each followed by ``'\\n'``; an empty
            string when ``max_length`` is not positive.
        """
        query_vector = np.asarray(
            self.embeddings_model.embed_documents([input])[0]
        )
        scores = np.dot(self.internal_docs_embeddings, query_vector)
        # Plain floats keep tuple comparison well-defined; ties fall back to
        # the document index, matching a reverse sort of (score, index).
        ranked = sorted(
            ((float(score), idx) for idx, score in enumerate(scores)),
            reverse=True,
        )

        pieces = []
        total_length = 0
        for _, doc_idx in ranked:
            if total_length >= max_length:
                break
            piece = self.internal_docs[doc_idx] + '\n'
            pieces.append(piece)
            total_length += len(piece)
        return ''.join(pieces)

In [2]:
import requests
from bs4 import BeautifulSoup
import json
from youtube_transcript_api import YouTubeTranscriptApi


class Partselect_scrapper:
    """Scrape part details, repair text and video transcripts from partselect.com."""

    # Seconds to wait for an HTTP response before giving up.
    REQUEST_TIMEOUT = 10
    # Video ID that get_page_video_ID always skips (hard-coded exclusion).
    EXCLUDED_VIDEO_ID = "d6AvOkulk_g"

    def __init__(self) -> None:
        pass

    def get_video_title(self, video_ID: str) -> str:
        """Return the YouTube title reported by noembed.com for ``video_ID``.

        Returns ``None`` when the response JSON has no ``title`` key.
        """
        url = 'https://noembed.com/embed?url=https://www.youtube.com/watch?v=' + video_ID
        result = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        payload = json.loads(result.content)
        return payload.get('title')

    def get_youtube_title_and_trasncript(self, video_ID: str) -> tuple:
        """Return ``(title, text)`` for a video.

        ``text`` is the title header followed by the transcript segments
        joined with spaces, with newlines replaced by spaces. Raises if the
        transcript cannot be fetched or the title is missing; the batch
        method ``get_all_title_and_transcript`` catches those errors.
        """
        segments = YouTubeTranscriptApi.get_transcript(video_ID)
        transcript = ''.join(segment['text'] + ' ' for segment in segments)
        transcript = transcript.replace('\n', ' ')
        title = self.get_video_title(video_ID)
        transcript = "vido title: " + title + '\n' + "Transcript:\n " + transcript
        return (title, transcript)

    def get_page_video_ID(self, soup: "BeautifulSoup") -> list:
        """Return the unique video IDs found in ``div.yt-video`` elements.

        Elements without a ``data-yt-init`` attribute and the excluded video
        ID are skipped. The order of the returned list is not defined.
        """
        labels = ['div']
        classes = ['yt-video']
        video_ids = set()
        for node in soup.find_all(labels, class_=classes):
            video_ID = node.get('data-yt-init')
            if not video_ID or video_ID == self.EXCLUDED_VIDEO_ID:
                continue
            video_ids.add(video_ID)
        return list(video_ids)

    def get_all_title_and_transcript(self, video_ID_list: list) -> list:
        """Return ``(title, text)`` tuples for every video that can be fetched.

        Best-effort: a video whose title or transcript lookup fails is
        reported with ``print`` and skipped.
        """
        ret = []
        for video_ID in video_ID_list:
            try:
                ret.append(self.get_youtube_title_and_trasncript(video_ID))
            except Exception as e:
                print(e)
                continue
        return ret

    @classmethod
    def search_part(cls, query: str, get_video=True) -> str:
        """Search partselect.com and return the scraped text for ``query``.

        Args:
            query: Search term, e.g. a part number.
            get_video: When true, append transcripts of videos on the page.

        Returns:
            The concatenated name, description, repair stories and
            troubleshooting text (plus transcripts if requested), or ``""``
            on a non-200 response or a parsing error.
        """
        url = 'https://www.partselect.com/api/search/'
        response = requests.get(
            url, params={'searchterm': query}, timeout=cls.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print("Partselect_scrapper: network error")
            return ""

        try:
            soup = BeautifulSoup(response.content, 'html.parser')
            labels = ['div', 'h1']
            classes = ['pd__description', 'title-lg', 'title-main',
                       'repair-story__instruction', 'col-md-6 mt-3']
            chunks = []
            printed_story = False
            printed_trouble_shooting = False
            for result in soup.find_all(labels, class_=classes):
                result_classes = result['class']
                if 'title-lg' in result_classes or 'title-main' in result_classes:
                    chunks.append("Name: " + result.get_text(strip=True))
                elif 'pd__description' in result_classes:
                    # Either sub-element may be absent; keep whatever exists
                    # instead of discarding the whole page.
                    heading = result.find('h2', class_='title-md')
                    body = result.find('div', itemprop='description')
                    if heading is not None:
                        chunks.append(heading.get_text(strip=True))
                    if body is not None:
                        chunks.append(body.get_text(strip=True))
                elif 'repair-story__instruction' in result_classes:
                    if not printed_story:
                        chunks.append("Repair Story From Customer:")
                        printed_story = True
                    chunks.append(result.get_text(strip=True))
                else:
                    if not printed_trouble_shooting:
                        chunks.append("Trouble Shooting:")
                        printed_trouble_shooting = True
                    chunks.append(result.get_text(strip=True))

            if get_video:
                # The video helpers are instance methods, so a classmethod
                # needs an instance to call them.
                scraper = cls()
                video_id_list = scraper.get_page_video_ID(soup)
                title_and_transcript_list = scraper.get_all_title_and_transcript(
                    video_ID_list=video_id_list
                )
                for _, transcript in title_and_transcript_list:
                    chunks.append('\n' + transcript)
            return ''.join(chunks)
        except Exception as e:
            print(e)
            return ""

    def get_compatible_parts(self, source_part_ID: str, query: str):
        """Return details of parts listed for a model that match ``query``.

        Args:
            source_part_ID: Model number used in the ``/Models/<id>/Parts/`` URL.
            query: Part number or description sent as ``SearchTerm``.

        Returns:
            ``'Compatible parts found: \\n'`` followed by the ``search_part``
            text of each part found, or a "could not find" message when the
            page lists no parts.
        """
        params = {"SearchTerm": query}
        url = "https://www.partselect.com/Models/" + source_part_ID + "/Parts/"
        response = requests.get(url, params, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        parts_list = []
        for part in soup.find_all('div', class_='mega-m__part'):
            part_name = part.find('a', class_='mega-m__part__name')
            if part_name is None:
                continue
            part_number = part_name.find_next_sibling('div')
            if part_number is not None:
                # The text after the last ':' is taken as the part number.
                parts_list.append(part_number.get_text(strip=True).split(':')[-1])

        if not parts_list:
            return 'could not find compatible parts'
        details = ''.join(
            self.search_part(query=number, get_video=False) for number in parts_list
        )
        return 'Compatible parts found: \n' + details


In [3]:
from typing import Optional, Union
from math import sqrt, cos, sin
from langchain.tools import BaseTool



class SearchPartTool(BaseTool):
    """Tool that lists the parts of an appliance model matching a query."""

    # NOTE(review): this name is identical to RelevantPartTool's; give one of
    # them a distinct name before registering both with the same agent.
    name: str = "Find relevant part for machine"
    # The original referenced an undefined name `desc`, which raised NameError
    # when the class body ran.
    description: str = (
        "Use this tool to look up parts that fit a given machine model. "
        "Provide exactly two arguments: the model number of the machine, "
        "and a part number or a part description."
    )

    def _run(self, source_part_ID: str, query: str) -> str:
        """Return details of the parts listed for a model that match ``query``.

        Args:
            source_part_ID: Model number used in the ``/Models/<id>/Parts/`` URL.
            query: Part number or description sent as ``SearchTerm``.

        Returns:
            ``'Compatible parts found: \\n'`` followed by the scraped text of
            each part, or a "could not find" message when none are listed.
        """
        params = {"SearchTerm": query}
        url = "https://www.partselect.com/Models/" + source_part_ID + "/Parts/"
        # Bound the wait so a stalled server cannot hang the tool call.
        response = requests.get(url, params, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        parts_list = []
        for part in soup.find_all('div', class_='mega-m__part'):
            part_name = part.find('a', class_='mega-m__part__name')
            if part_name is None:
                continue
            part_number = part_name.find_next_sibling('div')
            if part_number is not None:
                # The text after the last ':' is taken as the part number.
                parts_list.append(part_number.get_text(strip=True).split(':')[-1])

        if not parts_list:
            return 'could not find compatible parts'
        # search_part is defined on Partselect_scrapper, not Retrieve_internal.
        details = ''.join(
            Partselect_scrapper.search_part(query=number, get_video=False)
            for number in parts_list
        )
        return 'Compatible parts found: \n' + details

    def _arun(self, *args, **kwargs):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")



class RelevantPartTool(BaseTool):
    """Tool that lists the parts of an appliance model matching a query."""

    name: str = "Find relevant part for machine"
    description: str = (
        "Use this tool when you need to find related part of a machine. "
        "To use the tool you must provide exactly two argument ['model number, query']. "
        "The first argument must be a model number of a machine, such as WDT780SAEM1. "
        "The second argument can be a part number such as PS3406971, or a description, such as dishrack wheel. "
        "For exmaple: "
        "question: I want a drawer track for FPHD2491KF0. Arugment = [FPHD2491KF0, drawer track], "
        "question: is PS429725 compatible with my FGHS2631PF4A. Arumgnet = [FGHS2631PF4A, PS429725]")

    def _run(self, source_part_ID: str, query: str) -> str:
        """Return details of the parts listed for a model that match ``query``.

        Args:
            source_part_ID: Model number used in the ``/Models/<id>/Parts/`` URL.
            query: Part number or description sent as ``SearchTerm``.

        Returns:
            ``'Compatible parts found: \\n'`` followed by the scraped text of
            each part, or a "could not find" message when none are listed.
        """
        params = {"SearchTerm": query}
        url = "https://www.partselect.com/Models/" + source_part_ID + "/Parts/"
        # Bound the wait so a stalled server cannot hang the tool call.
        response = requests.get(url, params, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        parts_list = []
        for part in soup.find_all('div', class_='mega-m__part'):
            part_name = part.find('a', class_='mega-m__part__name')
            if part_name is None:
                continue
            part_number = part_name.find_next_sibling('div')
            if part_number is not None:
                # The text after the last ':' is taken as the part number.
                parts_list.append(part_number.get_text(strip=True).split(':')[-1])

        if not parts_list:
            return 'could not find compatible parts'
        # search_part is defined on Partselect_scrapper, not Retrieve_internal.
        details = ''.join(
            Partselect_scrapper.search_part(query=number, get_video=False)
            for number in parts_list
        )
        return 'Compatible parts found: \n' + details

    def _arun(self, *args, **kwargs):
        """Async execution is not supported; always raises NotImplementedError."""
        raise NotImplementedError("This tool does not support async")


# Tool instances collected in one list; only RelevantPartTool is included
# (SearchPartTool is defined above but not added here).
tools = [RelevantPartTool()]