# Setup

### Dependency

In [1]:
!pip -q install langchain huggingface_hub google-search-results tiktoken wikipedia

In [2]:
!pip install openai==0.27.8



In [3]:
!pip install -q google-generativeai

In [4]:
!pip install gitpython



### Library

In [5]:
import getpass
import json
import os

import git
import langchain
import openai

### Home

In [6]:
class GitHome():
    """Publish Git/GitHub checkout settings as process environment variables.

    The class is instantiated purely for its side effect: the constructor
    stores every argument in ``os.environ`` and derives three composite
    values from them, so that later cells (including ``!shell`` lines) can
    read them.

    Environment variables written:
        WORK_DIR, BRANCH_NAME, REPO_NAME, GIT_HOME, USER_NAME, USER_TOKEN:
            the constructor arguments, verbatim.
        GIT_REPO: ``repo_home`` and ``repo_name`` joined by exactly one "/".
        REPO_DIR: ``work_dir + "/" + repo_name``.
        CLONE_FROM: HTTPS clone URL on github.com with ``user_name`` and
            ``user_token`` embedded as credentials.

    SECURITY: ``USER_TOKEN`` and ``CLONE_FROM`` contain the secret token.
    Never print or display them, and never pass a literal token in a
    notebook that is saved or shared.
    """

    def __init__(self,
                 work_dir,
                 branch_name,
                 repo_name,
                 repo_home,
                 user_name,
                 user_token):
        """Export the given settings and the values derived from them.

        Args:
            work_dir: directory under which the repository is checked out.
            branch_name: branch name, exported as BRANCH_NAME.
            repo_name: repository name (without the ".git" suffix).
            repo_home: base URL the repository lives under; a trailing "/"
                is optional.
            user_name: GitHub account name used in the clone URL.
            user_token: GitHub personal access token (secret).

        Raises:
            TypeError: if any argument is not a string (``os.environ`` only
                accepts strings).
        """
        ### Save
        os.environ["WORK_DIR"] = work_dir
        os.environ["BRANCH_NAME"] = branch_name
        os.environ["REPO_NAME"] = repo_name
        os.environ["GIT_HOME"] = repo_home
        os.environ["USER_NAME"] = user_name
        os.environ["USER_TOKEN"] = user_token # Github Personal Access Token
        ### Compose
        # Normalise the separator so that repo_home works with or without a
        # trailing "/" (plain concatenation produced ".../userrepo").
        os.environ["GIT_REPO"] = repo_home.rstrip("/") + "/" + repo_name
        os.environ["REPO_DIR"] = work_dir + "/" + repo_name
        os.environ["CLONE_FROM"] = (
            "https://" + user_name + ":" + user_token
            + "@github.com/" + user_name + "/" + repo_name + ".git"
        )

In [7]:
# SECURITY: never hardcode the GitHub personal access token in the notebook.
# It is read from the GITHUB_TOKEN environment variable, falling back to an
# interactive prompt whose input is not echoed or stored in the notebook.
# The token that was previously committed in this cell must be treated as
# compromised and revoked in the GitHub account settings.
github_token = os.environ.get("GITHUB_TOKEN") or getpass.getpass("GitHub personal access token: ")

GitHome(work_dir="/content/drive/MyDrive/StanfordLLM/thought-distillation",
        repo_name="thought-distillation",
        repo_home="https://github.com/pablo-tech/",
        branch_name="main",
        user_name="pablo-tech",
        user_token=github_token)

<__main__.GitHome at 0x7f70dfb73f10>

### Git

In [8]:
# Display the working directory that GitHome exported to the environment.
os.environ["WORK_DIR"]

'/content/drive/MyDrive/StanfordLLM/thought-distillation'

In [9]:
try:
  os.chdir(os.environ["WORK_DIR"])
except:
  pass

!rm -rf $REPO_DIR


In [10]:
# Clone the repository into REPO_DIR using the credentialed URL exported by
# GitHome. The URL embeds the access token, so it must never be displayed.
clone_url = os.environ["CLONE_FROM"]
clone_target = os.environ["REPO_DIR"]
git.Repo.clone_from(clone_url, clone_target)

<git.repo.base.Repo '/content/drive/MyDrive/StanfordLLM/thought-distillation/thought-distillation/.git'>

In [11]:
# Work from the checkout's Python source directory; presumably this is what
# lets the project modules imported below be found — TODO confirm.
python_source_dir = os.environ["REPO_DIR"] + "/source/main/py"
os.chdir(python_source_dir)

### Import

In [12]:
from model_base import OpenaiBase
from tool_gift import GiftDataset, GiftSummarizer, GiftClean

# Data

In [17]:
# Build the dataset with n=3 and show how many raw records it holds.
ds = GiftDataset(n=3)
len(ds.raw_data)

READING=/content/drive/MyDrive/TataLLM/GiftReader/master_data.json
SUCCESS=master_data.json COUNT=2567
raw_length=3


3

In [18]:
# Inspect the first raw record of the dataset.
ds.raw_data[0]

{'title': 'Teakwood Leathers Navy & Red Medium Duffle Bag',
 'price': 1999.6,
 'uri': 'https://www.tatacliq.com/p-MP000000017671743',
 'image_url': 'https://img.tatacliq.com/images/i11/437Wx649H/MP000000017671743_437Wx649H_202305242359101.jpeg',
 'description': 'Bag Type : Duffle, Capacity : 67, Closure Type : Zip, Color : Navy & Red, Feature1 : 2, Feature2 : Number, Feature3 : 12 Months manufacturer warranty, Height : 32, Length : 67, Material Type : Nylon, Size : Medium, Strap Type : Double Handle, Width : 42, ',
 'master_output': {'id': 'TATACLIQ-PRIMARY-T_TR_DFT06_RD_NY-TeakwoodLeathers',
  'product': {'name': 'projects/473979811399/locations/global/catalogs/default_catalog/branches/0/products/TATACLIQ-PRIMARY-T_TR_DFT06_RD_NY-TeakwoodLeathers',
   'title': 'Teakwood Leathers Navy & Red Medium Duffle Bag',
   'brands': ['Teakwood Leathers'],
   'attributes': {'bu_name': {'text': ['TATACLIQ PRIMARY']}},
   'uri': 'https://www.tatacliq.com/p-MP000000017671743',
   'variants': [{'name

# LLM

In [19]:
# Create the project's OpenAI wrapper and obtain two model handles from it.
# NOTE(review): the "30"/"40" suffixes presumably denote the model
# generation — confirm against model_base.
open_ai = OpenaiBase()
inference_llm_30 = open_ai.inference_llm_30()
chat_llm_40 = open_ai.chat_llm_40(max_tokens=1000)

                engine was transferred to model_kwargs.
                Please confirm that engine is what you intended.
                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


# Parsing

In [20]:
# Build the cleaned records with n=3, passing chat_llm_40 as the completion
# model and verbose output switched off, then show the first cleaned record.
gifts = GiftClean(n=3, completion_llm=chat_llm_40, is_verbose=False)
gifts.clean_data[0]

READING=/content/drive/MyDrive/TataLLM/GiftReader/master_data.json
SUCCESS=master_data.json COUNT=2567
raw_length=3


{'title': 'Teakwood Leathers Navy & Red Medium Duffle Bag',
 'price': 1999.6,
 'currencyCode': 'INR',
 'originalPrice': 4999,
 'uri': 'https://www.tatacliq.com/p-MP000000017671743',
 'image_url': 'https://img.tatacliq.com/images/i11/437Wx649H/MP000000017671743_437Wx649H_202305242359101.jpeg',
 'description': 'Bag Type : Duffle, Capacity : 67, Closure Type : Zip, Color : Navy & Red, Feature1 : 2, Feature2 : Number, Feature3 : 12 Months manufacturer warranty, Height : 32, Length : 67, Material Type : Nylon, Size : Medium, Strap Type : Double Handle, Width : 42',
 'brand': 'Teakwood Leathers',
 'availability': 'IN_STOCK',
 'categories': ['Accessories', 'Mens Bags', 'Backpacks'],
 'color': 'Navy'}

In [24]:
# Fetch a single cleaned record, passing the product's title as the key.
gifts.get_product('Teakwood Leathers Navy & Red Medium Duffle Bag')

{'title': 'Teakwood Leathers Navy & Red Medium Duffle Bag',
 'price': 1999.6,
 'currencyCode': 'INR',
 'originalPrice': 4999,
 'uri': 'https://www.tatacliq.com/p-MP000000017671743',
 'image_url': 'https://img.tatacliq.com/images/i11/437Wx649H/MP000000017671743_437Wx649H_202305242359101.jpeg',
 'description': 'Bag Type : Duffle, Capacity : 67, Closure Type : Zip, Color : Navy & Red, Feature1 : 2, Feature2 : Number, Feature3 : 12 Months manufacturer warranty, Height : 32, Length : 67, Material Type : Nylon, Size : Medium, Strap Type : Double Handle, Width : 42',
 'brand': 'Teakwood Leathers',
 'availability': 'IN_STOCK',
 'categories': ['Accessories', 'Mens Bags', 'Backpacks'],
 'color': 'Navy'}

# Tool