## Basic Setup & Installation

In [3]:
from google.colab import drive
from google.colab import userdata
import os
# Attach Google Drive at /content/drive so later cells can read files from it.
drive.mount("/content/drive")

# Fall back to the Colab secret store only when the key is not already exported
# in this process's environment.
if os.environ.get("OPENAI_API_KEY") is None:
  os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# Install tiktoken and the LangChain packages into the kernel's environment
# (-q keeps pip's output quiet).
# NOTE(review): versions are unpinned, so a fresh run may resolve different
# releases than the ones this notebook was written against — consider pinning.
%pip -q install tiktoken langchain langchain-core langchain-community langchain-openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h

## Data Load

In [32]:
import pandas as pd
import os

# Location of the preprocessed page dump on the mounted Google Drive.
base_path = "/content/drive/MyDrive/Profect SAE/test_data"
input_file = os.path.join(base_path, "test_page_data_preprocessed.csv")

# Fail fast with an actionable message instead of only printing a status and
# then letting read_csv raise on the missing file.
if not os.path.exists(input_file):
  print("input: not good")
  raise FileNotFoundError(f"Input CSV not found: {input_file}")
print("input: good to go")

df = pd.read_csv(input_file)

# Empty CSV cells are read as NaN (a float), which would break the string
# concatenation used when building the model input. Normalise every field to
# a plain string so each row can be processed.
title = df["Title"].fillna("").astype(str).to_list()
url = df["URL"].fillna("").astype(str).to_list()
cleaned_text = df["cleaned_innerText"].fillna("").astype(str).to_list()

input: good to go


## Common Methods

In [46]:
def prompt_printer(prompt, input="Test"):
  """Render `prompt` with the given `input` value and print the result.

  Args:
    prompt: Any object exposing `.format(input=...)`, e.g. a plain string
      containing an `{input}` placeholder.
    input: Value substituted for the `input` placeholder. Defaults to "Test".

  Returns:
    None. The rendered prompt is written to stdout.
  """
  rendered = prompt.format(input=input)
  print(rendered)

In [52]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

def mvp_chain(template, model_name):
  """Build a prompt -> chat model -> string-output chain.

  Args:
    template: Prompt object placed at the head of the chain.
    model_name: Model identifier passed to `ChatOpenAI`.

  Returns:
    The composed chain `template | ChatOpenAI | StrOutputParser`.
  """
  # temperature=0 is passed through to ChatOpenAI unchanged.
  return template | ChatOpenAI(model_name=model_name, temperature=0) | StrOutputParser()

## LangChain + OpenAI

### MVP Text - (Preprocess) -> OpenAI
- Preprocess Done
- OpenAI
  - ChatPromptTemplate

In [None]:
from langchain_core.prompts import (
    ChatPromptTemplate
)

# System instructions for the stock recommender.
# Doubled braces ({{ / }}) are escapes: they become literal { / } once the
# string is formatted as a template, so the JSON skeleton reaches the model
# without being treated as placeholders.
# The stock list is given an explicit "stocks" key: a bare array inside an
# object is not valid JSON, so without the key the model has to invent one.
system_prompt = """
You are a helpful assistant that recommends stocks based on the given input.
The input is collected data from a web page of the user and it contains three different types of information separated by [SEP]

- Title: The title of the web page.
- URL: The url of the web page.
- Page Information: Text content of the web page.

You have to use above information to find relevant stocks.

**Important**
- You need to find three relevant stocks from KOSPI and NASDAQ.
- Don't generate if you can't find any.
- Generate the result in the following format.
[
  {{
    "status": true if the stock is found, false otherwise,
    "stocks": [
      {{
        "stock_name": Official name of the stock,
        "ticker_symbol": Ticker symbol of the stock,
        "market_name": Name of the market where the stock is traded
      }}
      ...
    ]
  }}
]
"""


# Chat template: the fixed system instructions followed by one human turn whose
# content is supplied through the `input` variable when the chain is invoked.
final_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

## Test

In [55]:
import json

# Delimiter between the three fields of the user message; the system prompt
# tells the model that the fields are separated by this exact token.
SEP = "[SEP]"

stock_generator = mvp_chain(final_prompt, "gpt-4o-mini")

print("Test Start!")
# Walk the three parallel column lists together, numbering pages from 1.
for idx, (page_title, page_url, page_text) in enumerate(zip(title, url, cleaned_text), start=1):
  given_title = "Title: " + page_title
  given_url = "URL: " + page_url
  given_text = "Page Information: " + page_text
  user_input = SEP.join([given_title, given_url, given_text])

  print(f"{idx}. {given_title}")
  # given_url already carries its "URL: " label, so print it as-is; adding the
  # label again produced "URL: URL: ..." in the output.
  print(given_url)
  print()
  print(stock_generator.invoke({"input": user_input}))
  print("-------------")
  print("")


Test Start!
1. Title: 빙속 김준호, 남자 500m 동메달…100m 3위 이어 '멀티 메달' 성공 [하얼빈 현장]
URL: URL: https://m.sports.naver.com/general/article/311/0001825451

[
  {
    "status": true,
    "stocks": [
      {
        "stock_name": "Korea Electric Power Corporation",
        "ticker_symbol": "KEP",
        "market_name": "KOSPI"
      },
      {
        "stock_name": "Samsung Electronics Co., Ltd.",
        "ticker_symbol": "005930",
        "market_name": "KOSPI"
      },
      {
        "stock_name": "NVIDIA Corporation",
        "ticker_symbol": "NVDA",
        "market_name": "NASDAQ"
      }
    ]
  }
]
-------------

2. Title: [국제]중국, 보복 관세 발효...'조용한 기싸움' 언제까지? | YTN


URL: URL: https://www.ytn.co.kr/_cs/_ln_0104_202502101459265615_005.html

[
  {
    "status": true,
    "stocks": [
      {
        "stock_name": "Nucor Corporation",
        "ticker_symbol": "NUE",
        "market_name": "NYSE"
      },
      {
        "stock_name": "Steel Dynamics, Inc.",
        "ticker_symbol": "STLD",
        "m