In [1]:
from typing import TypedDict, Annotated, Sequence
from typing_extensions import TypedDict

from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, FunctionMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages
from langchain.tools import BaseTool, StructuredTool, Tool, tool
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain import hub
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

# Your further code goes here


In [2]:
import re
import time
import pandas as pd
from pprint import pprint

from gsheet_handler import GSheetHandler
import importlib
# Import gsheet_handler by name and reload it before use; presumably this lets
# edits to gsheet_handler.py take effect without restarting the kernel.
module = importlib.import_module('gsheet_handler')
importlib.reload(module)
# Build the handler from the reloaded module object rather than from the
# GSheetHandler name imported at the top of this cell.
# NOTE(review): the argument looks like a path to a Google credentials file — confirm against GSheetHandler.
ggl = module.GSheetHandler('google_sheet_key.json')

# Read the news-article CSV and keep only the first five rows.
# NOTE(review): the path is absolute and machine-specific.
data_raw = pd.read_csv('/home/sftpuser1/uploads/data.csv')[:5]
data_raw

no docx
no docx


Unnamed: 0,link,reference_time,article_time,title,text,author,tags
0,https://corporate.arcelormittal.com/media/news...,2024-05-29 12:53:23.132432,2024-05-22 00:00:00,Prime Minister of Belgium and Federal Minister...,"On May 22, Prime Minister Alexander De Croo an...",,
1,https://corporate.arcelormittal.com/media/news...,2024-05-29 12:53:23.164944,2024-04-02 00:00:00,ArcelorMittal launches ‘The Steel Works’,New content series features insights from Arce...,,"Climate Change,Automotive,Renewables,Infrastru..."
2,https://corporate.arcelormittal.com/media/news...,2024-05-29 12:53:23.164079,2024-04-22 00:00:00,U.S. Department of Energy announces financial ...,"April 22, 2024, Schererville, IN: ArcelorMitta...",,
3,https://corporate.arcelormittal.com/media/news...,2024-05-29 12:53:23.163250,2024-05-10 00:00:00,ArcelorMittal starts the construction of an el...,"La Granda (Gozón), 10 May 2024\nThe constructi...",,
4,https://corporate.arcelormittal.com/media/news...,2024-05-29 12:53:23.162187,2024-05-21 00:00:00,Trial carbon capture unit begins operating on ...,ArcelorMittal and partners Mitsubishi Heavy In...,,


In [3]:
# main.py
import json
import os
import telebot

# Load the configuration from the JSON file
with open('config.json') as config_file:
    config = json.load(config_file)

# Export the API keys stored in the config as environment variables
for secret_name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY"):
    os.environ[secret_name] = config[secret_name]

# Fixed tracing settings (tracing flag, endpoint, project name)
os.environ.update({
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    "LANGCHAIN_PROJECT": "nlmk_bot",
})

In [4]:
from datetime import datetime
from langchain import hub

def _get_grading_chain():
    """Return the ``prompt | llm`` grading chain, building it on first use only.

    The chain is stored as an attribute on this function, so the hub prompt is
    pulled and the chat model is constructed once per definition of this
    function; re-running the defining cell discards the stored chain.
    """
    chain = getattr(_get_grading_chain, '_chain', None)
    if chain is None:
        llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
        prompt = hub.pull("mlenparrot/nlmk_grade")
        chain = prompt | llm
        _get_grading_chain._chain = chain
    return chain


def evaluate_news_article(link, nes_text, news_data, news_topic):
    """Grade one news article with the "mlenparrot/nlmk_grade" hub prompt.

    Args:
        link: Value passed to the prompt as ``link``.
        nes_text: Value passed to the prompt as ``news_text``. The parameter
            name is kept as-is because callers pass it by keyword.
        news_data: Value passed to the prompt as ``news_data``.
        news_topic: Value passed to the prompt as ``news_topic``.

    Returns:
        Whatever ``chain.invoke`` returns for the filled-in prompt. In this
        notebook's recorded run that is a dict of tag lists and integer scores.
    """
    # The prompt also receives the current local date as YYYY-MM-DD.
    today = datetime.now().strftime('%Y-%m-%d')
    params = {
        'link': link,
        'news_text': nes_text,
        'news_data': news_data,
        'news_topic': news_topic,
        'today': today
    }
    # Reuse the memoized chain: this function is applied once per row, and
    # re-pulling the prompt for every row costs a network round trip each time.
    return _get_grading_chain().invoke(params)

# Try the grader on a synthetic article and print what it returns.
sample_article = {
    'link': 'https://example.com/news-article',
    'nes_text': 'This is a sample news text.',
    'news_data': '2024-06-01',
    'news_topic': 'Sample News Topic',
}
result = evaluate_news_article(**sample_article)

print(result)

{'tag_extraction': ['металлургия', 'ИТ'], 'Source_Authority': 4, 'Relevance_for_NLMK': 3, 'Legislative_Changes': 2, 'Novelty_and_Relevance': 4, 'competitor_extraction': [], 'Events_and_Conferences': 3, 'Competitors_and_Startups': 2, 'Products_and_Technologies': 3, 'Impact_on_the_Industry_and_Market': 4}


In [5]:
def apply_evaluation(row):
    """Grade a single article row and return the grading result as a Series.

    Args:
        row: Mapping-like row providing the 'link', 'text', 'article_time'
            and 'title' entries.

    Returns:
        ``pd.Series`` built from the value returned by ``evaluate_news_article``.
    """
    # Grader keyword argument -> row column that supplies it.
    column_for_argument = {
        'link': 'link',
        'nes_text': 'text',
        'news_data': 'article_time',
        'news_topic': 'title',
    }
    grading_kwargs = {
        argument: row[column]
        for argument, column in column_for_argument.items()
    }
    return pd.Series(evaluate_news_article(**grading_kwargs))

In [6]:
from tqdm import tqdm
# Register tqdm's pandas integration; progress_apply below depends on it.
tqdm.pandas()
# Work on a copy so data_raw itself is left untouched.
data_news = data_raw.copy()
# Grade every row (axis=1 passes one row at a time to apply_evaluation).
# Each row ends up in evaluate_news_article, which invokes the LLM chain.
evaluation_results = data_news.progress_apply(apply_evaluation, axis=1)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:15<00:00,  3.10s/it]


In [7]:
# Place the article columns and the grading columns side by side, then drop
# the metadata columns that are not wanted in the rated table.
data_news_rated = (
    pd.concat([data_news, evaluation_results], axis='columns')
    .drop(columns=['reference_time', 'author', 'tags'])
)
data_news_rated

Unnamed: 0,link,article_time,title,text,tag_extraction,Source_Authority,Relevance_for_NLMK,Legislative_Changes,Novelty_and_Relevance,competitor_extraction,Events_and_Conferences,Competitors_and_Startups,Products_and_Technologies,Impact_on_the_Industry_and_Market
0,https://corporate.arcelormittal.com/media/news...,2024-05-22 00:00:00,Prime Minister of Belgium and Federal Minister...,"On May 22, Prime Minister Alexander De Croo an...","[новости о металлургической промышленности, но...",4,4,3,5,[],4,3,5,4
1,https://corporate.arcelormittal.com/media/news...,2024-04-02 00:00:00,ArcelorMittal launches ‘The Steel Works’,New content series features insights from Arce...,"[новейшие продукты в металлургии, инновации в ...",4,3,0,5,[ArcelorMittal],3,4,5,4
2,https://corporate.arcelormittal.com/media/news...,2024-04-22 00:00:00,U.S. Department of Energy announces financial ...,"April 22, 2024, Schererville, IN: ArcelorMitta...","[новейшие продукты в металлургии, новизна и ак...",4,3,0,5,[ArcelorMittal],0,4,5,4
3,https://corporate.arcelormittal.com/media/news...,2024-05-10 00:00:00,ArcelorMittal starts the construction of an el...,"La Granda (Gozón), 10 May 2024\nThe constructi...","[новейшие продукты в ИТ и металлургии, инновац...",4,3,0,5,[ArcelorMittal],3,4,5,4
4,https://corporate.arcelormittal.com/media/news...,2024-05-21 00:00:00,Trial carbon capture unit begins operating on ...,ArcelorMittal and partners Mitsubishi Heavy In...,"[новейшие продукты в ИТ и металлургии, инновац...",4,3,2,5,"[BHP, Mitsubishi Development]",4,3,5,4


In [8]:
# Hand the rated table and the target spreadsheet URL to the GSheetHandler instance.
# NOTE(review): the trailing 0 is presumably the worksheet index — confirm against
# GSheetHandler.update_gsheet_with_df.
ggl.update_gsheet_with_df(data_news_rated,
                          'https://docs.google.com/spreadsheets/d/1cIB0yjbP-uInz7k0rs7zRruyct7C3Cks3P7DUGZTFlc/edit?usp=sharing', 
                          0)

In [9]:
# Pull the grading prompt on its own so the cell output shows its definition.
hub.pull("mlenparrot/nlmk_grade")

StructuredPrompt(input_variables=['link', 'news_data', 'news_text', 'news_topic', 'today'], metadata={'lc_hub_owner': 'mlenparrot', 'lc_hub_repo': 'nlmk_grade', 'lc_hub_commit_hash': 'a2268bccdbdcb6c263eace3c5fc8272315740b05605b3594fa7915156b7f2efd'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['today'], template='Какую проблему решаем?\nИТ подразделения НЛМК нуждаются в оперативной информации о новейших продуктах, инициативах конкурентов, инновациях в системах хранения и управления данными, ии, стартапах, законодательных требованиях, конференциях, открытиях и других событиях, которые могут повлиять на стратегию компании.\n\nЧто необходимо сделать?\nВыделение самой важной информации о новых продуктах, инициативах конкурентов, инновациях и других событиях.\n\nЦелевая аудитория\nИТ подразделения НЛМК, руководители и первые лица компаний, заинтересованные в оперативной информации о рынке и конкурентах.\n\nМетрики успеха проекта\nТочность сбора информации.\