### **Configuration**

In [None]:
# Disabled scratch cell: when uncommented it prints the id of every source
# returned by NewsApiClient.get_sources() (presumably used to pick a value for
# `source` in the configuration cell - confirm before deleting).
# FIXME(security): the API key below is committed in plain text; load it from
# the environment or the config module instead, and rotate this key.
# from newsapi import NewsApiClient

# newsapi = NewsApiClient(api_key="03122dc3b7b84ea29212ca965b40c7aa")
# sources = newsapi.get_sources()
# for source in sources["sources"]:
# 	print(source["id"])


In [4]:
# Dependencies
import os
from config import DARTMOUTH_API_KEY, DARTMOUTH_CHAT_API_KEY
from langchain_dartmouth.llms import ChatDartmouth

# Publishing both Dartmouth credentials as process environment variables
os.environ.update({
    'DARTMOUTH_API_KEY': DARTMOUTH_API_KEY,
    'DARTMOUTH_CHAT_API_KEY': DARTMOUTH_CHAT_API_KEY,
})

# Model identifiers: embedding model used for the vector store,
# chat model used for the classification step
embeddings_model_name = "bge-m3"
llm_model_name = "openai.gpt-4o-mini-2024-07-18"

# News query settings: the outlet to pull from and the search term
source = "breitbart-news"
keywords = "antitrust"

# JSON file with the tweets that get classified in the generation step
testing_data = '../input/antitrust.json'

In [24]:
# Listing the models exposed through ChatDartmouth.
# ChatDartmouth is imported here explicitly so this cell does not depend on the
# configuration cell having been executed first; the DartmouthLLM import is
# kept because later cells may rely on it being in the kernel namespace.
from langchain_dartmouth.llms import ChatDartmouth, DartmouthLLM

ChatDartmouth.list()

[{'name': 'llama-3-8b-instruct',
  'provider': 'meta',
  'display_name': 'Llama 3 8B Instruct',
  'tokenizer': 'meta-llama/Meta-Llama-3-8B-Instruct',
  'type': 'llm',
  'capabilities': ['chat'],
  'server': 'text-generation-inference',
  'parameters': {'max_input_tokens': 8192}},
 {'name': 'llama-3-2-11b-vision-instruct',
  'provider': 'meta',
  'display_name': 'Llama 3.2 11B Vision Instruct',
  'tokenizer': 'meta-llama/Llama-3.2-11B-Vision-Instruct',
  'type': 'llm',
  'capabilities': ['chat', 'vision'],
  'server': 'text-generation-inference',
  'parameters': {'max_input_tokens': 127999}},
 {'name': 'codellama-13b-instruct-hf',
  'provider': 'meta',
  'display_name': 'CodeLlama 13B Instruct HF',
  'tokenizer': 'meta-llama/CodeLlama-13b-Instruct-hf',
  'type': 'llm',
  'capabilities': ['chat'],
  'server': 'text-generation-inference',
  'parameters': {'max_input_tokens': 6144}}]

In [26]:
# Listing the models exposed through ChatDartmouthCloud; the chat model named
# in `llm_model_name` appears in the recorded output of this cell.
from langchain_dartmouth.llms import ChatDartmouthCloud
ChatDartmouthCloud.list()

[{'name': 'anthropic.claude-3-5-haiku-20241022',
  'provider': 'anthropic',
  'type': 'llm',
  'capabilities': ['chat'],
  'server': 'dartmouth-chat',
  'parameters': {}},
 {'name': 'anthropic.claude-3-7-sonnet-20250219',
  'provider': 'anthropic',
  'type': 'llm',
  'capabilities': ['chat', 'vision'],
  'server': 'dartmouth-chat',
  'parameters': {}},
 {'name': 'openai.gpt-4o-mini-2024-07-18',
  'provider': 'openai',
  'type': 'llm',
  'capabilities': ['chat', 'vision'],
  'server': 'dartmouth-chat',
  'parameters': {}},
 {'name': 'openai.gpt-4o-2024-08-06',
  'provider': 'openai',
  'type': 'llm',
  'capabilities': ['chat', 'vision'],
  'server': 'dartmouth-chat',
  'parameters': {}},
 {'name': 'openai.o3-mini-2025-01-31',
  'provider': 'openai',
  'type': 'llm',
  'capabilities': ['chat'],
  'server': 'dartmouth-chat',
  'parameters': {}},
 {'name': 'google_genai.gemini-2.0-flash-001',
  'provider': 'google_genai',
  'type': 'llm',
  'capabilities': ['chat', 'vision'],
  'server': '

### **Building Knowledge Base**

In [5]:
# Importing dependencies
import requests
import os
from bs4 import BeautifulSoup
from newsapi import NewsApiClient
import re

# Creating directory to hold scraped articles.
# exist_ok=True keeps the cell re-runnable: without it a second run raises
# FileExistsError because the folder is already there.
knowledge_base_dir = f"../knowledge-bases/{keywords}_{source}"
os.makedirs(name=knowledge_base_dir, exist_ok=True)

# Creating newsapi client.
# FIXME(security): the key should come from the environment only. The literal
# is retained purely as a fallback so existing runs keep working - rotate it
# and remove the fallback once NEWSAPI_KEY is set where this notebook runs.
newsapi_key = os.environ.get("NEWSAPI_KEY", "03122dc3b7b84ea29212ca965b40c7aa")
newsapiclient = NewsApiClient(api_key=newsapi_key)

# Querying articles matching the keywords from the configured source
articles_response = newsapiclient.get_everything(q=keywords, sources=source, page_size=100)
articles = articles_response['articles']

# Scraping articles and saving one text file per article in the directory
for article in articles:
	url = article['url']

	# A timeout plus explicit error handling stops one unreachable or hanging
	# site from aborting (or stalling) the whole scrape.
	try:
		response = requests.get(url=url, timeout=30)
	except requests.RequestException as error:
		print(f"Skipping {url}: {error}")
		continue

	# Only successful responses are written to disk
	if response.status_code != 200:
		continue

	beautifulsoup = BeautifulSoup(response.content, "html.parser")
	article_paragraphs = beautifulsoup.find_all("p")

	# Cleaning article title: spaces become underscores (as before) and path
	# separators are replaced too, since a "/" in a title would otherwise be
	# interpreted as a sub-directory and make open() fail.
	article_title = re.sub(' ', '_', article['title'])
	article_title = re.sub(r'[\\/]', '_', article_title)

	# Explicit UTF-8 so non-ASCII article text does not depend on the
	# platform's default encoding.
	with open(file=f"{knowledge_base_dir}/{article_title}.txt", mode="w", encoding="utf-8") as fp:
		for paragraph in article_paragraphs:
			paragraph_cleaned = paragraph.get_text().strip()

			if paragraph_cleaned != "":
				# Separate paragraphs with a blank line; previously they were
				# written back to back, fusing the last word of one paragraph
				# with the first word of the next and leaving the text without
				# the "\n\n" boundaries a character splitter breaks on by default.
				fp.write(paragraph_cleaned + "\n\n")

### **Loading and Splitting Documents**

In [7]:
# Importing dependencies
from langchain.document_loaders import DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter

# Folder that the scraping step filled with one .txt file per article
directory = f"../knowledge-bases/{keywords}_{source}"

# Splitter configured through the tiktoken "cl100k_base" encoding,
# chunk size 256 with no overlap between consecutive chunks
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=256,
    chunk_overlap=0,
)

# Loader restricted to the text files directly matching "*.txt"
loader = DirectoryLoader(path=directory, glob="*.txt")

# Reading every matching file and cutting it into chunks in one call
docs = loader.load_and_split(text_splitter=splitter)

### **Embedding and Storing Documents**

In [8]:
# Importing dependencies
from langchain_dartmouth.embeddings import DartmouthEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Creating embeddings model
embeddings = DartmouthEmbeddings(model_name=embeddings_model_name, dartmouth_api_key=str(DARTMOUTH_API_KEY))

# Embedding documents and storing them in memory
vector_store = InMemoryVectorStore(embedding=embeddings)

# Documents are added in fixed-size batches. The stride and the slice width
# must be the same value: the previous loop advanced by 50 but sliced 100
# items, so every chunk after the first 50 was embedded and stored twice,
# which lets duplicates crowd the top-k results at retrieval time.
embed_batch_size = 50

for batch_start in range(0, len(docs), embed_batch_size):
	_ = vector_store.add_documents(docs[batch_start: batch_start + embed_batch_size])

### **Retrieval and Generation**

In [17]:
# Importing dependencies
from langchain_dartmouth.llms import ChatDartmouthCloud
import json

# Initializing variable referencing LLM
llm = ChatDartmouthCloud(model_name=llm_model_name)

# Open testing data file (a JSON list whose items carry a 'Tweet' field)
with open(testing_data, 'r') as fp:
	test_data = json.load(fp)

# Making sure the output folder exists so the first write does not fail
os.makedirs("../output", exist_ok=True)

# NOTE(review): the "_monopoly" suffix does not match keywords = "antitrust";
# it looks like a leftover from an earlier run - confirm the intended name.
# The path is left unchanged here so existing result files keep being appended to.
output_path = f"../output/{source}_monopoly"

# Iterating through each test data point
for tweet in test_data:

	# Retrieving most-similar documents.
	# The results get their own name: reusing `docs` would overwrite the chunk
	# list produced by the loading cell and break a re-run of the embedding cell.
	query = tweet['Tweet']

	print("Query,", query)
	retrieved_docs = vector_store.similarity_search(query, k=5)

	# Each retrieved chunk is followed by a "--" separator line
	context = "".join(doc.page_content + "\n--\n" for doc in retrieved_docs)

	# Creating augmented prompt: task, then the retrieved context, then the
	# answer-format instruction. Previously the instruction was emitted before
	# the context and ran straight into the first chunk's text.
	prompt = (
		"Classify as 'Real News' or 'Fake News': "
		+ query
		+ "\n\nConsider the following info: \n\n"
		+ context
		+ "\nOnly respond with the classification"
	)

	# Querying LLM and appending the formatted response to the output file.
	# The file is reopened per tweet so results are on disk as soon as each
	# classification finishes.
	response = llm.invoke(prompt)
	output = response.pretty_repr()

	with open(output_path, "a", encoding="utf-8") as fp:
		print(output, file=fp)

Query, BREAKING: The US government has just announced its intention to split up Google's parent company Alphabet into 6 separate entities, citing antitrust concerns.
Query, Did you know that the majority of the world's largest corporations are now owned by just 5 families? It's time to take on the oligarchs and promote competition!
Query, Amazon and Walmart have agreed to merge, creating a retail giant with unparalleled power over the global market.
Query, The EU's antitrust regulators have just fined Google a record $10 billion for abusing its dominance in the search market.
Query, Bill Gates has come out in support of breaking up Microsoft, citing concerns over the company's anti-competitive practices.
Query, The US Justice Department has launched an investigation into Facebook's acquisition of Instagram, citing antitrust concerns.
Query, Apple has just announced that it will be acquiring Spotify in a deal worth $20 billion.
Query, The US Federal Trade Commission has announced that i

### **Evaluation**