### Installing Neccessary Packages

In [1]:
import os
from langchain import HuggingFaceHub
from dotenv import load_dotenv

### Connecting To Llama

In [3]:

# Load environment variables from the .env file
load_dotenv()

# Fetch the Hugging Face token
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# Verify that the token is loaded
if not huggingface_token:
    raise ValueError("Hugging Face token not found. Please check your .env file.")
else:
    print("Hugging Face token loaded successfully.")

# Initialize the LLaMA-2 model
llama_model_id = "meta-llama/Llama-2-70b-chat-hf"

try:
    print("Establishing connection with the LLaMA-2 model on Hugging Face...")
    llm = HuggingFaceHub(
        repo_id=llama_model_id,
        huggingfacehub_api_token=huggingface_token,
        model_kwargs={"temperature": 0.0}
    )
    print("Successfully connected to LLaMA-2 model.")
except Exception as e:
    print(f"Error connecting to LLaMA-2 model: {e}")

Hugging Face token loaded successfully.
Establishing connection with the LLaMA-2 model on Hugging Face...
Successfully connected to LLaMA-2 model.


In [None]:
# Test prompt
prompt = "Write a poem on langchain"
response = llm(prompt)
print("Response from LLaMA-2:", response)

  response = llm(prompt)


### Connecting to Embedding Model

### Initializing Pinecone Connection

In [19]:
print("Initializing the connection to the Pinecone Vector Database.")
load_dotenv()  # Load environment variables from the .env file
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_HOST_NAME = os.getenv('PINECONE_HOST_NAME')
pc = Pinecone(api_key = PINECONE_API_KEY)
pc
print("Initialized the connection to the Pinecone Vector Database.")

Initializing the connection to the Pinecone Vector Database.
Initialized the connection to the Pinecone Vector Database.


### Connecting to Google News API

In [21]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import os

def fetch_fair_lending_articles():
    # Pull the NEWS_API_KEY from environment variables
    NEWS_API_KEY = os.getenv('NEWS_API_KEY')

    # Check if API key is available
    if not NEWS_API_KEY:
        raise ValueError("Please set the 'NEWS_API_KEY' environment variable.")

    # Define the base URL for NewsAPI
    url = 'https://newsapi.org/v2/everything'

    # Get the current date and the date from 7 days ago in YYYY-MM-DD format
    current_date = datetime.now().strftime('%Y-%m-%d')
    last_week_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')

    # Define the search query with specific keywords and phrases
    search_query = (
        '("fair lending" OR "disparate treatment" OR "overt discrimination" OR '
        '"redlining" OR "mortgage discrimination" OR "banking discrimination" OR '
        '"racial discrimination in banking" OR "lending bias" OR "credit discrimination")'
    )

    # Set parameters for the request
    params = {
        'q': search_query,
        'from': last_week_date,      # Start date (7 days ago)
        'to': current_date,          # End date (today)
        'language': 'en',            # Filter to English articles
        'sortBy': 'publishedAt',     # Sort by recent publications
        'pageSize': 10,              # Limit to 10 articles
        'apiKey': NEWS_API_KEY
    }

    # Make the request to the NewsAPI
    response = requests.get(url, params=params)

    # Handle the response
    if response.status_code == 200:
        data = response.json()
        articles = data.get('articles', [])
        
        # Extract relevant metadata and store it in a DataFrame
        if articles:
            articles_data = []

            # Loop through each article to scrape and clean the full content
            for article in articles:
                article_data = {
                    'Title': article.get('title'),
                    'Author': article.get('author'),
                    'Source': article.get('source', {}).get('name'),
                    'Description': article.get('description'),
                    'URL': article.get('url'),
                    'Published At': article.get('publishedAt'),
                }

                # Scrape the full content of the article
                article_url = article.get('url')
                try:
                    page = requests.get(article_url)
                    soup = BeautifulSoup(page.content, 'html.parser')

                    # Extract paragraphs from the content
                    paragraphs = soup.find_all('p')
                    raw_content = ' '.join([para.get_text() for para in paragraphs])

                    # Clean the text by removing non-ASCII characters
                    cleaned_content = ''.join(filter(lambda x: x in set(map(chr, range(32, 127))), raw_content))

                    article_data['Content'] = cleaned_content
                except Exception as e:
                    article_data['Content'] = f"Error fetching content: {e}"

                articles_data.append(article_data)

            # Create a DataFrame from the list of dictionaries
            articles_df = pd.DataFrame(articles_data)
            return articles_df
        else:
            print("No articles found for the last week.")
            return pd.DataFrame()  # Return an empty DataFrame if no articles found
    else:
        raise Exception(f"Error: {response.status_code} - {response.text}")

# Example usage of the function
articles_df = fetch_fair_lending_articles()
articles_df


Unnamed: 0,Title,Author,Source,Description,URL,Published At,Content
0,"Ward Scull, 81, passionate Virginia advocate f...",DAVE RESS Richmond Times-Dispatch,Richmond.com,"Ward Scull fought the long, hard battle to sto...",https://richmond.com/news/state-regional/ward-...,2024-11-15T18:33:00Z,E-edition PLUS unlimited articles & videos Per...
1,Mortgage access for Black buyers blamed for wi...,Marian McPherson,Inman,Mortgage discrimination and appraisal bias are...,https://www.inman.com/2024/11/14/mortgage-acce...,2024-11-14T19:03:45Z,Mortgage discrimination and appraisal bias are...
2,On Building Git for Lawyers,Jordan Bryan,Substack.com,"Over this past weekend, Twitter discovered the...",https://jordanbryan.substack.com/p/on-building...,2024-11-14T15:48:45Z,"Over this past weekend, Twitter discovered the..."
3,Project 2025 Document Mentions Trump's Name Mo...,Aleksandra Wrona,Snopes.com,U.S. President-elect Donald Trump's name is li...,https://www.snopes.com//fact-check/trump-proje...,2024-11-14T14:00:00Z,"About this rating The word ""Trump"" appears 31..."
4,"If You Like Being Ripped Off By Comcast, You’l...",Karl Bode,Techdirt,Current FCC Commissioner Brendan Carr has spen...,https://www.techdirt.com/2024/11/13/if-you-lik...,2024-11-13T13:31:35Z,Predictions Current FCC Commissioner Brendan C...
5,Court: Connecticut High School Females’ Discri...,James Nault,Legalinsurrection.com,Connecticut federal court rules that Plaintiff...,https://legalinsurrection.com/2024/11/court-co...,2024-11-13T00:00:29Z,This website is using a security service to pr...
6,CAMEO CEO Carolina Martinez: The Untapped Powe...,"Rhett Buttle, Contributor, \n Rhett Buttle, Co...",Forbes,"As CEO of CAMEO, Carolina Martinez leads a net...",https://www.forbes.com/sites/rhettbuttle/2024/...,2024-11-12T15:57:27Z,"Carolina Martinez, CEO of the CAMEO Network A..."
7,Must the Professor Crusade? W. Ralph Eubanks o...,W. Ralph Eubanks,Lithub.com,“I’ve seen this movie before” is what I though...,https://lithub.com/must-the-professor-crusade-...,2024-11-12T13:41:54Z,Ive seen this movie before is what I thought a...
8,Dis/Trusting the Institution(s) of Literature,groenlat@tcd.ie,Upenn.edu,"updated: \r\nTuesday, November 12, 2024 - 5:41...",http://call-for-papers.sas.upenn.edu/cfp/2024/...,2024-11-12T10:19:01Z,Jump to navigation Call for Papers a service p...
9,I Sold a House Without an Agent — And It Saved...,Dana McMahan,Apartment Therapy,It’s totally worth it.\nREAD MORE...,https://www.apartmenttherapy.com/sell-a-house-...,2024-11-11T21:45:00Z,Access to this page has been denied because w...


### Filtering to only Fair Lending data

### Loading into vector database

### Creating a chain for chatbot