In [27]:
import pandas as pd
# Load the flattened news-feed table from its parquet file.
df = pd.read_parquet('../data/news_feed_flattened.parquet')

# First ten rows. This is not the cell's final expression, so the notebook
# evaluates it but does not render it.
df.head(10)

# Last ten rows (evaluated but, for the same reason, not rendered).
df.tail(10)

# Number of distinct article links; as the final expression this is the
# value the cell displays.
df["link"].nunique()

49

In [28]:
# Show the column names followed by the first row of the frame.
# Each print emits a heading and a value on separate lines via sep="\n".
print("DataFrame columns:", df.columns.tolist(), sep="\n")
print("\nSample row:", df.iloc[0], sep="\n")

DataFrame columns:
['type', 'start_date', 'end_date', 'ticker', 'count', 'growth', 'text', 'link', 'model']

Sample row:
type                                                 individual
start_date                                           2023-07-17
end_date                                             2023-07-24
ticker                                                     TSLA
count                                                        42
growth                                                    2.43%
text          Billionaire investor Chamath Palihapitiya beli...
link          https://pythoninvest.com/tpost/yk09rupzv1-week...
model                                                      None
Name: 0, dtype: object


In [29]:
from minsearch import Index

# One dict per DataFrame row, with every value coerced to its string form
# (so missing values become the literal text "None").
docs = [
    {field: str(value) for field, value in row.items()}
    for row in df.to_dict(orient='records')
]

# "link" is registered as a keyword field (used for filtering in search_news);
# every other column is registered as a text field.
index = Index(
    keyword_fields=["link"],
    text_fields=["type", "start_date", "end_date", "ticker", "count", "growth", "text", "model"],
)

# Fit on the documents; kept as the final expression so the cell still
# echoes the fitted Index object.
index.fit(docs)

<minsearch.minsearch.Index at 0x12698d5e0>

In [30]:
def print_results(results, num_results=5, max_chars=200):
    """Print a formatted summary of the top search results.

    Args:
        results: Sequence of result dicts (e.g. the list returned by
            ``search_news``). Each dict must provide the keys 'text',
            'type', 'ticker', 'growth', 'count', 'model', 'start_date',
            'end_date' and 'link'.
        num_results: Maximum number of results to display (default: 5).
        max_chars: Maximum number of characters of the 'text' field to
            show per result (default: 200). An ellipsis is appended only
            when the text was actually cut.

    Raises:
        KeyError: If a result dict lacks one of the expected keys.
    """
    # Make an empty result set visible instead of printing nothing.
    if len(results) == 0:
        print("No results found.")
        return

    for i, result in enumerate(results[:num_results], 1):
        text = result['text']
        # Only signal truncation when characters were really dropped.
        snippet = text[:max_chars] + "..." if len(text) > max_chars else text

        print(f"Result {i}:")
        print(f"Text: {snippet}")
        print(f"Type: {result['type']}")
        print(f"Ticker: {result['ticker']}")
        print(f"Growth: {result['growth']}")
        print(f"Count: {result['count']}")
        print(f"Model: {result['model']}")
        print(f"Period: {result['start_date']} to {result['end_date']}")
        print(f"Link: {result['link']}")
        print("-" * 80)

def search_news(query, link=None, boost_dict=None):
    """Run a query against the module-level minsearch ``index``.

    Args:
        query: Search query string.
        link: Optional link value; when truthy, results are restricted to
            documents whose "link" field equals it.
        boost_dict: Optional mapping of field name to boost weight. When
            omitted (None), a default weighting that favours "text" is used.

    Returns:
        Whatever ``index.search`` returns for the query (a list of results).
    """
    default_boosts = {
        "text": 3,
        "type": 2,
        "ticker": 2,
        "growth": 1.5,
        "model": 1.5,
        "count": 1,
        "start_date": 1,
        "end_date": 1
    }
    boosts = default_boosts if boost_dict is None else boost_dict

    # An empty filter means "no restriction"; only add the link when given.
    filters = {"link": link} if link else {}

    return index.search(query, filter_dict=filters, boost_dict=boosts)

In [31]:
# Smoke test: a generic two-word topical query with the default boosts.
test_query = "technology growth"
results = search_news(query=test_query)

# Header line first, then the formatted top hits.
print(f"Search results for '{test_query}':\n")
print_results(results)

Search results for 'technology growth':

Result 1:
Text: Apple is gearing up for a potentially significant announcement, expected to involve artificial intelligence (AI) technology in its products. Analysts have revised the price target for Apple stock, ant...
Type: individual
Ticker: AAPL
Growth: 1.66%
Count: 10
Model: None
Period: 2024-05-21 to 2024-05-27
Link: https://pythoninvest.com/tpost/hcufmog3c1-week-21-27-may-2024
--------------------------------------------------------------------------------
Result 2:
Text: Tesla's stock has been in the spotlight, with anticipation building for the company's upcoming Robotaxi event on October 10th, which could showcase its progress in autonomous driving and AI technology...
Type: individual
Ticker: TSLA
Growth: 0.45%
Count: 13
Model: None
Period: 2024-09-24 to 2024-09-30
Link: https://pythoninvest.com/tpost/fukvxbgbl1-week-24-30-september-2024
--------------------------------------------------------------------------------
Result 3:
Text: T

In [32]:
# Smoke test: a query that names a specific ticker symbol.
test_query = "news for NVDA"
results = search_news(query=test_query)

# Header line first, then the formatted top hits.
print(f"Search results for '{test_query}':\n")
print_results(results)

Search results for 'news for NVDA':

Result 1:
Text: Nvidia (NVDA) has been receiving positive attention from investors and analysts. The company's strong financial performance and above-average growth make it an appealing investment option. Despite fai...
Type: individual
Ticker: NVDA
Growth: 1.78%
Count: 52
Model: None
Period: 2023-08-22 to 2023-08-28
Link: https://pythoninvest.com/tpost/ec9rcf2xc1-week-22-28-august-2023
--------------------------------------------------------------------------------
Result 2:
Text: NVIDIA Corporation (NVDA) has been gaining investor attention as it continues to dominate the artificial intelligence (AI) market. The company's stock is receiving positive reviews and attention from ...
Type: individual
Ticker: NVDA
Growth: -0.2%
Count: 34
Model: None
Period: 2024-01-05 to 2024-01-15
Link: https://pythoninvest.com/tpost/hk2eoc0281-week-9-15-january-2024
--------------------------------------------------------------------------------
Result 3:
Text: The n

In [34]:
# Smoke test: a natural-language phrasing of a numeric condition. The query
# is handed to search_news as plain text.
# NOTE(review): presumably matched as keywords rather than evaluated as
# "count > 100" -- confirm against minsearch before relying on it.
test_query = "all entries with more than 100 news count"
results = search_news(query=test_query)

# Header line first, then the formatted top hits.
print(f"Search results for '{test_query}':\n")
print_results(results)

Search results for 'all entries with more than 100 news count':

Result 1:
Text: STEM's Q2 earnings are expected to benefit from strong momentum in its solar and storage services revenues. Vornado Realty Trust reported better-than-expected Q2 earnings, driven by decent top-line gr...
Type: market_1_week
Ticker: multiple_tickers
Growth: None
Count: 2408
Model: MARKET NEWS SUMMARY
Period: 2023-08-01 to 2023-08-07
Link: https://pythoninvest.com/tpost/drued3aod1-week-1-7-august-2023
--------------------------------------------------------------------------------
Result 2:
Text: Lake Street upgraded NN Inc from Hold to Buy. B of A Securities upgraded Sendas Distribuidora SA from Neutral to Buy. Morgan Stanley upgraded Curtiss-Wright Corp from Equal-Weight to Overweight. UBS u...
Type: market_1_day
Ticker: multiple_tickers
Growth: None
Count: 250
Model: MARKET NEWS SUMMARY
Period: 2023-08-06 to 2023-08-07
Link: https://pythoninvest.com/tpost/drued3aod1-week-1-7-august-2023
------------------

In [37]:
# Smoke test: a query worded to target the one-day market summaries.
test_query = "all entries market 1 day summary"
results = search_news(query=test_query)

# Header line first, then the formatted top hits.
print(f"Search results for '{test_query}':\n")
print_results(results)

Search results for 'all entries market 1 day summary':

Result 1:
Text: STEM's Q2 earnings are expected to benefit from strong momentum in its solar and storage services revenues. Vornado Realty Trust reported better-than-expected Q2 earnings, driven by decent top-line gr...
Type: market_1_week
Ticker: multiple_tickers
Growth: None
Count: 2408
Model: MARKET NEWS SUMMARY
Period: 2023-08-01 to 2023-08-07
Link: https://pythoninvest.com/tpost/drued3aod1-week-1-7-august-2023
--------------------------------------------------------------------------------
Result 2:
Text: for the last 24 hours from 31/07/2023 21:55 UTC time: Associated Banc-Corp's (ASB) profits are expected to be impacted by rising expenses. AB InBev's (BUD) Q2 earnings are expected to benefit from its...
Type: market_1_day
Ticker: multiple_tickers
Growth: None
Count: 289
Model: 1 day
Period: 2023-07-30 to 2023-07-31
Link: https://pythoninvest.com/tpost/j1x7ayspd1-week-25-31-july-2023
-----------------------------------------

In [38]:
# Smoke test: a query worded to target the one-week market summaries.
test_query = "all entries market 1 week summary"
results = search_news(query=test_query)

# Header line first, then the formatted top hits.
print(f"Search results for '{test_query}':\n")
print_results(results)

Search results for 'all entries market 1 week summary':

Result 1:
Text: This week's financial news was characterized by fluctuating markets and mixed investor sentiment. Market participants closely monitored ongoing economic recovery efforts, while grappling with concerns...
Type: market_1_week
Ticker: multiple_tickers
Growth: None
Count: 2210
Model: week
Period: 2023-08-08 to 2023-08-14
Link: https://pythoninvest.com/tpost/u7h0i2kxy1-week-8-14-august-2023
--------------------------------------------------------------------------------
Result 2:
Text: STEM's Q2 earnings are expected to benefit from strong momentum in its solar and storage services revenues. Vornado Realty Trust reported better-than-expected Q2 earnings, driven by decent top-line gr...
Type: market_1_week
Ticker: multiple_tickers
Growth: None
Count: 2408
Model: MARKET NEWS SUMMARY
Period: 2023-08-01 to 2023-08-07
Link: https://pythoninvest.com/tpost/drued3aod1-week-1-7-august-2023
--------------------------------------