---

### Import your Necessary Libraries

---

In [1]:
# Do I even need to say anything?
import pandas as pd
import numpy as np

# Plots for days
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

# Useful datetime functions
from datetime import date
from datetime import timedelta
from datetime import datetime

# Sentiment polarity analysis
from textblob import TextBlob 

# Save your models, reuse them
import pickle

# Fit, transform, all you want
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# The main powerhouse of our project
from openbb_terminal.sdk import openbb

---

### Let's Get the Sentiment for Your Company

---

In [43]:
# Tickers to analyse. Add or remove symbols here to grow or shrink the
# combined news dataframe.
terms = [
    "AAPL",
    "TSLA",
    "AMZN",
    "TSCO",
    "SIVBQ",
]

# Placeholder frame; the news-collection cell below fills `combined_df`
combined_df = pd.DataFrame()

# This will give us the closing price for each line from the news pull
def get_closing_price(date, term):
    """Return the last ``Close`` value loaded for ``term`` around ``date``.

    Prices are requested from four days before ``date`` (slack for weekends)
    up to one day after it, and the ``Close`` of the final returned row is
    used.

    Args:
        date: Timestamp-like value that supports ``pd.DateOffset`` arithmetic.
        term: Ticker symbol passed to ``openbb.stocks.load``.

    Returns:
        The ``Close`` value of the last row, or ``None`` when no rows come back.
    """
    window_start = date - pd.DateOffset(days=4)
    window_end = date + pd.DateOffset(days=1)
    prices = openbb.stocks.load(symbol=term, start_date=window_start, end_date=window_end)

    # Guard clause: nothing was returned for this window
    if prices.empty:
        return None
    return prices['Close'].iloc[-1]
    
# get_closing_price 3 days later function
def get_closing_price_3d(date, term):
    """Return the last ``Close`` value loaded for ``term`` up to four days after ``date``.

    Prices are requested from four days before ``date`` up to four days after
    it, and the ``Close`` of the final returned row is used as the
    "3 days later" close.

    NOTE(review): for very recent dates the window may contain no later
    trading session, in which case this presumably returns the same close as
    ``get_closing_price`` (the sample output shows Close == Close_3D for the
    newest rows) - confirm whether that is intended.

    Args:
        date: Timestamp-like value that supports ``pd.DateOffset`` arithmetic.
        term: Ticker symbol passed to ``openbb.stocks.load``.

    Returns:
        The ``Close`` value of the last row, or ``None`` when no rows come back.
    """
    window_start = date - pd.DateOffset(days=4)
    window_end = date + pd.DateOffset(days=4)
    prices = openbb.stocks.load(symbol=term, start_date=window_start, end_date=window_end)

    # Guard clause: nothing was returned for this window
    if prices.empty:
        return None
    later_close = prices['Close'].iloc[-1]
    return later_close

In [None]:
# Pull the news for every ticker and combine it into one dataframe.
# The per-ticker frames are collected in a list and concatenated once, so
# re-running this cell rebuilds `combined_df` from scratch instead of
# appending a second copy of every article to the existing frame.
news_frames = []
for term in terms:
    # OpenBB's .news() pulls recent news articles related to the ticker
    news = openbb.news(term=term, sort="published")
    ticker_news = pd.DataFrame({'Term': term, 'Title': news['title'], 'Date': news['published'], 'Link': news['link']})
    # Truncate the publication timestamp to its calendar day
    ticker_news['Date'] = pd.to_datetime(ticker_news['Date']).dt.strftime('%Y-%m-%d')
    news_frames.append(ticker_news)

combined_df = pd.concat(news_frames, ignore_index=True)

# This makes sure the date stays as DateTime for future use
combined_df['Date'] = combined_df['Date'].astype('datetime64[ns]')

# Many articles share the same (Date, Term), and the price helpers only depend
# on those two values, so fetch prices once per unique pair and merge them
# back instead of issuing two requests per article.
price_lookup = combined_df[['Date', 'Term']].drop_duplicates().reset_index(drop=True)
price_lookup['Close'] = [
    get_closing_price(day, ticker)
    for day, ticker in zip(price_lookup['Date'], price_lookup['Term'])
]
price_lookup['Close_3D'] = [
    get_closing_price_3d(day, ticker)
    for day, ticker in zip(price_lookup['Date'], price_lookup['Term'])
]

# A left merge keeps every article, in its original order
combined_df = combined_df.merge(price_lookup, on=['Date', 'Term'], how='left')


def _label_sentiment(text):
    """Label a headline 'positive', 'negative' or 'neutral' from its TextBlob polarity."""
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'


# Assign a sentiment label to every headline
combined_df['Sentiment'] = combined_df['Title'].map(_label_sentiment)

# Price move between the day of the article and the "3 days later" close
combined_df['Difference'] = combined_df['Close_3D'] - combined_df['Close']
combined_df

In [45]:
# Display the combined news / price / sentiment dataframe
combined_df

Unnamed: 0,Term,Title,Date,Link,Close,Close_3D,Sentiment,Difference
0,AAPL,New Study Published in Physician Leadership Jo...,2023-04-20,https://news.google.com/rss/articles/CBMiWmh0d...,166.649994,166.649994,positive,0.000000
1,AAPL,"Apple may triple investment in India, deputy t...",2023-04-20,https://news.google.com/rss/articles/CBMi0AFod...,166.649994,166.649994,neutral,0.000000
2,AAPL,Adding to Our Cash Position by Trimming 4 Well...,2023-04-20,https://news.google.com/rss/articles/CBMib2h0d...,166.649994,166.649994,neutral,0.000000
3,AAPL,Guru Fundamental Report for AAPL - Warren Buff...,2023-04-20,https://news.google.com/rss/articles/CBMiUmh0d...,166.649994,166.649994,neutral,0.000000
4,AAPL,Apple Inc.'s (NASDAQ:AAPL) institutional share...,2023-04-20,https://news.google.com/rss/articles/CBMiUWh0d...,166.649994,166.649994,positive,0.000000
...,...,...,...,...,...,...,...,...
497,SIVBQ,"Comerica Inc. stock rises Wednesday, outperfor...",2023-03-29,https://news.google.com/rss/articles/CBMicWh0d...,0.970000,0.905000,neutral,-0.065000
498,SIVBQ,Mphasis Clarifies Having No Exposure to Failed...,2023-03-29,https://news.google.com/rss/articles/CBMibmh0d...,0.970000,0.905000,negative,-0.065000
499,SIVBQ,U.S. stocks lower at close of trade; Dow Jones...,2023-03-28,https://news.google.com/rss/articles/CBMigAFod...,0.400000,0.905000,negative,0.505000
500,SIVBQ,"Citizens Financial, Fifth Third among oversold...",2023-03-28,https://news.google.com/rss/articles/CBMifGh0d...,0.400000,0.905000,neutral,0.505000


---

### Single out the most frequent instance of 'Sentiment' by 'Date'

---

In [46]:
# Count headlines per (Date, Term, Sentiment).
# Only the three categorical keys are used for grouping. Grouping on the float
# price columns as well would split one sentiment's count across several
# groups whenever the prices of same-day rows differ, and would silently drop
# rows with missing prices. That drop is kept, but made explicit here.
price_columns = ['Close', 'Close_3D', 'Difference']
priced_news = combined_df.dropna(subset=price_columns)

sentiment_counts = (
    priced_news
    .groupby(['Date', 'Term', 'Sentiment'], as_index=False)
    .agg(
        Close=('Close', 'first'),
        Close_3D=('Close_3D', 'first'),
        Difference=('Difference', 'first'),
        Count=('Title', 'size'),
    )
)

# Get the sentiment with the highest count for each (Date, Term);
# idxmax keeps the first sentiment when counts are tied
idx = sentiment_counts.groupby(['Date', 'Term'])['Count'].idxmax()

sentiment_max_count = sentiment_counts.loc[idx, ['Date', 'Term', 'Sentiment', 'Close', 'Close_3D', 'Difference']]

# Sort by Date and then Term so rows sharing a date always come out in the same order
sentiment_max_count = sentiment_max_count.sort_values(by=['Date', 'Term'], ascending=True)
sentiment_max_count

Unnamed: 0,Date,Term,Sentiment,Close,Close_3D,Difference
0,2022-04-03,TSCO,positive,226.710007,224.919998,-1.790009
1,2022-08-15,TSCO,neutral,198.210007,205.210007,7.000000
2,2022-09-12,TSCO,positive,202.649994,187.960007,-14.689987
3,2022-09-16,TSCO,neutral,190.020004,192.979996,2.959991
4,2022-10-03,TSCO,positive,194.119995,200.080002,5.960007
...,...,...,...,...,...,...
309,2023-04-20,TSCO,neutral,249.000000,249.000000,0.000000
302,2023-04-20,AAPL,neutral,166.649994,166.649994,0.000000
305,2023-04-20,AMZN,neutral,103.809998,103.809998,0.000000
307,2023-04-20,SIVBQ,neutral,0.763250,0.760100,-0.003150


In [47]:
# Keep only the rows from the last LOOKBACK_DAYS days.
# pd.Timestamp.today() is used rather than datetime.date.today() so this cell
# gives the same midnight-aligned window even if the imported name `date` has
# been rebound elsewhere in the notebook.
LOOKBACK_DAYS = 30
end_date = pd.Timestamp.today().normalize() + pd.DateOffset(days=1)  # tomorrow at midnight
start_date = end_date - pd.DateOffset(days=LOOKBACK_DAYS + 1)  # midnight, LOOKBACK_DAYS days ago
last_30_days_df = sentiment_max_count.loc[sentiment_max_count['Date'] >= start_date]
last_30_days_df

Unnamed: 0,Date,Term,Sentiment,Close,Close_3D,Difference
123,2023-03-22,AAPL,positive,157.830002,160.250000,2.419998
125,2023-03-22,AMZN,neutral,98.699997,98.129997,-0.570000
127,2023-03-22,TSCO,negative,228.839996,227.210007,-1.629990
128,2023-03-22,TSLA,neutral,191.149994,190.410004,-0.739990
129,2023-03-23,AAPL,neutral,158.929993,160.250000,1.320007
...,...,...,...,...,...,...
309,2023-04-20,TSCO,neutral,249.000000,249.000000,0.000000
302,2023-04-20,AAPL,neutral,166.649994,166.649994,0.000000
305,2023-04-20,AMZN,neutral,103.809998,103.809998,0.000000
307,2023-04-20,SIVBQ,neutral,0.763250,0.760100,-0.003150


In [48]:
# Line plot of the price difference per ticker over the filtered window
fig = px.line(last_30_days_df, x='Date', y='Difference', color='Term', labels={'Difference': 'Difference'})

# Annotate every plotted point whose dominant sentiment is positive or negative.
# The plotted frame is iterated directly rather than re-filtering it once per
# row of `sentiment_max_count`. This assumes one row per (Date, Term), which is
# what the idxmax selection produces. No loop variable is named `date`, so the
# `datetime.date` import is left intact for other cells.
annotated_points = last_30_days_df[last_30_days_df['Sentiment'].isin(['positive', 'negative'])]
for point in annotated_points.itertuples(index=False):
    fig.add_annotation(x=point.Date, y=point.Difference, text=point.Sentiment, showarrow=True)

# Set plot title
fig.update_layout(title='Difference w/ Date with Sentiment (Highest Count)')

# Show the plot
fig.show()

---

### What If You Wanted to Quickly Pull Up the News for a Specific Date?

---

In [50]:
# Select the rows of the dataframe for one ticker on one day
is_target_day = combined_df['Date'] == '2023-04-02'
is_target_term = combined_df['Term'] == 'TSLA'
date_filter = combined_df[is_target_day & is_target_term]

# Printing the frame as a markdown table shows the link in full
markdown = date_filter.to_markdown()
print(markdown)

|     | Term   | Title                                                                      | Date                | Link                                                                                                                                                                                 |   Close |   Close_3D | Sentiment   |   Difference |
|----:|:-------|:---------------------------------------------------------------------------|:--------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------:|-----------:|:------------|-------------:|
| 140 | TSLA   | Tesla (TSLA) confirms new record deliveries, beats expectations - Electrek | 2023-04-02 00:00:00 | https://news.google.com/rss/articles/CBMiYGh0dHBzOi8vZWxlY3RyZWsuY28vMjAyMy8wNC8wMi90ZXNsYS10c2xhLWNvbmZpcm1zLXExLTIwMjMtcmVjb3JkLWRlbGl2ZXJpZXMtYmVhdHMtZXhwZWN0YXRpb25zL9IBAA?oc=5 |

---

### Let's Build a Model, Why Not?

---

In [51]:
# Hold out half of the headlines for testing; the fixed seed keeps the split repeatable
titles = combined_df['Title']
labels = combined_df['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(titles, labels, test_size=0.5, random_state=55)

# Turn the headlines into word-count features; the vocabulary is learned
# from the training headlines only
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a logistic regression classifier on the vectorized training headlines
lr = LogisticRegression()
lr.fit(X_train_vec, y_train)

# Persist the fitted model and vectorizer so they can be reloaded later
for artifact_path, artifact in (('model.pkl', lr), ('vectorizer.pkl', vectorizer)):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

# Predicted labels for the held-out headlines
y_pred = lr.predict(X_test_vec)

# Make your predictions and see how we did
def evaluate_model(model, X_test, y_test):
    """Build a classification report for ``model`` on the given test data.

    The predictions are computed from the arguments, so the report always
    describes the model and data that were passed in rather than whatever a
    global ``y_pred`` happens to hold.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features in the form ``model.predict`` accepts.
        y_test: True labels aligned with ``X_test``.

    Returns:
        The value returned by ``classification_report(y_test, predictions)``.
    """
    predictions = model.predict(X_test)

    # Use a classification report to evaluate the model using the predictions and testing data
    return classification_report(y_test, predictions)

# Evaluate the fitted model on the vectorized held-out headlines and print the report
report = evaluate_model(lr, X_test_vec, y_test)
print(report)

              precision    recall  f1-score   support

    negative       1.00      0.07      0.14        27
     neutral       0.68      0.83      0.75       142
    positive       0.61      0.56      0.58        82

    accuracy                           0.66       251
   macro avg       0.76      0.49      0.49       251
weighted avg       0.69      0.66      0.63       251



In [52]:
# Tally the predicted labels and report the most frequent one as the overall sentiment
predicted_labels = pd.Series(y_pred)
sentiment_counts = predicted_labels.value_counts()
general_sentiment = sentiment_counts.idxmax()

print(f"The general sentiment for {terms} is:", general_sentiment)
print(sentiment_counts)

The general sentiment for ['AAPL', 'TSLA', 'AMZN', 'TSCO', 'SIVBQ'] is: neutral
neutral     173
positive     76
negative      2
dtype: int64


---

### Thank You For Watching

---