In [82]:
import requests
import json
import csv
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from datetime import datetime, timedelta

Fetches the articles using a NYT API and obtains the title, abstract, lead_paragraph, keywords and publication date and puts it into nyt_articles_details.json

In [83]:

url = 'https://api.nytimes.com/svc/archive/v1/2024/1.json?api-key=a2tKA0T1H3RhXAqw4VlTaSJq5BqejQ7g'

API_KEY = 'a2tKA0T1H3RhXAqw4VlTaSJq5BqejQ7g'

def fetch_articles(month):
    start_date = month.strftime('%Y%m01')
    end_date = (month + timedelta(days=31)).strftime('%Y%m01')
    params = {
        'begin_date': start_date,
        'end_date': end_date,
        'api-key': API_KEY
    }
    response = requests.get(url, params=params)
    return response.json()

def extract_article_details(articles):
    article_details = []
    for article in articles['response']['docs']:
        details = {
            'title' : article.get('headline').get('main'),
            'abstract': article.get('abstract'),
            'lead_paragraph': article.get('lead_paragraph'),
            'keywords': [keyword['value'] for keyword in article.get('keywords', [])],
            'pub_date': article.get('pub_date')
        }
        article_details.append(details)
    return article_details

def save_articles_to_json(data, filename):
    with open(filename, 'w') as f:
        json.dump(data, f, indent=4)

# Fetch articles for the past month
current_month = datetime.now().replace(day=1)
past_month = current_month - timedelta(days=30)
data = fetch_articles(past_month)

# Extract specific details
article_details = extract_article_details(data)

# Save the extracted details to a JSON file
save_articles_to_json(article_details, 'nyt_articles_details.json')

print(f'Saved article details to nyt_articles_details.json')




Saved article details to nyt_articles_details.json


Searches for matches in each article of keywords in the title, abstract, lead_paragraph or keywords

In [84]:


def search_articles(articles, search_terms):
    matched_articles = []
    
    for article in articles:
        # Combine all searchable fields into a single string
        searchable_text = (
            (article.get('title') or '') + ' ' +
            ' '.join(article.get('keywords') or []) + ' ' +
            (article.get('abstract') or '') + ' ' +
            (article.get('lead_paragraph') or '')
        ).lower()  # Convert to lowercase for case-insensitive search

        # Check if any of the search terms are in the searchable text
        if any(term.lower() in searchable_text for term in search_terms):
            matched_articles.append(article)

    return matched_articles

def save_articles_to_json(data, filename):
    with open(filename, 'w') as f:
        json.dump(data, f, indent=4)

# Load the condensed JSON file with article details
with open('nyt_articles_details.json', 'r') as f:
    articles = json.load(f)

# Define the search terms (e.g., "economy", "COVID-19", etc.)
search_terms = ['cryptocurrency', 'bitcoin', 'ethereum', 'blockchain', 'metaverse', 'web3']

# Search for articles that contain any of the search terms
matched_articles = search_articles(articles, search_terms)

# Save the matched articles to a new JSON file
save_articles_to_json(matched_articles, 'nyt_matched_articles.json')

print(f'Found {len(matched_articles)} articles matching the search terms.')
print(f'Saved matched articles to nyt_matched_articles.json')


Found 14 articles matching the search terms.
Saved matched articles to nyt_matched_articles.json


Get sentiment analysis of each article and inputs that at bottom of each article in nyt_analyzed_articles.json

In [85]:


# Download VADER's lexicon
nltk.download('vader_lexicon')

# Initialize VADER sentiment analyzer
sia = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    # Get the sentiment scores
    sentiment_scores = sia.polarity_scores(text)
    compound_score = sentiment_scores['compound']
    
    # Determine sentiment based on the compound score
    if compound_score >= 0.05:
        return 'positive'
    elif compound_score <= -0.05:
        return 'negative'
    else:
        return 'neutral'

def perform_sentiment_analysis(articles):
    for article in articles:
        # Combine the relevant text fields for sentiment analysis
        text = (
            (article.get('title') or '') + ' ' +
            (article.get('abstract') or '') + ' ' +
            (article.get('lead_paragraph') or '')
        )
        
        # Perform sentiment analysis
        sentiment = analyze_sentiment(text)
        article['sentiment'] = sentiment

    return articles

def save_articles_to_json(data, filename):
    with open(filename, 'w') as f:
        json.dump(data, f, indent=4)

# Load the matched articles JSON file
with open('nyt_matched_articles.json', 'r') as f:
    articles = json.load(f)

# Perform sentiment analysis on the matched articles
analyzed_articles = perform_sentiment_analysis(articles)

# Save the analyzed articles with sentiment to a new JSON file
save_articles_to_json(analyzed_articles, 'nyt_analyzed_articles.json')

print(f'Sentiment analysis complete. Results saved to nyt_analyzed_articles.json')


Sentiment analysis complete. Results saved to nyt_analyzed_articles.json


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/lukesarausad/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Calculates overall sentiment analysis for all articles and if it positively, neutral or negatively impacts the price of given crypto

In [86]:

# Function to simulate impact based on sentiment analysis
def predict_crypto_impact(sentiment_data, crypto_symbol):
    sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
    
    for article in sentiment_data:
        sentiment_counts[article['sentiment']] += 1

    total_articles = len(sentiment_data)
    
    if total_articles == 0:
        return f"No relevant articles found for {crypto_symbol}."

    positive_ratio = sentiment_counts['positive'] / total_articles
    negative_ratio = sentiment_counts['negative'] / total_articles

    # Simple prediction logic based on sentiment ratio
    if positive_ratio > negative_ratio:
        prediction = f"The sentiment suggests a potential positive impact on {crypto_symbol}."
    elif negative_ratio > positive_ratio:
        prediction = f"The sentiment suggests a potential negative impact on {crypto_symbol}."
    else:
        prediction = f"The sentiment suggests a neutral impact on {crypto_symbol}."

    return prediction

def load_analyzed_articles(filename):
    with open(filename, 'r') as f:
        return json.load(f)

# User input for cryptocurrency symbol (e.g., BTC, ETH)
crypto_symbol = input("Enter the cryptocurrency symbol (e.g., BTC, ETH): ").upper()

# Load the analyzed articles with sentiment data
sentiment_data = load_analyzed_articles('nyt_analyzed_articles.json')

# Predict impact on the chosen cryptocurrency
prediction = predict_crypto_impact(sentiment_data, crypto_symbol)

print(prediction)


The sentiment suggests a potential positive impact on ETH.


In [87]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import json

# Function to fetch historical data using yfinance
def fetch_historical_data(crypto_symbol, period='1mo'):
    # yfinance requires the symbol to have '-USD' at the end for cryptocurrencies
    ticker = f"{crypto_symbol}-USD"
    df = yf.download(ticker, period=period, interval="1d")
    
    if df.empty:
        print(f"Error: No data found for {crypto_symbol}.")
        return pd.DataFrame()
    
    df['price'] = df['Close']  # Use the Close price as the 'price'
    df = df[['price']]  # We only need the price column
    df.reset_index(inplace=True)
    df['date'] = pd.to_datetime(df['Date']).dt.date  # Extract the date only
    df.set_index('date', inplace=True)
    
    return df

# Load sentiment data from JSON file
with open('nyt_analyzed_articles.json', 'r') as f:
    sentiment_data = json.load(f)

# Aggregate sentiment scores by date
sentiment_scores = []
for article in sentiment_data:
    if 'pub_date' not in article:
        continue  # Skip the article if 'pub_date' is missing

    date = pd.to_datetime(article['pub_date']).date()
    score = 1 if article['sentiment'] == 'positive' else -1 if article['sentiment'] == 'negative' else 0
    sentiment_scores.append({'date': date, 'sentiment_score': score})

sentiment_df = pd.DataFrame(sentiment_scores)
sentiment_df = sentiment_df.groupby('date').mean().reset_index()

# Ensure the 'date' column in sentiment_df is datetime type
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

# User input for cryptocurrency symbol (e.g., BTC, ETH)
crypto_symbol = input("Enter the cryptocurrency symbol (e.g., BTC, ETH): ").upper()

# Fetch historical price data for the chosen cryptocurrency using yfinance
price_df = fetch_historical_data(crypto_symbol, period='1mo')

# Ensure the index of price_df is datetime and reset the index to keep date as a column
if not price_df.empty:
    price_df.index = pd.to_datetime(price_df.index)

    # Merge price data with sentiment scores
    data_df = price_df.merge(sentiment_df, how='left', left_index=True, right_on='date').fillna(0)

    # Prepare features (X) and target (y)
    X = data_df[['sentiment_score']]
    y = data_df['price']

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Train Linear Regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict future price based on recent sentiment
    recent_sentiment_score = X_test.iloc[-1]['sentiment_score']
    predicted_price = model.predict([[recent_sentiment_score]])

    # Get the corresponding date for the prediction
    prediction_date = X_test.index[-1]  # This corresponds to the last date in X_test

    print(f"Predicted price for {crypto_symbol} on {prediction_date} based on recent sentiment: ${predicted_price[0]:.2f}")
else:
    print("No data available.")


[*********************100%%**********************]  1 of 1 completed

Predicted price for ETH on nan based on recent sentiment: $3031.24



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['Date']).dt.date  # Extract the date only


In [88]:
with open('nyt_analyzed_articles.json', 'r') as f:
    sentiment_data = json.load(f)

# Simulate sentiment scoring (assuming one score per day)
sentiment_scores = []
for article in sentiment_data:
    # Check if 'pub_date' exists
    if 'pub_date' not in article:
        continue  # Skip the article if 'pub_date' is missing

    # Convert the publication date
    date = pd.to_datetime(article['pub_date']).date()
    score = 1 if article['sentiment'] == 'positive' else -1 if article['sentiment'] == 'negative' else 0
    sentiment_scores.append({'date': date, 'sentiment_score': score})

sentiment_df = pd.DataFrame(sentiment_scores)
sentiment_df = sentiment_df.groupby('date').mean().reset_index()

# Now you can proceed with the rest of the script
print(sentiment_df.head())

         date  sentiment_score
0  2024-01-09              1.0
1  2024-01-10              1.0
2  2024-01-11              1.0
3  2024-01-19              1.0
4  2024-01-23              1.0
