In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
import json
import ta


# One time stock data collection
# Define the stock tickers for each sector

tech_stocks = ['AAPL', 'MSFT', 'GOOGL', 'IBM']  # Apple, Microsoft, Alphabet, IBM
finance_stocks = ['JPM', 'BAC', 'WFC', 'GS']  # JPMorgan Chase, Bank of America, Wells Fargo, Goldman Sachs
industrial_stocks = ['MMM', 'GE', 'HON', 'CAT']  # 3M, General Electric, Honeywell, Caterpillar
fmcg_stocks = ['KO', 'PEP', 'PG', 'CL']  # Coca-Cola, PepsiCo, Procter & Gamble, Colgate-Palmolive

# Combine all stock tickers into one list
all_stocks = tech_stocks + finance_stocks + industrial_stocks + fmcg_stocks

# Define the data period
start_date = '2015-01-01'
end_date = '2023-03-31'

# Download the data
data = yf.download(all_stocks, start=start_date, end=end_date)['Adj Close']

# Save the data to a CSV file
data.to_csv('stocks_data.csv')

# Display the data for verification
print(data.head())

In [5]:
# Initialize a dictionary to store all indicators
all_indicators = {}

# Calculate indicators for each stock
for stock in all_stocks:
    stock_data = pd.DataFrame(data[stock])
    stock_data['SMA'] = ta.trend.SMAIndicator(stock_data[stock], window=14).sma_indicator()
    stock_data['EMA'] = ta.trend.EMAIndicator(stock_data[stock], window=14).ema_indicator()
    stock_data['RSI'] = ta.momentum.RSIIndicator(stock_data[stock], window=14).rsi()
    stock_data['MACD'] = ta.trend.MACD(stock_data[stock]).macd()
    stock_data['MACD_Signal'] = ta.trend.MACD(stock_data[stock]).macd_signal()
    stock_data['MACD_Diff'] = ta.trend.MACD(stock_data[stock]).macd_diff()
    stock_data['OBV'] = ta.volume.OnBalanceVolumeIndicator(stock_data[stock], volume=None).on_balance_volume()
    stock_data['ATR'] = ta.volatility.AverageTrueRange(stock_data[stock], high=None, low=None).average_true_range()
    stock_data['Bollinger_High'] = ta.volatility.BollingerBands(stock_data[stock]).bollinger_hband()
    stock_data['Bollinger_Low'] = ta.volatility.BollingerBands(stock_data[stock]).bollinger_lband()
    stock_data['ADX'] = ta.trend.ADXIndicator(stock_data[stock], high=None, low=None).adx()
    stock_data['CCI'] = ta.trend.CCIIndicator(stock_data[stock], high=None, low=None).cci()
    stock_data['Williams %R'] = ta.momentum.WilliamsRIndicator(stock_data[stock], high=None, low=None).williams_r()
    all_indicators[stock] = stock_data

# Combine all indicators into a single DataFrame
combined_indicators = pd.concat(all_indicators, axis=1)

# Display the combined DataFrame
print(combined_indicators)

[*********************100%%**********************]  16 of 16 completed


Ticker           AAPL        BAC        CAT         CL         GE      GOOGL  \
Date                                                                           
2015-01-02  24.402172  14.708727  70.907631  55.241547  99.899513  26.447147   
2015-01-05  23.714722  14.281434  67.164665  54.833481  98.065781  25.943224   
2015-01-06  23.716955  13.854141  66.732491  54.257374  95.952965  25.302961   
2015-01-07  24.049520  13.919881  67.766609  54.689468  95.992828  25.228544   
2015-01-08  24.973562  14.207482  68.461166  55.433590  97.148903  25.316446   

Ticker              GS        HON         IBM        JPM         KO  \
Date                                                                  
2015-01-02  161.304260  78.936691  103.354813  48.268318  31.106474   
2015-01-05  156.267914  77.432449  101.728539  46.769829  31.106474   
2015-01-06  153.106750  77.251305   99.534676  45.557137  31.342688   
2015-01-07  155.388474  77.810463   98.884148  45.626644  31.733925   
2015-01-08  1

In [6]:
data

Ticker,AAPL,BAC,CAT,CL,GE,GOOGL,GS,HON,IBM,JPM,KO,MMM,MSFT,PEP,PG,WFC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-01-02,24.402172,14.708727,70.907631,55.241547,99.899513,26.447147,161.304260,78.936691,103.354813,48.268318,31.106474,92.933609,40.378063,71.740623,69.090263,41.823158
2015-01-05,23.714722,14.281434,67.164665,54.833481,98.065781,25.943224,156.267914,77.432449,101.728539,46.769829,31.106474,90.837677,40.006760,71.201271,68.761765,40.676266
2015-01-06,23.716955,13.854141,66.732491,54.257374,95.952965,25.302961,153.106750,77.251305,99.534676,45.557137,31.342688,89.869019,39.419571,70.661919,68.448524,39.827579
2015-01-07,24.049520,13.919881,67.766609,54.689468,95.992828,25.228544,155.388474,77.810463,98.884148,45.626644,31.733925,90.520462,39.920399,72.728157,68.807587,40.064594
2015-01-08,24.973562,14.207482,68.461166,55.433590,97.148903,25.316446,157.869247,79.228073,101.033401,46.646252,32.117771,92.690025,41.094803,74.049934,69.594429,40.951527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-24,159.183258,26.146852,211.912888,71.179588,72.601448,105.319130,301.167419,184.058487,119.736397,120.737251,58.563774,77.969261,278.286041,172.580551,142.163818,35.108025
2023-03-27,157.226379,27.447451,212.879639,71.887321,74.142944,102.342545,306.910004,183.892181,123.578194,124.197647,58.996506,78.239082,274.130157,172.966003,141.417709,36.309624
2023-03-28,156.600555,27.090990,214.988892,72.226631,74.007858,100.914177,306.823273,183.823700,123.606880,124.574623,59.063824,78.431816,272.989532,172.908173,141.814987,36.028606
2023-03-29,159.699799,27.620863,217.762192,72.197556,74.738892,101.273766,309.020111,184.948654,123.960487,124.825920,59.486946,79.549622,278.226532,174.103119,142.251007,36.794144


In [7]:
data.head(2)

Ticker,AAPL,BAC,CAT,CL,GE,GOOGL,GS,HON,IBM,JPM,KO,MMM,MSFT,PEP,PG,WFC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-01-02,24.402172,14.708727,70.907631,55.241547,99.899513,26.447147,161.30426,78.936691,103.354813,48.268318,31.106474,92.933609,40.378063,71.740623,69.090263,41.823158
2015-01-05,23.714722,14.281434,67.164665,54.833481,98.065781,25.943224,156.267914,77.432449,101.728539,46.769829,31.106474,90.837677,40.00676,71.201271,68.761765,40.676266


In [14]:
import requests

api_key = '28838b5ff11a41a6b92520fce55b544a'


def get_news_sentiment(api_key, query, from_date, to_date):
    url = f"https://newsapi.org/v2/everything?q={query}&from={from_date}&to={to_date}&apiKey={api_key}"
    response = requests.get(url)
    articles = response.json().get('articles', [])
    
    return [article['title'] for article in articles]

news_sentiment_data = get_news_sentiment(api_key, 'AAPL', '2023-01-01', '2023-12-31')

In [19]:
from newsapi import NewsApiClient
api = NewsApiClient(api_key='28838b5ff11a41a6b92520fce55b544a')
text = api.get_everything(q='AAPL')



In [21]:
import json
with open('AAPL_news.json', 'w') as fp:
    json.dump(text, fp)

In [1]:
with open('AAPL_news.json', 'r') as file:
    news = json.load(file)


news

NameError: name 'json' is not defined

In [16]:
import requests

api_key = '28838b5ff11a41a6b92520fce55b544a'

def get_news_sentiment(api_key, query, from_date, to_date):
    url = f"https://newsapi.org/v2/everything?q={query}&from={from_date}&to={to_date}&apiKey={api_key}"
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
        
        articles = response.json().get('articles', [])
        return [article['title'] for article in articles]
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching news data: {e}")
        return []

# Example usage
news_sentiment_data = get_news_sentiment(api_key, 'AAPL', '2023-01-01', '2023-12-31')
print(news_sentiment_data)

Error fetching news data: 426 Client Error: Upgrade Required for url: https://newsapi.org/v2/everything?q=AAPL&from=2023-01-01&to=2023-12-31&apiKey=28838b5ff11a41a6b92520fce55b544a
[]


In [15]:
news_sentiment_data

[]

In [9]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

nltk.download('stopwords')
nltk.download('punkt')

def clean_text(text):
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^A-Za-z0-9 ]+', '', text)
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word.lower() not in stopwords.words('english')]
    return ' '.join(tokens)

cleaned_news_sentiment_data = [clean_text(article) for article in news_sentiment_data]

ModuleNotFoundError: No module named 'nltk'

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def get_sentiment_scores(text_list):
    analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = []
    for text in text_list:
        score = analyzer.polarity_scores(text)
        sentiment_scores.append(score['compound'])
    return sentiment_scores

news_sentiment_scores = get_sentiment_scores(cleaned_news_sentiment_data)

In [None]:

from datetime import datetime

def aggregate_sentiment_by_date(dates, scores):
    df = pd.DataFrame({'date': dates, 'score': scores})
    df['date'] = pd.to_datetime(df['date']).dt.date
    aggregated_sentiment = df.groupby('date').mean()
    return aggregated_sentiment

# Example dates for the sentiment data (should match the length of news_sentiment_scores)
sentiment_dates = ['2023-01-01', '2023-01-01', '2023-01-02', ...]  # Replace with actual dates
aggregated_sentiment = aggregate_sentiment_by_date(sentiment_dates, news_sentiment_scores)

In [None]:
# Load your stock data
stock_data = yf.download(tickers, start='2015-01-01', end='2023-12-31')['Adj Close']

# Convert stock data to a DataFrame and reset the index to use dates as a column
stock_df = stock_data.reset_index()

# Merge stock data with sentiment data on the date column
merged_data = stock_df.merge(aggregated_sentiment, how='left', left_on='Date', right_index=True)