In [18]:
import yfinance as yf
import talib as ta
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from textblob import TextBlob

In [3]:
df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\yfinance_data\AMZN_historical_data.csv")

In [18]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,1997-05-15,0.121875,0.125,0.096354,0.097917,0.097917,1443120000,0.0,0.0
1,1997-05-16,0.098438,0.098958,0.085417,0.086458,0.086458,294000000,0.0,0.0
2,1997-05-19,0.088021,0.088542,0.08125,0.085417,0.085417,122136000,0.0,0.0
3,1997-05-20,0.086458,0.0875,0.081771,0.081771,0.081771,109344000,0.0,0.0
4,1997-05-21,0.081771,0.082292,0.06875,0.071354,0.071354,377064000,0.0,0.0


In [19]:
df.shape

(6846, 9)

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6846 entries, 0 to 6845
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          6846 non-null   object 
 1   Open          6846 non-null   float64
 2   High          6846 non-null   float64
 3   Low           6846 non-null   float64
 4   Close         6846 non-null   float64
 5   Adj Close     6846 non-null   float64
 6   Volume        6846 non-null   int64  
 7   Dividends     6846 non-null   float64
 8   Stock Splits  6846 non-null   float64
dtypes: float64(7), int64(1), object(1)
memory usage: 481.5+ KB


In [21]:
# convert the date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [22]:
df.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,6846,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0,6846.0
mean,2010-12-21 09:15:30.762489088,37.360464,37.801009,36.882561,37.351096,37.351096,138071300.0,0.0,0.003944
min,1997-05-15 00:00:00,0.070313,0.072396,0.065625,0.069792,0.069792,9744000.0,0.0,0.0
25%,2004-03-05 18:00:00,2.070125,2.120625,2.040156,2.07675,2.07675,65054420.0,0.0,0.0
50%,2010-12-20 12:00:00,8.5745,8.66675,8.4665,8.5915,8.5915,102065000.0,0.0,0.0
75%,2017-10-08 06:00:00,50.453876,50.723249,50.059875,50.4265,50.4265,156756000.0,0.0,0.0
max,2024-07-30 00:00:00,200.089996,201.199997,199.050003,200.0,200.0,2086584000.0,0.0,20.0
std,,53.756955,54.366449,53.087051,53.729913,53.729913,138234600.0,0.0,0.246789


In [23]:
class FinancialAnalyzer:
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def retrieve_stock_data(self):
        return yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def calculate_moving_average(self, data, window_size):
        return ta.SMA(data, timeperiod=window_size)

    def calculate_technical_indicators(self, data):
        # Calculate various technical indicators
        data['SMA'] = self.calculate_moving_average(data['Close'], 20)
        data['RSI'] = ta.RSI(data['Close'], timeperiod=14)
        data['EMA'] = ta.EMA(data['Close'], timeperiod=20)
        macd, macd_signal, _ = ta.MACD(data['Close'])
        data['MACD'] = macd
        data['MACD_Signal'] = macd_signal
        # Add more indicators as needed
        return data

    def plot_stock_data(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'SMA'], title='Stock Price with Moving Average')
        fig.show()

    def plot_rsi(self, data):
        fig = px.line(data, x=data.index, y='RSI', title='Relative Strength Index (RSI)')
        fig.show()

    def plot_ema(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'EMA'], title='Stock Price with Exponential Moving Average')
        fig.show()

    def plot_macd(self, data):
        fig = px.line(data, x=data.index, y=['MACD', 'MACD_Signal'], title='Moving Average Convergence Divergence (MACD)')
        fig.show()


In [28]:
amzn = yf.Ticker("AMZN")
hist = amzn.history(period="max")

In [29]:
fig = make_subplots(rows=1, cols=4, subplot_titles=['Close', 'Open', 'High', 'Low'])

fig.add_trace(go.Scatter(x=hist.index, y=hist['Close']), row=1, col=1)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Open']), row=1, col=2)
fig.add_trace(go.Scatter(x=hist.index, y=hist['High']), row=1, col=3)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Low']), row=1, col=4)

fig.update_layout(height=400, width=1200, title_text='Amazon Stock Analysis')
fig.show()

Correlation

In [4]:
raw_df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\raw_analyst_ratings.csv")

In [5]:
raw_df.head()

Unnamed: 0.1,Unnamed: 0,headline,url,publisher,date,stock
0,0,Stocks That Hit 52-Week Highs On Friday,https://www.benzinga.com/news/20/06/16190091/s...,Benzinga Insights,2020-06-05 10:30:54-04:00,A
1,1,Stocks That Hit 52-Week Highs On Wednesday,https://www.benzinga.com/news/20/06/16170189/s...,Benzinga Insights,2020-06-03 10:45:20-04:00,A
2,2,71 Biggest Movers From Friday,https://www.benzinga.com/news/20/05/16103463/7...,Lisa Levin,2020-05-26 04:30:07-04:00,A
3,3,46 Stocks Moving In Friday's Mid-Day Session,https://www.benzinga.com/news/20/05/16095921/4...,Lisa Levin,2020-05-22 12:45:06-04:00,A
4,4,B of A Securities Maintains Neutral on Agilent...,https://www.benzinga.com/news/20/05/16095304/b...,Vick Meyer,2020-05-22 11:38:59-04:00,A


In [10]:
# convert the date column to datetime
raw_df['date'] = pd.to_datetime(raw_df['date'], format='ISO8601')

In [11]:
# Define the ticker and the time period
ticker = 'AMZN'
start_date = '1997-05-15'
end_date = '2024-07-30'
# Fetch the stock data
amzn = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [13]:
amzn.columns = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

amzn.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [15]:
# Create a DataFrame with dates from the AMZN data
date = raw_df.index
text_data = pd.DataFrame(date, columns=['date'])

In [16]:
# Generate random headlines
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Assign a random headline to each date
text_data['Headline'] = np.random.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,date,Headline
0,0,Market downturn affects AAPL
1,1,AAPL reports earnings that exceed forecasts
2,2,Market downturn affects AAPL
3,3,New AAPL CEO announced
4,4,AAPL reports earnings that exceed forecasts


In [19]:
def calculate_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Applying the sentiment analysis function to your headlines
text_data['Sentiment'] = text_data['Headline'].apply(calculate_sentiment)

In [20]:
text_data.head()

Unnamed: 0,date,Headline,Sentiment
0,0,Market downturn affects AAPL,0.0
1,1,AAPL reports earnings that exceed forecasts,0.0
2,2,Market downturn affects AAPL,0.0
3,3,New AAPL CEO announced,0.136364
4,4,AAPL reports earnings that exceed forecasts,0.0


In [21]:
# Aligning sentiment scores with the stock closing prices on the same date
combined_data = text_data.set_index('date').join(amzn['Close'])
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Market downturn affects AAPL,0.0,
1,AAPL reports earnings that exceed forecasts,0.0,
2,Market downturn affects AAPL,0.0,
3,New AAPL CEO announced,0.136364,
4,AAPL reports earnings that exceed forecasts,0.0,


In [22]:
combined_data['Daily Returns'] = combined_data['Close'].pct_change()

  combined_data['Daily Returns'] = combined_data['Close'].pct_change()


In [23]:
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Market downturn affects AAPL,0.0,,
1,AAPL reports earnings that exceed forecasts,0.0,,
2,Market downturn affects AAPL,0.0,,
3,New AAPL CEO announced,0.136364,,
4,AAPL reports earnings that exceed forecasts,0.0,,


In [24]:
correlation = combined_data['Sentiment'].corr(combined_data['Daily Returns'])
print("Correlation between sentiment and daily stock returns:", correlation)

Correlation between sentiment and daily stock returns: nan


In [25]:
correlation = combined_data['Sentiment'].corr(combined_data['Close'])
print("Correlation between sentiment and closing:", correlation)c

Correlation between sentiment and closing: nan
