In [3]:
import yfinance as yf
import talib as ta
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from textblob import TextBlob

In [15]:
df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\yfinance_data\GOOG_historical_data.csv")

In [16]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,2004-08-19,2.490664,2.591785,2.390042,2.499133,2.496292,897427216,0.0,0.0
1,2004-08-20,2.51582,2.716817,2.503118,2.697639,2.694573,458857488,0.0,0.0
2,2004-08-23,2.758411,2.826406,2.71607,2.724787,2.72169,366857939,0.0,0.0
3,2004-08-24,2.770615,2.779581,2.579581,2.61196,2.608991,306396159,0.0,0.0
4,2004-08-25,2.614201,2.689918,2.587302,2.640104,2.637103,184645512,0.0,0.0


In [11]:
df.shape

(5020, 9)

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5020 entries, 0 to 5019
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          5020 non-null   object 
 1   Open          5020 non-null   float64
 2   High          5020 non-null   float64
 3   Low           5020 non-null   float64
 4   Close         5020 non-null   float64
 5   Adj Close     5020 non-null   float64
 6   Volume        5020 non-null   int64  
 7   Dividends     5020 non-null   float64
 8   Stock Splits  5020 non-null   float64
dtypes: float64(7), int64(1), object(1)
memory usage: 353.1+ KB


In [13]:
# convert the date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [14]:
df.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,5020,5020.0,5020.0,5020.0,5020.0,5020.0,5020.0,5020.0,5020.0
mean,2014-08-07 16:36:31.553784832,45.251315,45.728793,44.800587,45.274515,45.224493,115753900.0,4e-05,0.004583
min,2004-08-19 00:00:00,2.47049,2.534002,2.390042,2.490913,2.488082,158434.0,0.0,0.0
25%,2009-08-12 18:00:00,13.034705,13.145415,12.881592,13.03209,13.017277,27339500.0,0.0,0.0
50%,2014-08-07 12:00:00,26.98221,27.194902,26.791387,27.005927,26.97523,55854800.0,0.0,0.0
75%,2019-08-02 18:00:00,60.215126,60.758375,59.811811,60.280252,60.211734,142533300.0,0.0,0.0
max,2024-07-30 00:00:00,191.75,193.309998,190.619995,192.660004,192.660004,1650833000.0,0.2,20.0
std,,43.338635,43.818298,42.918167,43.377299,43.332534,149793600.0,0.002823,0.284034


In [15]:
class FinancialAnalyzer:
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def retrieve_stock_data(self):
        return yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def calculate_moving_average(self, data, window_size):
        return ta.SMA(data, timeperiod=window_size)

    def calculate_technical_indicators(self, data):
        # Calculate various technical indicators
        data['SMA'] = self.calculate_moving_average(data['Close'], 20)
        data['RSI'] = ta.RSI(data['Close'], timeperiod=14)
        data['EMA'] = ta.EMA(data['Close'], timeperiod=20)
        macd, macd_signal, _ = ta.MACD(data['Close'])
        data['MACD'] = macd
        data['MACD_Signal'] = macd_signal
        # Add more indicators as needed
        return data

    def plot_stock_data(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'SMA'], title='Stock Price with Moving Average')
        fig.show()

    def plot_rsi(self, data):
        fig = px.line(data, x=data.index, y='RSI', title='Relative Strength Index (RSI)')
        fig.show()

    def plot_ema(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'EMA'], title='Stock Price with Exponential Moving Average')
        fig.show()

    def plot_macd(self, data):
        fig = px.line(data, x=data.index, y=['MACD', 'MACD_Signal'], title='Moving Average Convergence Divergence (MACD)')
        fig.show()


In [17]:
goog = yf.Ticker("GOOG")
hist = goog.history(period="max")

In [18]:
fig = make_subplots(rows=1, cols=4, subplot_titles=['Close', 'Open', 'High', 'Low'])

fig.add_trace(go.Scatter(x=hist.index, y=hist['Close']), row=1, col=1)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Open']), row=1, col=2)
fig.add_trace(go.Scatter(x=hist.index, y=hist['High']), row=1, col=3)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Low']), row=1, col=4)

fig.update_layout(height=400, width=1200, title_text='Apple Stock Analysis')
fig.show()

Correlation

In [4]:
raw_df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\raw_analyst_ratings.csv")

In [5]:
# convert the date column to datetime
raw_df['date'] = pd.to_datetime(raw_df['date'], format='ISO8601')

In [22]:
# Define the ticker and the time period
ticker = 'GOOG'
start_date = '2002-08-19'
end_date = '2024-07-30'
# Fetch the stock data
goog = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [23]:
goog.columns = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

goog.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [24]:
# Create a DataFrame with dates from the GOOG data
date = raw_df.index
text_data = pd.DataFrame(date, columns=['date'])

In [27]:
# Generate random headlines
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Assign a random headline to each date
text_data['Headline'] = np.random.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,date,Headline
0,0,AAPL rumored to acquire a tech startup
1,1,AAPL hits record high
2,2,New AAPL CEO announced
3,3,AAPL hits record high
4,4,AAPL invests in renewable energy


In [28]:
def calculate_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Applying the sentiment analysis function to your headlines
text_data['Sentiment'] = text_data['Headline'].apply(calculate_sentiment)

In [12]:
text_data.head()

Unnamed: 0,date,Headline,Sentiment
0,0,AAPL faces regulatory scrutiny,0.0
1,1,AAPL reports earnings that exceed forecasts,0.0
2,2,AAPL's market share grows,0.0
3,3,AAPL invests in renewable energy,0.0
4,4,Market downturn affects AAPL,0.0


In [29]:
# Aligning sentiment scores with the stock closing prices on the same date
combined_data = text_data.set_index('date').join(goog['Close'])
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,AAPL rumored to acquire a tech startup,0.0,
1,AAPL hits record high,0.16,
2,New AAPL CEO announced,0.136364,
3,AAPL hits record high,0.16,
4,AAPL invests in renewable energy,0.0,


In [30]:
combined_data['Daily Returns'] = combined_data['Close'].pct_change()

  combined_data['Daily Returns'] = combined_data['Close'].pct_change()


In [32]:
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,AAPL rumored to acquire a tech startup,0.0,,
1,AAPL hits record high,0.16,,
2,New AAPL CEO announced,0.136364,,
3,AAPL hits record high,0.16,,
4,AAPL invests in renewable energy,0.0,,


In [33]:
correlation = combined_data['Sentiment'].corr(combined_data['Daily Returns'])
print("Correlation between sentiment and daily stock returns:", correlation)

Correlation between sentiment and daily stock returns: nan


In [34]:
correlation = combined_data['Sentiment'].corr(combined_data['Close'])
print("Correlation between sentiment and closing:", correlation)

Correlation between sentiment and closing: nan
