In [13]:
import yfinance as yf
import talib as ta
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from textblob import TextBlob

In [14]:
df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\yfinance_data\AAPL_historical_data.csv")

In [15]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,1980-12-12,0.128348,0.128906,0.128348,0.128348,0.098943,469033600,0.0,0.0
1,1980-12-15,0.12221,0.12221,0.121652,0.121652,0.093781,175884800,0.0,0.0
2,1980-12-16,0.113281,0.113281,0.112723,0.112723,0.086898,105728000,0.0,0.0
3,1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089049,86441600,0.0,0.0
4,1980-12-18,0.118862,0.11942,0.118862,0.118862,0.09163,73449600,0.0,0.0


In [16]:
df.shape

(10998, 9)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10998 entries, 0 to 10997
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          10998 non-null  object 
 1   Open          10998 non-null  float64
 2   High          10998 non-null  float64
 3   Low           10998 non-null  float64
 4   Close         10998 non-null  float64
 5   Adj Close     10998 non-null  float64
 6   Volume        10998 non-null  int64  
 7   Dividends     10998 non-null  float64
 8   Stock Splits  10998 non-null  float64
dtypes: float64(7), int64(1), object(1)
memory usage: 773.4+ KB


In [18]:
# convert the date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [19]:
df.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,10998,10998.0,10998.0,10998.0,10998.0,10998.0,10998.0,10998.0,10998.0
mean,2002-09-26 14:14:28.085106304,22.29916,22.539055,22.068367,22.312754,21.494496,318080600.0,0.000752,0.001546
min,1980-12-12 00:00:00,0.049665,0.049665,0.049107,0.049107,0.037857,0.0,0.0,0.0
25%,1991-10-28 06:00:00,0.299107,0.30371,0.290179,0.298549,0.24184,113254500.0,0.0,0.0
50%,2002-09-23 12:00:00,0.53125,0.537946,0.522321,0.53125,0.432079,205741200.0,0.0,0.0
75%,2013-08-25 06:00:00,20.204196,20.389554,19.998036,20.19616,17.253029,398527500.0,0.0,0.0
max,2024-07-30 00:00:00,236.479996,237.229996,233.089996,234.820007,234.548523,7421641000.0,0.25,7.0
std,,46.003685,46.496031,45.540929,46.03766,45.561916,335452300.0,0.011753,0.083663


In [20]:
class FinancialAnalyzer:
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def retrieve_stock_data(self):
        return yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def calculate_moving_average(self, data, window_size):
        return ta.SMA(data, timeperiod=window_size)

    def calculate_technical_indicators(self, data):
        # Calculate various technical indicators
        data['SMA'] = self.calculate_moving_average(data['Close'], 20)
        data['RSI'] = ta.RSI(data['Close'], timeperiod=14)
        data['EMA'] = ta.EMA(data['Close'], timeperiod=20)
        macd, macd_signal, _ = ta.MACD(data['Close'])
        data['MACD'] = macd
        data['MACD_Signal'] = macd_signal
        # Add more indicators as needed
        return data

    def plot_stock_data(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'SMA'], title='Stock Price with Moving Average')
        fig.show()

    def plot_rsi(self, data):
        fig = px.line(data, x=data.index, y='RSI', title='Relative Strength Index (RSI)')
        fig.show()

    def plot_ema(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'EMA'], title='Stock Price with Exponential Moving Average')
        fig.show()

    def plot_macd(self, data):
        fig = px.line(data, x=data.index, y=['MACD', 'MACD_Signal'], title='Moving Average Convergence Divergence (MACD)')
        fig.show()


In [21]:
aapl = yf.Ticker("AAPL")
hist = aapl.history(period="max")

In [22]:
fig = make_subplots(rows=1, cols=4, subplot_titles=['Close', 'Open', 'High', 'Low'])

fig.add_trace(go.Scatter(x=hist.index, y=hist['Close']), row=1, col=1)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Open']), row=1, col=2)
fig.add_trace(go.Scatter(x=hist.index, y=hist['High']), row=1, col=3)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Low']), row=1, col=4)

fig.update_layout(height=400, width=1200, title_text='Apple Stock Analysis')
fig.show()

## Correlation

In [12]:
raw_df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\raw_analyst_ratings.csv")

In [23]:
raw_df.head()

Unnamed: 0.1,Unnamed: 0,headline,url,publisher,date,stock
0,0,Stocks That Hit 52-Week Highs On Friday,https://www.benzinga.com/news/20/06/16190091/s...,Benzinga Insights,2020-06-05 10:30:54-04:00,A
1,1,Stocks That Hit 52-Week Highs On Wednesday,https://www.benzinga.com/news/20/06/16170189/s...,Benzinga Insights,2020-06-03 10:45:20-04:00,A
2,2,71 Biggest Movers From Friday,https://www.benzinga.com/news/20/05/16103463/7...,Lisa Levin,2020-05-26 04:30:07-04:00,A
3,3,46 Stocks Moving In Friday's Mid-Day Session,https://www.benzinga.com/news/20/05/16095921/4...,Lisa Levin,2020-05-22 12:45:06-04:00,A
4,4,B of A Securities Maintains Neutral on Agilent...,https://www.benzinga.com/news/20/05/16095304/b...,Vick Meyer,2020-05-22 11:38:59-04:00,A


In [26]:
# convert the date column to datetime
raw_df['date'] = pd.to_datetime(raw_df['date'], format='ISO8601')

In [27]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1407328 entries, 0 to 1407327
Data columns (total 6 columns):
 #   Column      Non-Null Count    Dtype                    
---  ------      --------------    -----                    
 0   Unnamed: 0  1407328 non-null  int64                    
 1   headline    1407328 non-null  object                   
 2   url         1407328 non-null  object                   
 3   publisher   1407328 non-null  object                   
 4   date        1407328 non-null  datetime64[ns, UTC-04:00]
 5   stock       1407328 non-null  object                   
dtypes: datetime64[ns, UTC-04:00](1), int64(1), object(4)
memory usage: 64.4+ MB


In [31]:
raw_df['stock'].unique()

array(['A', 'AA', 'AAC', ..., 'ZU', 'ZUMZ', 'ZX'],
      shape=(6204,), dtype=object)

In [39]:
# Define the ticker and the time period
ticker = 'AAPL'
start_date = '1980-12-12'
end_date = '2024-07-30'
# Fetch the stock data
aapl_data = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [40]:
aapl_data.columns = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

aapl_data.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [41]:
aapl_data

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-12-12,0.098834,0.128348,0.128906,0.128348,0.128348,469033600
1980-12-15,0.093678,0.121652,0.122210,0.121652,0.122210,175884800
1980-12-16,0.086802,0.112723,0.113281,0.112723,0.113281,105728000
1980-12-17,0.088951,0.115513,0.116071,0.115513,0.115513,86441600
1980-12-18,0.091530,0.118862,0.119420,0.118862,0.118862,73449600
...,...,...,...,...,...,...
2024-07-23,224.502853,225.009995,226.940002,222.679993,224.369995,39960300
2024-07-24,218.047424,218.539993,224.800003,217.130005,224.000000,61777600
2024-07-25,216.999817,217.490005,220.850006,214.619995,218.929993,51391200
2024-07-26,217.468750,217.960007,219.490005,216.009995,218.699997,41601300


In [None]:
# Create a DataFrame with dates from the AAPL data
date = raw_df.index
text_data = pd.DataFrame(date, columns=['date'])

In [42]:
# Generate random headlines
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Assign a random headline to each date
text_data['Headline'] = np.random.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,date,Headline
0,0,AAPL reports earnings that exceed forecasts
1,1,Market downturn affects AAPL
2,2,AAPL reports earnings that exceed forecasts
3,3,New AAPL CEO announced
4,4,AAPL reports earnings that exceed forecasts


In [43]:
def calculate_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Applying the sentiment analysis function to your headlines
text_data['Sentiment'] = text_data['Headline'].apply(calculate_sentiment)

In [44]:
text_data.head()

Unnamed: 0,date,Headline,Sentiment
0,0,AAPL reports earnings that exceed forecasts,0.0
1,1,Market downturn affects AAPL,0.0
2,2,AAPL reports earnings that exceed forecasts,0.0
3,3,New AAPL CEO announced,0.136364
4,4,AAPL reports earnings that exceed forecasts,0.0


In [46]:
# Aligning sentiment scores with the stock closing prices on the same date
combined_data = text_data.set_index('date').join(aapl_data['Close'])
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,AAPL reports earnings that exceed forecasts,0.0,
1,Market downturn affects AAPL,0.0,
2,AAPL reports earnings that exceed forecasts,0.0,
3,New AAPL CEO announced,0.136364,
4,AAPL reports earnings that exceed forecasts,0.0,


In [47]:
combined_data['Daily Returns'] = combined_data['Close'].pct_change()


The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [48]:
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,AAPL reports earnings that exceed forecasts,0.0,,
1,Market downturn affects AAPL,0.0,,
2,AAPL reports earnings that exceed forecasts,0.0,,
3,New AAPL CEO announced,0.136364,,
4,AAPL reports earnings that exceed forecasts,0.0,,


In [49]:
correlation = combined_data['Sentiment'].corr(combined_data['Daily Returns'])
print("Correlation between sentiment and daily stock returns:", correlation)

Correlation between sentiment and daily stock returns: nan


In [50]:
correlation = combined_data['Sentiment'].corr(combined_data['Close'])
print("Correlation between sentiment and closing:", correlation)

Correlation between sentiment and closing: nan


In [38]:
text_data

Unnamed: 0,date
0,0
1,1
2,2
3,3
4,4
...,...
1407323,1407323
1407324,1407324
1407325,1407325
1407326,1407326
