In [1]:
import yfinance as yf
import talib as ta
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from textblob import TextBlob

In [2]:
df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\yfinance_data\NVDA_historical_data.csv")

In [3]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,1999-01-22,0.04375,0.048828,0.038802,0.041016,0.037621,2714688000,0.0,0.0
1,1999-01-25,0.044271,0.045833,0.041016,0.045313,0.041562,510480000,0.0,0.0
2,1999-01-26,0.045833,0.046745,0.041146,0.041797,0.038337,343200000,0.0,0.0
3,1999-01-27,0.041927,0.042969,0.039583,0.041667,0.038218,244368000,0.0,0.0
4,1999-01-28,0.041667,0.041927,0.041276,0.041536,0.038098,227520000,0.0,0.0


In [4]:
df.shape

(6421, 9)

In [5]:
# convert the date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6421 entries, 0 to 6420
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          6421 non-null   datetime64[ns]
 1   Open          6421 non-null   float64       
 2   High          6421 non-null   float64       
 3   Low           6421 non-null   float64       
 4   Close         6421 non-null   float64       
 5   Adj Close     6421 non-null   float64       
 6   Volume        6421 non-null   int64         
 7   Dividends     6421 non-null   float64       
 8   Stock Splits  6421 non-null   float64       
dtypes: datetime64[ns](1), float64(7), int64(1)
memory usage: 451.6 KB


In [7]:
df.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,6421,6421.0,6421.0,6421.0,6421.0,6421.0,6421.0,6421.0,6421.0
mean,2011-10-25 21:12:15.025697024,6.207884,6.320466,6.085095,6.207702,6.181334,606079100.0,2.5e-05,0.003348
min,1999-01-22 00:00:00,0.034896,0.035547,0.033333,0.034115,0.031291,19680000.0,0.0,0.0
25%,2005-06-10 00:00:00,0.2775,0.28625,0.2695,0.27675,0.253843,345635000.0,0.0,0.0
50%,2011-10-24 00:00:00,0.45725,0.46475,0.45075,0.457,0.427637,507000000.0,0.0,0.0
75%,2018-03-13 00:00:00,4.1825,4.23575,4.1165,4.175,4.130434,736200000.0,0.0,0.0
max,2024-07-30 00:00:00,139.800003,140.759995,132.419998,135.580002,135.580002,9230856000.0,0.01,10.0
std,,16.138328,16.424862,15.794914,16.118517,16.121352,432348200.0,0.000316,0.142397


In [11]:
class FinancialAnalyzer:
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def retrieve_stock_data(self):
        return yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def calculate_moving_average(self, data, window_size):
        return ta.SMA(data, timeperiod=window_size)

    def calculate_technical_indicators(self, data):
        # Calculate various technical indicators
        data['SMA'] = self.calculate_moving_average(data['Close'], 20)
        data['RSI'] = ta.RSI(data['Close'], timeperiod=14)
        data['EMA'] = ta.EMA(data['Close'], timeperiod=20)
        macd, macd_signal, _ = ta.MACD(data['Close'])
        data['MACD'] = macd
        data['MACD_Signal'] = macd_signal
        # Add more indicators as needed
        return data

    def plot_stock_data(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'SMA'], title='Stock Price with Moving Average')
        fig.show()

    def plot_rsi(self, data):
        fig = px.line(data, x=data.index, y='RSI', title='Relative Strength Index (RSI)')
        fig.show()

    def plot_ema(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'EMA'], title='Stock Price with Exponential Moving Average')
        fig.show()

    def plot_macd(self, data):
        fig = px.line(data, x=data.index, y=['MACD', 'MACD_Signal'], title='Moving Average Convergence Divergence (MACD)')
        fig.show()


In [14]:
nvda = yf.Ticker("NVDA")
hist = nvda.history(period="max")

In [15]:
fig = make_subplots(rows=1, cols=4, subplot_titles=['Close', 'Open', 'High', 'Low'])

fig.add_trace(go.Scatter(x=hist.index, y=hist['Close']), row=1, col=1)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Open']), row=1, col=2)
fig.add_trace(go.Scatter(x=hist.index, y=hist['High']), row=1, col=3)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Low']), row=1, col=4)

fig.update_layout(height=400, width=1200, title_text='Apple Stock Analysis')
fig.show()

Correlation

In [3]:
raw_df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\raw_analyst_ratings.csv")

In [4]:
# convert the date column to datetime
raw_df['date'] = pd.to_datetime(raw_df['date'], format='ISO8601')

In [5]:
# Define the ticker and the time period
ticker = 'NVDA'
start_date = '1999-01-22'
end_date = '2024-07-30'
# Fetch the stock data
nvda = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [6]:
nvda.columns = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

nvda.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [8]:
# Create a DataFrame with dates from the NVDA data
date = raw_df.index
text_data = pd.DataFrame(date, columns=['date'])

In [9]:
# Generate random headlines
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Assign a random headline to each date
text_data['Headline'] = np.random.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,date,Headline
0,0,Market downturn affects AAPL
1,1,AAPL's market share grows
2,2,Market downturn affects AAPL
3,3,AAPL reports earnings that exceed forecasts
4,4,Concerns over AAPL's future growth


In [10]:
def calculate_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Applying the sentiment analysis function to your headlines
text_data['Sentiment'] = text_data['Headline'].apply(calculate_sentiment)

In [11]:
text_data.head()

Unnamed: 0,date,Headline,Sentiment
0,0,Market downturn affects AAPL,0.0
1,1,AAPL's market share grows,0.0
2,2,Market downturn affects AAPL,0.0
3,3,AAPL reports earnings that exceed forecasts,0.0
4,4,Concerns over AAPL's future growth,0.0


In [12]:
# Aligning sentiment scores with the stock closing prices on the same date
combined_data = text_data.set_index('date').join(nvda['Close'])
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Market downturn affects AAPL,0.0,
1,AAPL's market share grows,0.0,
2,Market downturn affects AAPL,0.0,
3,AAPL reports earnings that exceed forecasts,0.0,
4,Concerns over AAPL's future growth,0.0,


In [13]:
combined_data['Daily Returns'] = combined_data['Close'].pct_change()

  combined_data['Daily Returns'] = combined_data['Close'].pct_change()


In [14]:
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Market downturn affects AAPL,0.0,,
1,AAPL's market share grows,0.0,,
2,Market downturn affects AAPL,0.0,,
3,AAPL reports earnings that exceed forecasts,0.0,,
4,Concerns over AAPL's future growth,0.0,,


In [15]:
correlation = combined_data['Sentiment'].corr(combined_data['Daily Returns'])
print("Correlation between sentiment and daily stock returns:", correlation)

Correlation between sentiment and daily stock returns: nan


In [16]:
correlation = combined_data['Sentiment'].corr(combined_data['Close'])
print("Correlation between sentiment and closing:", correlation)

Correlation between sentiment and closing: nan
