In [3]:
import yfinance as yf
import talib as ta
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from textblob import TextBlob

In [4]:
df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\yfinance_data\TSLA_historical_data.csv")

In [5]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,1.592667,281494500,0.0,0.0
1,2010-06-30,1.719333,2.028,1.553333,1.588667,1.588667,257806500,0.0,0.0
2,2010-07-01,1.666667,1.728,1.351333,1.464,1.464,123282000,0.0,0.0
3,2010-07-02,1.533333,1.54,1.247333,1.28,1.28,77097000,0.0,0.0
4,2010-07-06,1.333333,1.333333,1.055333,1.074,1.074,103003500,0.0,0.0


In [6]:
df.shape

(3545, 9)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3545 entries, 0 to 3544
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          3545 non-null   object 
 1   Open          3545 non-null   float64
 2   High          3545 non-null   float64
 3   Low           3545 non-null   float64
 4   Close         3545 non-null   float64
 5   Adj Close     3545 non-null   float64
 6   Volume        3545 non-null   int64  
 7   Dividends     3545 non-null   float64
 8   Stock Splits  3545 non-null   float64
dtypes: float64(7), int64(1), object(1)
memory usage: 249.4+ KB


In [8]:
# convert the date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [9]:
df.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
count,3545,3545.0,3545.0,3545.0,3545.0,3545.0,3545.0,3545.0,3545.0
mean,2017-07-12 17:17:02.623413248,75.505658,77.159237,73.743492,75.487264,75.487264,96997080.0,0.0,0.002257
min,2010-06-29 00:00:00,1.076,1.108667,0.998667,1.053333,1.053333,1777500.0,0.0,0.0
25%,2014-01-06 00:00:00,11.516667,11.861333,11.216667,11.565333,11.565333,47706000.0,0.0,0.0
50%,2017-07-13 00:00:00,17.463333,17.688667,17.1,17.459333,17.459333,82506000.0,0.0,0.0
75%,2021-01-20 00:00:00,161.880005,165.333328,158.360001,161.479996,161.479996,122889000.0,0.0,0.0
max,2024-07-30 00:00:00,411.470001,414.496674,405.666656,409.970001,409.970001,914082000.0,0.0,5.0
std,,102.530489,104.806516,100.051787,102.456615,102.456615,78668280.0,0.0,0.097921


In [10]:
class FinancialAnalyzer:
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def retrieve_stock_data(self):
        return yf.download(self.ticker, start=self.start_date, end=self.end_date)

    def calculate_moving_average(self, data, window_size):
        return ta.SMA(data, timeperiod=window_size)

    def calculate_technical_indicators(self, data):
        # Calculate various technical indicators
        data['SMA'] = self.calculate_moving_average(data['Close'], 20)
        data['RSI'] = ta.RSI(data['Close'], timeperiod=14)
        data['EMA'] = ta.EMA(data['Close'], timeperiod=20)
        macd, macd_signal, _ = ta.MACD(data['Close'])
        data['MACD'] = macd
        data['MACD_Signal'] = macd_signal
        # Add more indicators as needed
        return data

    def plot_stock_data(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'SMA'], title='Stock Price with Moving Average')
        fig.show()

    def plot_rsi(self, data):
        fig = px.line(data, x=data.index, y='RSI', title='Relative Strength Index (RSI)')
        fig.show()

    def plot_ema(self, data):
        fig = px.line(data, x=data.index, y=['Close', 'EMA'], title='Stock Price with Exponential Moving Average')
        fig.show()

    def plot_macd(self, data):
        fig = px.line(data, x=data.index, y=['MACD', 'MACD_Signal'], title='Moving Average Convergence Divergence (MACD)')
        fig.show()


In [11]:
tesla = yf.Ticker("TSLA")
hist = tesla.history(period="max")

$TSLA: possibly delisted; no price data found  (1d 1926-01-12 -> 2024-12-18)


In [12]:
fig = make_subplots(rows=1, cols=4, subplot_titles=['Close', 'Open', 'High', 'Low'])

fig.add_trace(go.Scatter(x=hist.index, y=hist['Close']), row=1, col=1)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Open']), row=1, col=2)
fig.add_trace(go.Scatter(x=hist.index, y=hist['High']), row=1, col=3)
fig.add_trace(go.Scatter(x=hist.index, y=hist['Low']), row=1, col=4)

fig.update_layout(height=400, width=1200, title_text='Tesla Stock Analysis')
fig.show()

Correlation

In [4]:
raw_df = pd.read_csv(r"C:\Users\Maintenant prêt\Downloads\10x\stock\raw_analyst_ratings.csv")

In [5]:
# convert the date column to datetime
raw_df['date'] = pd.to_datetime(raw_df['date'], format='ISO8601')

In [6]:
raw_df.head()

Unnamed: 0.1,Unnamed: 0,headline,url,publisher,date,stock
0,0,Stocks That Hit 52-Week Highs On Friday,https://www.benzinga.com/news/20/06/16190091/s...,Benzinga Insights,2020-06-05 10:30:54-04:00,A
1,1,Stocks That Hit 52-Week Highs On Wednesday,https://www.benzinga.com/news/20/06/16170189/s...,Benzinga Insights,2020-06-03 10:45:20-04:00,A
2,2,71 Biggest Movers From Friday,https://www.benzinga.com/news/20/05/16103463/7...,Lisa Levin,2020-05-26 04:30:07-04:00,A
3,3,46 Stocks Moving In Friday's Mid-Day Session,https://www.benzinga.com/news/20/05/16095921/4...,Lisa Levin,2020-05-22 12:45:06-04:00,A
4,4,B of A Securities Maintains Neutral on Agilent...,https://www.benzinga.com/news/20/05/16095304/b...,Vick Meyer,2020-05-22 11:38:59-04:00,A


In [7]:
# Define the ticker and the time period
ticker = 'TSLA'
start_date = '2010-06-29'
end_date = '2024-07-30'
# Fetch the stock data
tsla = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [8]:
tsla.columns = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

tsla.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [9]:
# Create a DataFrame with dates from the TSLA data
date = raw_df.index
text_data = pd.DataFrame(date, columns=['date'])

In [10]:
# Generate random headlines
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Assign a random headline to each date
text_data['Headline'] = np.random.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,date,Headline
0,0,AAPL rumored to acquire a tech startup
1,1,AAPL rumored to acquire a tech startup
2,2,AAPL's market share grows
3,3,Market downturn affects AAPL
4,4,AAPL to unveil new product next month


In [11]:
def calculate_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Applying the sentiment analysis function to your headlines
text_data['Sentiment'] = text_data['Headline'].apply(calculate_sentiment)

In [12]:
text_data.head()

Unnamed: 0,date,Headline,Sentiment
0,0,AAPL rumored to acquire a tech startup,0.0
1,1,AAPL rumored to acquire a tech startup,0.0
2,2,AAPL's market share grows,0.0
3,3,Market downturn affects AAPL,0.0
4,4,AAPL to unveil new product next month,0.068182


In [13]:
# Aligning sentiment scores with the stock closing prices on the same date
combined_data = text_data.set_index('date').join(tsla['Close'])
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,AAPL rumored to acquire a tech startup,0.0,
1,AAPL rumored to acquire a tech startup,0.0,
2,AAPL's market share grows,0.0,
3,Market downturn affects AAPL,0.0,
4,AAPL to unveil new product next month,0.068182,


In [14]:
combined_data['Daily Returns'] = combined_data['Close'].pct_change()

  combined_data['Daily Returns'] = combined_data['Close'].pct_change()


In [15]:
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,AAPL rumored to acquire a tech startup,0.0,,
1,AAPL rumored to acquire a tech startup,0.0,,
2,AAPL's market share grows,0.0,,
3,Market downturn affects AAPL,0.0,,
4,AAPL to unveil new product next month,0.068182,,


In [16]:
correlation = combined_data['Sentiment'].corr(combined_data['Daily Returns'])
print("Correlation between sentiment and daily stock returns:", correlation)

Correlation between sentiment and daily stock returns: nan


In [17]:
correlation = combined_data['Sentiment'].corr(combined_data['Close'])
print("Correlation between sentiment and closing:", correlation)

Correlation between sentiment and closing: nan
