# Import

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import plotly.graph_objects as go
import plotly.express as px



In [2]:
!pip install yfinance 
import yfinance as yf

Collecting yfinance
  Downloading yfinance-0.2.28-py2.py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.3/65.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Installing collected packages: multitasking, yfinance
Successfully installed multitasking-0.0.11 yfinance-0.2.28


# Data-set download

In [3]:
stock_ticker = 'TSLA'
start_date, end_date = '2010-01-01', '2023-01-01'

In [4]:
stock = yf.download(stock_ticker, start=start_date, end=end_date)
stock.reset_index(inplace=True)
stock.index = pd.to_datetime(stock.index)  # Convert index to DateTime format

[*********************100%%**********************]  1 of 1 completed


# Analysis

In [5]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3150 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000003149
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       3150 non-null   datetime64[ns]
 1   Open       3150 non-null   float64       
 2   High       3150 non-null   float64       
 3   Low        3150 non-null   float64       
 4   Close      3150 non-null   float64       
 5   Adj Close  3150 non-null   float64       
 6   Volume     3150 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 196.9 KB


- Here we can see that there are no null values, all data is in correct format. So no need to do data cleaning.

- Other things to note is that there are total 3150 rows and 7 columns.

In [6]:
# Calculate statistical metrics
mean_price = stock['Adj Close'].mean()
median_price = stock['Adj Close'].median()
std_dev = stock['Adj Close'].std()
max_price = stock['Adj Close'].max()
min_price = stock['Adj Close'].min()

print(f"Mean Price: {mean_price:.2f}")
print(f"Median Price: {median_price:.2f}")
print(f"Standard Deviation: {std_dev:.2f}")
print(f"Max Price: {max_price:.2f}")
print(f"Min Price: {min_price:.2f}")

Mean Price: 58.81
Median Price: 16.22
Standard Deviation: 95.53
Max Price: 409.97
Min Price: 1.05


- the standard deviation is quite high at 95.53, which suggests a significant spread of prices around the mean.

- we can conclude that TESLA stock changes a lot during the given time frame. 

In [7]:
def plot_candlestick_chart(data_frame):
    fig_candlestick = go.Figure(data=[go.Candlestick(x=data_frame['Date'],
                                                     open=data_frame['Open'],
                                                     high=data_frame['High'],
                                                     low=data_frame['Low'],
                                                     close=data_frame['Close'])])

    fig_candlestick.update_layout(
        title=f'{stock_ticker} Candlestick Chart',
        xaxis_title='Date',
        yaxis_title='Price',
        xaxis_rangeslider_visible=False
    )

    fig_candlestick.show()


def plot_moving_average(data_frame, window_sizes):
    for window_size in window_sizes:
        data_frame[f'{window_size}_MA'] = data_frame['Close'].rolling(window=window_size).mean()

    fig_timeseries = go.Figure()

    fig_timeseries.add_trace(go.Scatter(x=data_frame['Date'], y=data_frame['Close'], mode='lines', name=f'{stock_ticker} Close'))
    
    for window_size in window_sizes:
        fig_timeseries.add_trace(go.Scatter(x=data_frame['Date'], y=data_frame[f'{window_size}_MA'], mode='lines', name=f'{window_size}-Day Moving Average'))

    fig_timeseries.update_layout(
        title=f'{stock_ticker} Price and {window_size}-Day Moving Average',
        xaxis_title='Date',
        yaxis_title='Price'
    )

    fig_timeseries.show()

    
def plot_volatility(data, window_size, upper_limit=None, lower_limit=None):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Calculate rolling volatility (standard deviation of returns)
    data['Volatility'] = data['Daily_Return'].rolling(window=window_size).std()

    # Adjust data to be symmetric around zero
    data['Adjusted_Volatility'] = data['Volatility'] - data['Volatility'].mean()

    fig_volatility = go.Figure()

    fig_volatility.add_trace(go.Scatter(x=data['Date'], y=data['Adjusted_Volatility'], mode='lines', name='Adjusted Volatility'))

    if upper_limit is not None:
        fig_volatility.add_trace(go.Scatter(x=data['Date'], y=[upper_limit] * len(data), mode='lines', name='Upper Limit'))

    if lower_limit is not None:
        fig_volatility.add_trace(go.Scatter(x=data['Date'], y=[lower_limit] * len(data), mode='lines', name='Lower Limit'))

    fig_volatility.update_layout(
        title=f'{stock_ticker} Volatility',
        xaxis_title='Date',
        yaxis_title='Adjusted Volatility'
    )

    fig_volatility.show()

def plot_return_frequency_day_of_week(data, threshold):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Filter days where returns are higher than the threshold
    data['Positive_Return'] = data['Daily_Return'] > threshold

    # Convert the 'Date' column to datetime format
    data['Date'] = pd.to_datetime(data['Date'])

    # Extract the day of the week (0: Monday, 6: Sunday)
    data['Day_of_Week'] = data['Date'].dt.dayofweek

    # Group data by day of the week and calculate the count of days with positive returns above the threshold
    positive_return_counts = data.groupby('Day_of_Week')['Positive_Return'].sum()

    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    fig = go.Figure(go.Bar(x=days_of_week, y=positive_return_counts))
    fig.update_layout(
        title=f'Number of Days with Positive Returns Above {threshold*100}%',
        xaxis_title='Day of the Week',
        yaxis_title='Number of Days',
    )
    fig.show()

def plot_return_frequency_month(data, threshold):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Filter days where returns are higher than the threshold
    data['Positive_Return'] = data['Daily_Return'] > threshold

    # Convert the 'Date' column to datetime format
    data['Date'] = pd.to_datetime(data['Date'])

    # Extract the month
    data['Month'] = data['Date'].dt.month

    # Group data by month and calculate the count of months with positive returns above the threshold
    positive_return_counts = data.groupby('Month')['Positive_Return'].sum()

    months = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ]

    fig = go.Figure(go.Bar(x=months, y=positive_return_counts))
    fig.update_layout(
        title=f'Number of Months with Positive Returns Above {threshold*100}%',
        xaxis_title='Month',
        yaxis_title='Number of Months',
    )
    fig.show()

In [8]:
plot_candlestick_chart(stock)

In [9]:
plot_moving_average(stock, window_sizes=[10, 50])

- we can see observer the trend from 10-Day and 50-Day Moving Average, that the TESLA stock increased hugles after 2020. It keep on increasing till the starting of 2022. And then started to deacrese mostly.

- Reason is that TESLA stock got over-valued dur to its increasing sales and influence of Elon Musk. But soon market realised that it is over-valued, so market started to adjusted the stock price to it actual market value.  

In [10]:
plot_volatility(stock, window_size=30, upper_limit=.02, lower_limit=-1*.02)

- Here we can clearly see that TESLA stock is not stable, as it keep on fluctuating a lot through out it listing.

- Intersting point to see here is that it got fluctuated a lot during 2020-2022. Again it is clear from the Average-moving graph.

In [11]:
plot_return_frequency_day_of_week(stock, threshold=.05)

- Okay, if someone if intraday trader then investing on TESLA on Friday is best and then exiting on Monday. 

- As the the number of times the trader will get return of atleast 5% is higher on Monday than any other day.

In [12]:
plot_return_frequency_month(stock, threshold=.05)

- Here we can see that November and April are best month to buy TESLA stock as we have higher chance of getting atleast 5% return in these 2 months as comapre to other months.