# Import

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import plotly.graph_objects as go
import plotly.express as px



In [2]:
!pip install yfinance 
import yfinance as yf

Collecting yfinance
  Downloading yfinance-0.2.28-py2.py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.3/65.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Installing collected packages: multitasking, yfinance
Successfully installed multitasking-0.0.11 yfinance-0.2.28


# Data-set download

In [3]:
stock_ticker = 'TSLA'
start_date, end_date = '2020-01-01', '2023-01-01'

In [4]:
stock = yf.download(stock_ticker, start=start_date, end=end_date)
stock.reset_index(inplace=True)
stock.index = pd.to_datetime(stock.index)  # Convert index to DateTime format

[*********************100%%**********************]  1 of 1 completed


# Analysis

In [5]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 756 entries, 1970-01-01 00:00:00 to 1970-01-01 00:00:00.000000755
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       756 non-null    datetime64[ns]
 1   Open       756 non-null    float64       
 2   High       756 non-null    float64       
 3   Low        756 non-null    float64       
 4   Close      756 non-null    float64       
 5   Adj Close  756 non-null    float64       
 6   Volume     756 non-null    int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 47.2 KB


In [6]:
# Calculate statistical metrics
mean_price = stock['Adj Close'].mean()
median_price = stock['Adj Close'].median()
std_dev = stock['Adj Close'].std()
max_price = stock['Adj Close'].max()
min_price = stock['Adj Close'].min()

print(f"Mean Price: {mean_price:.2f}")
print(f"Median Price: {median_price:.2f}")
print(f"Standard Deviation: {std_dev:.2f}")
print(f"Max Price: {max_price:.2f}")
print(f"Min Price: {min_price:.2f}")

Mean Price: 206.37
Median Price: 223.65
Standard Deviation: 95.86
Max Price: 409.97
Min Price: 24.08


In [7]:
def plot_candlestick_chart(data_frame):
    fig_candlestick = go.Figure(data=[go.Candlestick(x=data_frame['Date'],
                                                     open=data_frame['Open'],
                                                     high=data_frame['High'],
                                                     low=data_frame['Low'],
                                                     close=data_frame['Close'])])

    fig_candlestick.update_layout(
        title='Stock Candlestick Chart',
        xaxis_title='Date',
        yaxis_title='Price',
        xaxis_rangeslider_visible=False
    )

    fig_candlestick.show()


def plot_moving_average(data_frame, window_size):
    data_frame[f'{window_size}_MA'] = data_frame['Close'].rolling(window=window_size).mean()

    fig_timeseries = go.Figure()

    fig_timeseries.add_trace(go.Scatter(x=data_frame['Date'], y=data_frame['Close'], mode='lines', name='stock Close'))
    fig_timeseries.add_trace(go.Scatter(x=data_frame['Date'], y=data_frame[f'{window_size}_MA'], mode='lines', name=f'{window_size}-Day Moving Average'))

    fig_timeseries.update_layout(
        title=f'Stock Price and {window_size}-Day Moving Average',
        xaxis_title='Date',
        yaxis_title='Price'
    )

    fig_timeseries.show()

    
def plot_volatility(data, window_size, upper_limit=None, lower_limit=None):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Calculate rolling volatility (standard deviation of returns)
    data['Volatility'] = data['Daily_Return'].rolling(window=window_size).std()

    # Adjust data to be symmetric around zero
    data['Adjusted_Volatility'] = data['Volatility'] - data['Volatility'].mean()

    fig_volatility = go.Figure()

    fig_volatility.add_trace(go.Scatter(x=data['Date'], y=data['Adjusted_Volatility'], mode='lines', name='Adjusted Volatility'))

    if upper_limit is not None:
        fig_volatility.add_trace(go.Scatter(x=data['Date'], y=[upper_limit] * len(data), mode='lines', name='Upper Limit'))

    if lower_limit is not None:
        fig_volatility.add_trace(go.Scatter(x=data['Date'], y=[lower_limit] * len(data), mode='lines', name='Lower Limit'))

    fig_volatility.update_layout(
        title='Stock Volatility',
        xaxis_title='Date',
        yaxis_title='Adjusted Volatility'
    )

    fig_volatility.show()

def plot_return_frequency_day_of_week(data, threshold):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Filter days where returns are higher than the threshold
    data['Positive_Return'] = data['Daily_Return'] > threshold

    # Convert the 'Date' column to datetime format
    data['Date'] = pd.to_datetime(data['Date'])

    # Extract the day of the week (0: Monday, 6: Sunday)
    data['Day_of_Week'] = data['Date'].dt.dayofweek

    # Group data by day of the week and calculate the count of days with positive returns above the threshold
    positive_return_counts = data.groupby('Day_of_Week')['Positive_Return'].sum()

    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    fig = go.Figure(go.Bar(x=days_of_week, y=positive_return_counts))
    fig.update_layout(
        title=f'Number of Days with Positive Returns Above {threshold*100}%',
        xaxis_title='Day of the Week',
        yaxis_title='Number of Days',
    )
    fig.show()

def plot_return_frequency_month(data, threshold):
    # Calculate daily returns
    data['Daily_Return'] = data['Close'].pct_change()

    # Filter days where returns are higher than the threshold
    data['Positive_Return'] = data['Daily_Return'] > threshold

    # Convert the 'Date' column to datetime format
    data['Date'] = pd.to_datetime(data['Date'])

    # Extract the month
    data['Month'] = data['Date'].dt.month

    # Group data by month and calculate the count of months with positive returns above the threshold
    positive_return_counts = data.groupby('Month')['Positive_Return'].sum()

    months = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ]

    fig = go.Figure(go.Bar(x=months, y=positive_return_counts))
    fig.update_layout(
        title=f'Number of Months with Positive Returns Above {threshold*100}%',
        xaxis_title='Month',
        yaxis_title='Number of Months',
    )
    fig.show()

In [8]:
plot_candlestick_chart(stock)
plot_moving_average(stock, window_size=30)
plot_volatility(stock, window_size=30, upper_limit=.02, lower_limit=-1*.02)
plot_return_frequency_day_of_week(stock, threshold=.1)
plot_return_frequency_month(stock, threshold=.1)