# Downward Jump Analysis
This notebook analyzes downward jumps in single-stock price data and rescales them by comparing them with the corresponding moves of the S&P 500 index (SPX).

In [None]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
import warnings
from scipy.stats import shapiro
from scipy.stats import norm

from IPython.display import display
# Notebook-wide configuration: plotting themes, warning filter and RNG seed.
pio.templates.default = "seaborn"
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and newer
# releases no longer ship the plain 'seaborn' name, so use whichever one exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Silence FutureWarning noise emitted by the libraries used below.
warnings.simplefilter(action='ignore', category=FutureWarning)
np.random.seed(27)

We want to analyze data for an underlying (**META**, **TSLA**, **AAPL**) and the S&P500 index. This is because option prices are available for options on the SPX index, but not for options on the individual stocks. Thus, we will compare the jump magnitudes of the two instruments to obtain plausible market prices for the stock options.

Let's start by retrieving data from Yahoo Finance (via `yfinance`), in the date range between 20 Jan 2016 and 20 Jan 2018.

In [None]:
# Define the instruments to download: one single-name stock and the S&P 500 index.
# #tickers = ['TSLA', '^GSPC']
stock_ticker = 'META'           # AAPL, META, TSLA, MSFT, NVDA, GOOG
index_ticker = '^GSPC'

######## FIRST TIME, THEN SAVED INTO A CSV FILE ##########
# Take all available data from 20/01/2016 until 20/01/2018.
start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

# Download daily prices with yfinance (auto_adjust=True requests adjusted prices).
stock_data = yf.download(stock_ticker, start=start_date, end=end_date, auto_adjust=True)
sp500_data = yf.download(index_ticker, start=start_date, end=end_date, auto_adjust=True)

# Recent yfinance releases return two-level (field, ticker) columns even for a
# single ticker. Keep only the field level so that `frame['Close']` is a Series,
# which the rest of the notebook relies on.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)
if isinstance(sp500_data.columns, pd.MultiIndex):
    sp500_data.columns = sp500_data.columns.get_level_values(0)

# Cache the stock prices on disk (the `data/` directory must already exist).
stock_data.to_csv(f'data/{stock_ticker}_data.csv', index=True)
#sp500_data.to_csv(f'data/SPX_data.csv', index=True)

# To reload from the cache instead of downloading, restore the date index too:
#stock_data = pd.read_csv(f'data/{stock_ticker}_data.csv', index_col=0, parse_dates=True)
#sp500_data = pd.read_csv(f'data/SPX_data.csv', index_col=0, parse_dates=True)

Let's add the columns for returns and log returns in both dataframes.

In [None]:
# Add three columns to both price frames:
#   'Returns'      gross daily return  S_t / S_{t-1}
#   'Log Returns'  ln(S_t / S_{t-1})
#   'Returns (%)'  simple daily return in percent
# The first row has no previous close, so all three are NaN there.
for price_frame in (stock_data, sp500_data):
    price_frame['Returns'] = price_frame['Close'] / price_frame['Close'].shift()
    price_frame['Log Returns'] = np.log(price_frame['Returns'])
    price_frame['Returns (%)'] = (price_frame['Returns'] - 1) * 100

# A cell only renders its last bare expression, so both previews are displayed
# explicitly; otherwise the stock preview would be silently dropped.
display(stock_data.head(6))
display(sp500_data.head(9))


Stock price evolution with RangeSlider and Selectors

In [None]:
# Closing price of the stock over time, with a range slider underneath and
# quick-range selector buttons (1m, 6m, YTD, 1y, all).
range_buttons = [
    {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
    {"count": 6, "label": "6m", "step": "month", "stepmode": "backward"},
    {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
    {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
    {"step": "all"},
]

fig = px.line(
    stock_data,
    x=stock_data.index,
    y='Close',
    title=f'{stock_ticker} stock price with Selectors',
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector={"buttons": range_buttons},
)
fig.show()

In [None]:
# Candlestick chart built from the stock's Open/High/Low/Close columns.
candles = go.Candlestick(
    x=stock_data.index,
    open=stock_data["Open"],
    high=stock_data["High"],
    low=stock_data["Low"],
    close=stock_data["Close"],
)
fig = go.Figure(data=[candles])
fig.update_layout(title=f"{stock_ticker}'s adjusted stock price", yaxis_title="Price ($)")
fig.show()

In [None]:
# Time series of the stock's gross daily returns (S_t / S_{t-1}).
returns_trace = go.Scatter(
    x=stock_data.index,
    y=stock_data['Returns'],
    mode='lines',
    name='Stock Returns',
)
fig = go.Figure(data=returns_trace)
fig.update_layout(title=f'{stock_ticker} Returns', xaxis_title='Date', yaxis_title='Returns')
fig.show()

## Method 1. Set a threshold
We define as **jumps** all daily returns that fall below a fixed threshold (here: a drop of more than 5%, i.e. a gross return $S_t/S_{t-1} < 0.95$).

In [None]:
# Gross-return cut-off: 0.95 means the close fell by more than 5% in one day.
threshold = 0.95
below_threshold = stock_data['Returns'] < threshold
# Copy so that later edits to the jump table never touch `stock_data`.
stock_jumps = stock_data.loc[below_threshold].copy()
print(f'Days with less than {- (100 - threshold*100)}% returns:\n {stock_jumps}')

In [None]:
# Log returns of the stock with the jump threshold drawn as a horizontal line.
fig = go.Figure(data=go.Scatter(
    x=stock_data.index,
    y=stock_data['Log Returns'],
    mode='lines',
    name='Stock Log Returns'
))

# `threshold` is a gross return (S_t / S_{t-1}), so convert it to log-return
# space before drawing it on this axis.
log_threshold = np.log(threshold)

# Add the horizontal line; x is in paper coordinates so it spans the full width.
fig.add_shape(
    type='line',
    x0=0, x1=1,
    y0=log_threshold, y1=log_threshold,
    xref='paper',
    yref='y',
    line=dict(color='red', width=2),
    name='5% jumps threshold '
)

# The plotted series are log returns, so the axis is labelled accordingly.
fig.update_layout(
    title=f'{stock_ticker} Log Returns',
    xaxis_title='Date',
    yaxis_title='Log Returns',
)

fig.show()

Let's show returns for the SP500 index.

In [None]:
# Time series of the index's gross daily returns (S_t / S_{t-1}).
fig = go.Figure(data=go.Scatter(
    x=sp500_data.index,
    y=sp500_data['Returns'],
    mode='lines',
    # This trace shows the index, not the stock, so name it after the index.
    name=f'{index_ticker} Returns'
))

fig.update_layout(
    title=f'{index_ticker} Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

Then we filter the S&P500 data in the selected jump days

In [None]:
# Keep only the index rows whose dates coincide with a stock jump day.
jump_dates = stock_jumps.index
sp500_jumps = sp500_data.loc[sp500_data.index.isin(jump_dates)]
print(sp500_jumps)

and create a separate dataframe where the percentage returns (**Returns (%)**) of both stock and index (on the selected jump days) are stored.

In [None]:
# Percentage returns of stock and index side by side on the jump days; the
# suffixes tell the two identically named columns apart.
stock_pct = stock_jumps[['Returns (%)']]
index_pct = sp500_jumps[['Returns (%)']]
joined_jumps = stock_pct.join(index_pct, lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps)

*Let's analyze the days on which we found jumps and compare the behaviour of the index with respect to the stock.*
The following plot contains the Close prices of both instruments, highlighting the days on which jumps have been observed.

In [None]:
# Close prices of the stock (left axis) and the index (right axis) on one chart,
# with a red arrow on every day flagged as a stock jump.
highlight_dates = joined_jumps.index.tolist()

start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

# Restrict both frames to the analysis window.
resized_stock_df = stock_data.loc[start_date:end_date]
resized_sp500_df = sp500_data.loc[start_date:end_date]

# Create the figure
fig = go.Figure()

# Add STOCK data to the figure
fig.add_trace(go.Scatter(x=resized_stock_df.index, y=resized_stock_df['Close'], name=stock_ticker))

# Add S&P 500 index data to the figure (drawn against the secondary y-axis)
fig.add_trace(go.Scatter(x=resized_sp500_df.index, y=resized_sp500_df['Close'], name=index_ticker, yaxis='y2'))

# Set the layout with the secondary y-axis
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis=dict(
        # Follow the selected ticker instead of a hard-coded 'TSLA' label.
        title=f'{stock_ticker} Price',
        anchor='free',
        side='left',
        position=0.05
    ),
    yaxis2=dict(
        title='S&P 500 Price',
        overlaying='y',
        anchor='x',
        side='right',
        position=0.95
    ),
    legend_title='Symbol',
)

# Highlight the specific days: one arrow per jump date, pointing at the stock's close.
for date in highlight_dates:
    fig.add_annotation(
        x=date, y=resized_stock_df.loc[date, 'Close'],
        showarrow=True,
        arrowhead=1,
        arrowsize=1.5,
        arrowwidth=2,
        arrowcolor='red',
        ax=20,
        ay=-40,
        xanchor='center',
        font=dict(color='red')
    )

# Display the figure
fig.show()

In [None]:
# Gross returns of stock and index on a single shared y-axis, with a red arrow
# on every stock jump day. Uses the windowed frames built in the previous cell.
fig = go.Figure()

for frame, label in ((resized_stock_df, stock_ticker), (resized_sp500_df, index_ticker)):
    fig.add_trace(go.Scatter(x=frame.index, y=frame['Returns'], name=label))

# Titles and legend placement (top-right corner of the plotting area).
legend_options = dict(
    title='Symbol',
    x=1,
    y=1,
    traceorder='normal',
    font=dict(size=12),
)
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis_title='Returns',
    legend=legend_options,
)

# Arrow styling shared by every jump marker.
arrow_style = dict(
    showarrow=True,
    arrowhead=1,
    arrowsize=1.5,
    arrowwidth=2,
    arrowcolor='red',
    ax=20,
    ay=-40,
    xanchor='center',
    font=dict(color='red'),
)
for date in highlight_dates:
    fig.add_annotation(x=date, y=resized_stock_df.loc[date, 'Returns'], **arrow_style)

fig.show()


Next, let's find the "distance" between the stock returns and the SP500 returns on the selected days, measured as the ratio of the stock's percentage return to the index's percentage return, and average it to get a scale factor.

In [None]:
# Per-day ratio of the stock's percentage return to the index's, then its mean.
stock_pct_col = f'Returns (%)_{stock_ticker}'
index_pct_col = 'Returns (%)_SP500'
joined_jumps['Distance'] = joined_jumps[stock_pct_col].div(joined_jumps[index_pct_col])

mean_ratio = joined_jumps['Distance'].mean()
avg_distance = round(mean_ratio, 4)

print(f'Average scale factor: {avg_distance}' )

## Method 2. 99% threshold (1st percentile of returns)

In [None]:
# Cut-off = 1st percentile of the percentage returns (NaNs ignored), i.e. the
# level below which the worst 1% of daily returns lie.
pct_returns = stock_data['Returns (%)']
threshold = np.nanpercentile(pct_returns.values, 1)

# Jump days are those with a return strictly below the cut-off.
stock_jumps2 = stock_data.loc[pct_returns < threshold]

# Two overlaid histograms: every return (translucent) and the jump days only.
hist_trace_all = go.Histogram(
    x=stock_data['Returns (%)'],
    nbinsx=30,
    opacity=0.5,
    name='All Returns',
)
hist_trace_low = go.Histogram(
    x=stock_jumps2['Returns (%)'],
    nbinsx=30,
    opacity=0.9,
    name='Low Returns',
)

# 'overlay' draws the bars on top of each other instead of side by side.
layout = go.Layout(
    title='Distribution of Returns',
    xaxis={'title': 'Returns in (%)'},
    yaxis={'title': 'Frequency'},
    barmode='overlay',
)

figure = go.Figure(layout=layout)
figure.add_trace(hist_trace_all)
figure.add_trace(hist_trace_low)
figure.show()

In [None]:
# Inspect the percentile cut-off, then the shape and rows of the flagged jump days.
print(threshold)
flagged_shape = stock_jumps2.shape
print(flagged_shape, stock_jumps2)

In [None]:
# `threshold` is the 1st percentile of the percentage returns (lower tail), so
# jumps are the returns that fall below it, not above.
print(f'1st percentile of returns distribution: returns below {round(threshold, 2)} % are jumps.')

The code snippet below (currently commented out) is meant to get the current stock ticker and show its weight in the SP500 index.

In [None]:
# This cell is currently disabled: every line is commented out, so nothing runs.
# NOTE(review): the snippet reads a 'MarketCap' column from the price download of
# the index ticker; it is unclear that such a column is returned, or that it would
# hold per-constituent values. Confirm before re-enabling.
# yf.pdr_override()
# start_date = "2020-01-01"
# end_date = "2023-01-31"
#
# # Fetch S&P 500 data from Yahoo Finance
# sp500_data = pdr.get_data_yahoo(index_ticker, start=start_date, end=end_date)
# reference_date = '2021-05-23'  # Choose a specific reference date
#
# # Retrieve the market capitalization data for each constituent company
# market_cap_data = pdr.get_data_yahoo(index_ticker, start=reference_date, end=reference_date)
#
# # Normalize the market capitalization to obtain the weights
# sp500_weights = market_cap_data['MarketCap'] / market_cap_data['MarketCap'].sum()
#
# # Sort the companies by weight in descending order
# sp500_weights = sp500_weights.sort_values(ascending=False)
#
# # Display the list of companies with their weights
# print(sp500_weights)

In [None]:
# Index rows on the dates flagged as stock jumps by the percentile method.
jump_dates2 = stock_jumps2.index
sp500_jumps2 = sp500_data.loc[sp500_data.index.isin(jump_dates2)]
print(sp500_jumps2)

In [None]:
# Percentage returns of stock and index side by side on the percentile-method
# jump days; the suffixes tell the two identically named columns apart.
stock_pct2 = stock_jumps2[['Returns (%)']]
index_pct2 = sp500_jumps2[['Returns (%)']]
joined_jumps2 = stock_pct2.join(index_pct2, lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps2)

Let's find the average distance (ratio of percentage returns) between jumps in the stock price and the corresponding moves in the index value.

In [None]:
# Per-day ratio of the stock's percentage return to the index's, then its mean.
stock_pct_col2 = f'Returns (%)_{stock_ticker}'
index_pct_col2 = 'Returns (%)_SP500'
joined_jumps2['Distance'] = joined_jumps2[stock_pct_col2].div(joined_jumps2[index_pct_col2])

mean_ratio2 = joined_jumps2['Distance'].mean()
avg_distance2 = round(mean_ratio2, 4)

print(f'Average scale factor: {avg_distance2}' )

## Method 3. Perform a normality test

In [None]:
# Histogram of the stock's log returns with a normal density fitted by the
# sample mean and standard deviation drawn on top.
# histnorm='probability density' puts the bars on the same scale as the pdf;
# a raw count histogram would dwarf the curve and make the comparison useless.
fig = px.histogram(x=stock_data['Log Returns'], nbins=30, histnorm='probability density')

# pandas skips the leading NaN (first row has no previous close) in all of these.
mean = stock_data['Log Returns'].mean()
std = stock_data['Log Returns'].std()
x = np.linspace(stock_data['Log Returns'].min(), stock_data['Log Returns'].max(), 100)
y = norm.pdf(x, mean, std)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title='Histogram of Log Returns',
                  xaxis_title='Log Returns',
                  yaxis_title='Density')

fig.show()

In [None]:
# Shapiro-Wilk normality test on the stock's log returns (NaN rows removed first).
log_returns_clean = stock_data['Log Returns'].dropna()
res_test = shapiro(log_returns_clean)

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

Since the p-value is less than the conventional significance level of 0.05 (or 5%), we have sufficient evidence to reject the null hypothesis that the data follow a normal distribution. The **Shapiro-Wilk test** statistic indicates how far the data distribution departs from normality: a value closer to 1 suggests that the data closely follow a normal distribution, whereas a value closer to 0 suggests a significant deviation from normality.
For these reasons, we will remove extreme jumps so that the **LOG RETURNS** column passes the normality test.

In [None]:
# Sort the return columns by log return (most negative first) and trim the tails.
return_columns = ['Returns', 'Log Returns', 'Returns (%)']
stock_returns_sorted = stock_data[return_columns].dropna().sort_values(by='Log Returns')

print('Original dimensions stock dataframe: ', stock_returns_sorted.shape)
# print(stock_returns_sorted.head(5),'\n', stock_returns_sorted.tail(5))

# Rows removed because they hurt the normality of the log returns: the 15 most
# negative and the 14 most positive observations.
# NOTE(review): 15/14 match the counts listed for AAPL in the notes further down,
# while `stock_ticker` is set to 'META' above. Confirm the intended values.
downward_jumps = stock_returns_sorted.iloc[:15]
upward_jumps = stock_returns_sorted.iloc[-14:]
stock_returns_sorted = (
    stock_returns_sorted
    .drop(index=downward_jumps.index)
    .drop(index=upward_jumps.index)
)
print(downward_jumps)
print(upward_jumps)
print(stock_returns_sorted.head(10), '\n', stock_returns_sorted.tail(10))
print('Final dimensions stock dataframe: ', stock_returns_sorted.shape)

In [None]:
# Shapiro-Wilk normality test again, now on the log returns left after trimming.
trimmed_log_returns = stock_returns_sorted['Log Returns']
res_test = shapiro(trimmed_log_returns)

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

##### NOTES ON THE SHAPIRO-WILK NORMALITY TEST (on META)
Removing upward and downward values simultaneously (the same number of each, in lockstep) does not give great results: for example, with -16 and -9 I get a p-value of 0.04598, while with -16 and -10 I get a lower p-value of 0.0379. So we need to focus on removing downward jumps more than upward ones. Indeed, from the seventh onward the upward jumps do not exceed 3%, whereas the downward jumps are smaller than 3% only from the 12th onward.

### META:
"best" p-value = 0.0497 (not so good) for #downward jumps = 17, #upward jumps = 9. This leads us to accept the null hypothesis that the data are normally distributed (note that this p-value is borderline at the 5% level), and we run a distance analysis on the downward jumps obtained this way.
### TESLA:
best p-value = 0.0862; the best values to remove are #downward jumps = 5, #upward jumps = 1
### AAPL:
best p-value = 0.020 (very low; we can impose a significance level $\alpha = 0.02$), but removing anything further makes things worse.
Found with #downward jumps = 15, #upward jumps = 14

In [None]:
# Histogram of the trimmed log returns with a normal density fitted by the
# sample mean and standard deviation drawn on top.
# histnorm='probability density' puts the bars on the same scale as the pdf;
# a raw count histogram would dwarf the curve and make the comparison useless.
fig = px.histogram(stock_returns_sorted, x='Log Returns', nbins=30,
                   histnorm='probability density')
mean = stock_returns_sorted['Log Returns'].mean()
std = stock_returns_sorted['Log Returns'].std()
x = np.linspace(stock_returns_sorted['Log Returns'].min(), stock_returns_sorted['Log Returns'].max(), 100)
# Normal pdf N(mean, std^2) evaluated on the grid.
y = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title=f'{stock_ticker} Log Returns after high-valued jumps cleaning',
                  xaxis_title='Log Returns',
                  yaxis_title='Density')

fig.show()

Filter the S&P data according to the downward jumps just found, then join the two dataframes and, finally, find the distance between the performances of stock and index.

In [None]:
# Filter s&p data to find jumps
sp500_jumps3 = sp500_data[sp500_data.index.isin(downward_jumps.index)]

# Join dataframes
joined_jumps3 = downward_jumps[['Returns', 'Log Returns', 'Returns (%)']].join(sp500_jumps3[['Returns', 'Log Returns', 'Returns (%)']],
                                                            lsuffix=f'_{stock_ticker}', rsuffix='_SP500')

print(joined_jumps3)

# Find average scale factor
joined_jumps3['Distance'] = joined_jumps3[f'Returns (%)_{stock_ticker}'] / joined_jumps3[f'Returns (%)_SP500']

avg_distance3 = round(joined_jumps3['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance3}' )

In [None]:
# Side-by-side summary of the mean per-day ratio found by each of the three methods.
summary_header = f'Average {stock_ticker} - SP500 jumps distance for the three different methods:'
summary_body = f'> Fixed threshold: {avg_distance}\n> 99th percentile: {avg_distance2}\n> Normality test: {avg_distance3}'
print(summary_header)
print(summary_body)


##### ALTERNATIVELY: compute the mean of the jumps in the stock, the mean of the jumps in the S&P, and then take their ratio

In [None]:
# Alternative scale factor per method: mean stock jump divided by mean index jump
# (a ratio of means rather than a mean of per-day ratios).
stock_jump_col = f'Returns (%)_{stock_ticker}'
spx_jump_col = f'Returns (%)_SP500'

ratio_of_means = []
# Order matters: method 1 (fixed threshold), method 2 (percentile), method 3 (normality test).
for jumps_table in (joined_jumps, joined_jumps2, joined_jumps3):
    avg_stock_jumps = jumps_table[stock_jump_col].mean()
    avg_spx_jumps = jumps_table[spx_jump_col].mean()
    ratio_of_means.append(avg_stock_jumps / avg_spx_jumps)

scale_factor1, scale_factor2, scale_factor3 = ratio_of_means

print(f'> Fixed threshold: {scale_factor1}\n> 99th percentile: {scale_factor2}\n> Normality test: {scale_factor3}')



The scale factor obtained with the normality test is the one used to multiply the option prices, to find the prices for stock options.

In [None]:
# Hard-coded scale factor per ticker.
scale_factors = dict(TSLA=10.35, META=6.23, AAPL=7.84)