# Downward Jump Analysis
This script is used to analyze jumps on my stocks data and rescale them looking at the jumps on the SPX.

In [34]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
import warnings
from scipy.stats import shapiro
from scipy.stats import norm

from IPython.display import display
pio.templates.default = "seaborn"
plt.style.use('seaborn')
warnings.simplefilter(action='ignore', category=FutureWarning)
np.random.seed(27)


The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.



We want to analyze data for an underlying (**META**, **TSLA**, **AAPL**, **NVDA**) and the S&P500 index. This because option prices are available for the options on the SPX index, but they aren't for options on the stock. Thus, we will compare the jumps magnitude of the two financial objects to obtain plausible market prices for stock options.

Let's start by retrieving data from Google Finance, in the data range between 20 Jan 2016 and 20 Jan 2018.

In [35]:
# Define the instruments to download.
# #tickers = ['TSLA', '^GSPC']
stock_ticker = 'AAPL'           # AAPL, META, TSLA, MSFT, NVDA, GOOG
index_ticker = '^GSPC'

######## FIRST TIME, THEN SAVED INTO A CSV FILE ##########
# Take all available data from 20/01/2016 until 20/01/2018.
start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

# Use pandas_reader.data.DataReader to load the data.
stock_data = yf.download(stock_ticker, start=start_date, end=end_date, auto_adjust=True)
sp500_data = yf.download(index_ticker, start=start_date, end=end_date, auto_adjust=True)
#stock_data.to_csv(f'data/{stock_ticker}_data.csv', index=True)
#sp500_data.to_csv(f'data/SPX_data.csv', index=True)

#stock_data = pd.read_csv(f'data/{stock_ticker}_data.csv')
#sp500_data = pd.read_csv(f'data/SPX_data.csv')
print(stock_data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
                 Open       High        Low      Close     Volume
Date                                                             
2016-01-20  21.673112  22.377318  21.290243  22.058260  289337600
2016-01-21  22.119790  22.306666  21.636647  21.946589  208646000
2016-01-22  22.477594  23.122546  22.418342  23.113430  263202000
2016-01-25  23.136216  23.138496  22.609772  22.662189  207178000
2016-01-26  22.773850  22.990352  22.349959  22.787523  300308000
...               ...        ...        ...        ...        ...
2018-01-12  41.704446  41.983771  41.578987  41.919857  101672400
2018-01-16  42.111591  42.464297  41.694974  41.706810  118263600
2018-01-17  41.697344  42.431162  41.441695  42.395657  137547200
2018-01-18  42.459568  42.632372  42.194448  42.433529  124773600
2018-01-19  42.279666  42.509279  41.995608  42.244160  129700400

[50

Let's add the **Returns** and **Log Returns** columns to both dataframes.

In [36]:
# Find daily returns St/St-1 and Log Returns ln(St/St-1)
stock_data['Returns'] = stock_data['Close'] / stock_data['Close'].shift()
stock_data['Log Returns'] = np.log(stock_data['Returns'])

sp500_data['Returns'] = sp500_data['Close'] / sp500_data['Close'].shift()
sp500_data['Log Returns'] = np.log(sp500_data['Returns'])

stock_data['Returns (%)'] = (stock_data['Returns'] -1) * 100
sp500_data['Returns (%)'] = (sp500_data['Returns'] -1) * 100

# Show the first rows for the stock data
print(stock_data.head(6))
#Save the formatted data into a csv file
stock_data.to_csv(f'data/{stock_ticker}/{stock_ticker}_data.csv')

# Show the first rows for the sp500 data
print(sp500_data.head(9))

# SPX value at 20/01/2016
SPX_S0 = sp500_data.loc['2016-01-20', 'Close']
SPX_St = sp500_data.loc['2017-01-18', 'Close']
print(f'\nSPX value on the 20th Jan, 2016: {SPX_S0}')
print(f'SPX value on the 18th Jan, 2017 (after 252 days): {SPX_St}')
#sp500_data.to_csv('data/sp500_data.csv', index=True)

                 Open       High        Low      Close     Volume   Returns   
Date                                                                          
2016-01-20  21.673112  22.377318  21.290243  22.058260  289337600       NaN  \
2016-01-21  22.119790  22.306666  21.636647  21.946589  208646000  0.994937   
2016-01-22  22.477594  23.122546  22.418342  23.113430  263202000  1.053167   
2016-01-25  23.136216  23.138496  22.609772  22.662189  207178000  0.980477   
2016-01-26  22.773850  22.990352  22.349959  22.787523  300308000  1.005531   
2016-01-27  21.887331  22.021790  21.272005  21.290237  533478800  0.934294   

            Log Returns  Returns (%)  
Date                                  
2016-01-20          NaN          NaN  
2016-01-21    -0.005075    -0.506257  
2016-01-22     0.051802     5.316733  
2016-01-25    -0.019716    -1.952287  
2016-01-26     0.005515     0.553052  
2016-01-27    -0.067965    -6.570639  
                   Open         High          Low      

### Interactive Stock Chart and Analysis
Using RangeSlider and Selectors we plot an interactive chart for the underlying stock considered.

In [37]:
# VISUALIZE STOCK PRICE OVER THE YEARS
fig = px.line(stock_data, x=stock_data.index, y = 'Close', title=f'{stock_ticker} stock price with Selectors')

# Add the selectors
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

In [38]:
fig = go.Figure(data=
    [go.Candlestick(x=stock_data.index,
                    open=stock_data["Open"],
                    high=stock_data["High"],
                    low=stock_data["Low"],
                    close=stock_data["Close"])]
)

fig.update_layout(
    title=f"{stock_ticker} adjusted stock price",
    yaxis_title="Price ($)",
)

fig.show()

In [39]:
fig = go.Figure(data=go.Scatter(
    x=stock_data.index,
    y=stock_data['Returns'],
    mode='lines',
    name='Stock Returns'
))

fig.update_layout(
    title=f'{stock_ticker} Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

#### Interactive S&P Index Chart and Analysis

In [40]:
# VISUALIZE STOCK PRICE OVER THE YEARS
fig = px.line(sp500_data, x=sp500_data.index, y = 'Close', title=f'S&P500 price with Selectors')

# Add the selectors
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

Now we find historical parameters of SP500 in order to compare results of the calibration process.

In [41]:
# historic volatility 1 year
start_date = pd.to_datetime('2016-01-20')
end_date = pd.to_datetime('2017-01-20')

selected_data = sp500_data.loc[start_date:end_date, 'Log Returns']
sp_vola_1y = selected_data.std() * np.sqrt(252)
print(f'\nHistorical volatility: {round(sp_vola_1y, 4)}, {round(sp_vola_1y*100,3)}%')

# historic volatility 2 years
start_date = pd.to_datetime('2016-01-20')
end_date = pd.to_datetime('2018-01-20')

selected_data = sp500_data.loc[start_date:end_date, 'Log Returns']
sp_vola_2y = selected_data.std() * np.sqrt(252*2)
print(f'\nHistorical volatility: {round(sp_vola_2y, 4)}, {round(sp_vola_2y*100,3)}%')


Historical volatility: 0.1203, 12.034%

Historical volatility: 0.1383, 13.826%


## Method 1. Set a threshold
We say that JUMPS are all returns worth less than a set threshold (here: 5%)

In [42]:
threshold = 0.95
stock_jumps = stock_data[stock_data['Returns'] < threshold].copy()
print(f'Days with less than {- (100 - threshold*100)}% returns:\n {stock_jumps}')

Days with less than -5.0% returns:
                  Open       High        Low      Close     Volume   Returns   
Date                                                                          
2016-01-27  21.887331  22.021790  21.272005  21.290237  533478800  0.934294  \
2016-04-27  21.996934  22.617889  21.923611  22.413960  458408400  0.937422   

            Log Returns  Returns (%)  
Date                                  
2016-01-27    -0.067965    -6.570639  
2016-04-27    -0.064622    -6.257795  


In [43]:
fig = go.Figure(data=go.Scatter(
    x=stock_data.index,
    y=stock_data['Log Returns'],
    mode='lines',
    name='Stock Returns'
))

# Add the horizontal line
fig.add_shape(
    type='line',
    x0=0, x1=1,
    y0=np.log(threshold), y1=np.log(threshold),
    xref='paper',
    yref='y',
    line=dict(color='red', width=2),
    name='5% jumps threshold '
)


fig.update_layout(
    title=f'{stock_ticker} Log Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

Let's show returns for the SP500 index.

In [44]:
fig = go.Figure(data=go.Scatter(
    x=sp500_data.index,
    y=sp500_data['Returns'],
    mode='lines',
    name='Stock Returns'
))

fig.update_layout(
    title=f'{index_ticker} Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

Then we filter the S&P500 data in the selected jump days

In [45]:
sp500_jumps = sp500_data[sp500_data.index.isin(stock_jumps.index)]
print(sp500_jumps)

                   Open         High          Low        Close      Volume   
Date                                                                         
2016-01-27  1902.520020  1916.989990  1872.699951  1882.949951  4754040000  \
2016-04-27  2092.330078  2099.889893  2082.310059  2095.149902  4100110000   

             Returns  Log Returns  Returns (%)  
Date                                            
2016-01-27  0.989137    -0.010923    -1.086348  
2016-04-27  1.001649     0.001648     0.164935  


and create a separate dataframe where **Returns** and **Log returns** of both stock and index (in the selected jump days) are stored.

In [46]:
joined_jumps = stock_jumps[['Returns (%)']].join(sp500_jumps[['Returns (%)']], lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps)

            Returns (%)_AAPL  Returns (%)_SP500
Date                                           
2016-01-27         -6.570639          -1.086348
2016-04-27         -6.257795           0.164935


*Let's analyze days in which we found jumps and compare the behaviour of the index wrt the stock.*
The following plot contains the Close prices for both objects, highlighting the days when jumps have been observed.

In [47]:
highlight_dates = joined_jumps.index.tolist()

start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

resized_stock_df = stock_data.loc[start_date:end_date]
resized_sp500_df = sp500_data.loc[start_date:end_date]

# Create the figure
fig = go.Figure()

# Add STOCK data to the figure
fig.add_trace(go.Scatter(x=resized_stock_df.index, y=resized_stock_df['Close'], name=stock_ticker))

# Add S&P 500 index data to the figure
fig.add_trace(go.Scatter(x=resized_sp500_df.index, y=resized_sp500_df['Close'], name=index_ticker, yaxis='y2'))

# Set the layout with the secondary y-axis
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis=dict(
        title='TSLA Price',
        anchor='free',
        side='left',
        position=0.05
    ),
    yaxis2=dict(
        title='S&P 500 Price',
        overlaying='y',
        anchor='x',
        side='right',
        position=0.95
    ),
    legend_title='Symbol',
)

# Highlight the specific days
for date in highlight_dates:
    fig.add_annotation(
        x=date, y=resized_stock_df.loc[date, 'Close'],
        showarrow=True,
        arrowhead=1,
        arrowsize=1.5,
        arrowwidth=2,
        arrowcolor='red',
        ax=20,
        ay=-40,
        xanchor='center',
        font=dict(color='red')
    )

# Display the figure
fig.show()

In [48]:
# Create the figure
fig = go.Figure()

# Add STOCK data to the figure
fig.add_trace(go.Scatter(x=resized_stock_df.index, y=resized_stock_df['Returns'], name=stock_ticker))

# Add S&P 500 index data to the figure
fig.add_trace(go.Scatter(x=resized_sp500_df.index, y=resized_sp500_df['Returns'], name=index_ticker))

# Set the layout with the secondary y-axis
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis_title='Returns',
    legend=dict(
        title='Symbol',
        x=1,
        y=1,
        traceorder='normal',
        font=dict(size=12),
    )
)

# Highlight the specific days
for date in highlight_dates:
    fig.add_annotation(
        x=date, y=resized_stock_df.loc[date, 'Returns'],
        showarrow=True,
        arrowhead=1,
        arrowsize=1.5,
        arrowwidth=2,
        arrowcolor='red',
        ax=20,
        ay=-40,
        xanchor='center',
        font=dict(color='red')
    )

# Display the figure
fig.show()


By doing so, let's find the distance (difference of returns) between the stock returns and the SP500 returns in the selected days.

In [49]:
joined_jumps['Distance'] = joined_jumps[f'Returns (%)_{stock_ticker}'] / joined_jumps['Returns (%)_SP500']

avg_distance = round(joined_jumps['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance}' )

Average scale factor: -15.9463


## METHOD 2. 99-th percentile

In [50]:
# Calculate the 99th percentile
threshold = np.nanpercentile(stock_data['Returns (%)'].values, 1)

# Filter the returns above the threshold
stock_jumps2 = stock_data[stock_data['Returns (%)'] < threshold]

# Plot the distribution of returns
# Create a histogram trace for all returns
hist_trace_all = go.Histogram(x=stock_data['Returns (%)'], nbinsx=30, opacity=0.5, name='All Returns')

# Create a histogram trace for low returns
hist_trace_low = go.Histogram(x=stock_jumps2['Returns (%)'], nbinsx=30, opacity=0.9, name='Low Returns')

# Create the layout
layout = go.Layout(
    title='Distribution of Returns',
    xaxis=dict(title='Returns in (%)'),
    yaxis=dict(title='Frequency'),
    barmode='overlay'
)

# Create the figure
figure = go.Figure(data=[hist_trace_all, hist_trace_low], layout=layout)

# Display the histogram
figure.show()

In [51]:
print(threshold)
print(stock_jumps2.shape, stock_jumps2)

-3.049235417139372
(6, 8)                  Open       High        Low      Close     Volume   Returns   
Date                                                                          
2016-01-27  21.887331  22.021790  21.272005  21.290237  533478800  0.934294  \
2016-04-27  21.996934  22.617889  21.923611  22.413960  458408400  0.937422   
2016-04-28  22.365838  22.427703  21.595945  21.728844  328970800  0.969434   
2017-05-17  36.087473  36.315369  35.173539  35.300407  203070800  0.966424   
2017-06-09  36.461028  36.461028  34.306588  35.002022  259530800  0.961223   
2017-08-10  37.715152  37.738740  36.472135  36.634884  163217200  0.968148   

            Log Returns  Returns (%)  
Date                                  
2016-01-27    -0.067965    -6.570639  
2016-04-27    -0.064622    -6.257795  
2016-04-28    -0.031043    -3.056648  
2017-05-17    -0.034152    -3.357550  
2017-06-09    -0.039549    -3.877694  
2017-08-10    -0.032370    -3.185178  


In [52]:
print(f'99-th percentile of returns distribution: moves bigger than {round(threshold, 2)} % are jumps.')

99-th percentile of returns distribution: moves bigger than -3.05 % are jumps.


The code snippet below gets the current stock ticker and show its weight in the SP500 index.

In [53]:
# yf.pdr_override()
# start_date = "2020-01-01"
# end_date = "2023-01-31"
#
# # Fetch S&P 500 data from Yahoo Finance
# sp500_data = pdr.get_data_yahoo(index_ticker, start=start_date, end=end_date)
# reference_date = '2021-05-23'  # Choose a specific date in 2016
#
# # Retrieve the market capitalization data for each constituent company
# market_cap_data = pdr.get_data_yahoo(index_ticker, start=reference_date, end=reference_date)
#
# # Normalize the market capitalization to obtain the weights
# sp500_weights = market_cap_data['MarketCap'] / market_cap_data['MarketCap'].sum()
#
# # Sort the companies by weight in descending order
# sp500_weights = sp500_weights.sort_values(ascending=False)
#
# # Display the list of companies with their weights
# print(sp500_weights)

In [54]:
# Join jumps pf STOCK and INDEX in a table
sp500_jumps2 = sp500_data[sp500_data.index.isin(stock_jumps2.index)]
print(sp500_jumps2)

                   Open         High          Low        Close      Volume   
Date                                                                         
2016-01-27  1902.520020  1916.989990  1872.699951  1882.949951  4754040000  \
2016-04-27  2092.330078  2099.889893  2082.310059  2095.149902  4100110000   
2016-04-28  2090.929932  2099.300049  2071.620117  2075.810059  4309840000   
2017-05-17  2382.949951  2384.870117  2356.209961  2357.030029  4164760000   
2017-06-09  2436.389893  2446.199951  2415.699951  2431.770020  4029860000   
2017-08-10  2465.379883  2465.379883  2437.750000  2438.209961  3635820000   

             Returns  Log Returns  Returns (%)  
Date                                            
2016-01-27  0.989137    -0.010923    -1.086348  
2016-04-27  1.001649     0.001648     0.164935  
2016-04-28  0.990769    -0.009274    -0.923077  
2017-05-17  0.981822    -0.018345    -1.817821  
2017-06-09  0.999170    -0.000830    -0.082999  
2017-08-10  0.985526    -0.01458

In [55]:
joined_jumps2 = stock_jumps2[['Returns (%)']].join(sp500_jumps2[['Returns (%)']],
                                                            lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps2)

            Returns (%)_AAPL  Returns (%)_SP500
Date                                           
2016-01-27         -6.570639          -1.086348
2016-04-27         -6.257795           0.164935
2016-04-28         -3.056648          -0.923077
2017-05-17         -3.357550          -1.817821
2017-06-09         -3.877694          -0.082999
2017-08-10         -3.185178          -1.447444


Let's find the average distance between jumps in stock price and jumps in index value.

In [56]:
joined_jumps2['Distance'] = joined_jumps2[f'Returns (%)_{stock_ticker}'] / joined_jumps2[f'Returns (%)_SP500']

avg_distance2 = round(joined_jumps2['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance2}' )

Average scale factor: 3.6977


## Method 3. Perform a normality test

In [57]:
fig = px.histogram(stock_data['Log Returns'], x=stock_data['Log Returns'], nbins=30)
mean = stock_data['Log Returns'].mean()
std = stock_data['Log Returns'].std()
x = np.linspace(stock_data['Log Returns'].min(), stock_data['Log Returns'].max())
y = norm.pdf(x, mean, std)

y = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title='Histogram of Log Returns',
                  xaxis_title='Returns (%)',
                  yaxis_title='Frequency')

fig.show()

In [58]:
# Perform Shapiro-Wilk normality test on STOCK Log Returns
res_test = shapiro(stock_data['Log Returns'].dropna())

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

Shapiro-Wilk Test - Log Returns_AAPL
Statistic: 0.9256923794746399
P-value: 4.335434378985098e-15


Since the p-value is less than the conventional significance level of 0.05 (or 5%), we have sufficient evidence to reject the null hypothesis that the data follows a normal distribution. The **Shapiro-Wilk test** statistic indicates how data distribution departures from normality. A value closer to 1 suggests that the data closely follows a normal distribution, whereas a value closer to 0 suggests a significant deviation from normality.
Due to these reasons, we will remove extreme jumps in order to make the **LOG RETURNS** column pass the normality test.

In [59]:
stock_returns_sorted= stock_data[['Returns', 'Log Returns', 'Returns (%)']].dropna().sort_values('Log Returns')

print('Original dimensions stock dataframe: ', stock_returns_sorted.shape)
# print(stock_returns_sorted.head(5),'\n', stock_returns_sorted.tail(5))

# Remove rows affecting the normality of the Log Returns distribution
downward_jumps = stock_returns_sorted[:16]
upward_jumps = stock_returns_sorted[-15:]
stock_returns_sorted.drop(downward_jumps.index, inplace=True)
stock_returns_sorted.drop(upward_jumps.index, inplace=True)
print(downward_jumps)
print(upward_jumps)
print(stock_returns_sorted.head(10), '\n', stock_returns_sorted.tail(10))
print('Final dimensions stock dataframe: ', stock_returns_sorted.shape)

Original dimensions stock dataframe:  (504, 3)
             Returns  Log Returns  Returns (%)
Date                                          
2016-01-27  0.934294    -0.067965    -6.570639
2016-04-27  0.937422    -0.064622    -6.257795
2017-06-09  0.961223    -0.039549    -3.877694
2017-05-17  0.966424    -0.034152    -3.357550
2017-08-10  0.968148    -0.032370    -3.185178
2016-04-28  0.969434    -0.031043    -3.056648
2016-06-24  0.971904    -0.028498    -2.809566
2016-11-10  0.972132    -0.028264    -2.786805
2016-02-05  0.973292    -0.027071    -2.670773
2016-09-08  0.973791    -0.026559    -2.620906
2017-12-26  0.974630    -0.025697    -2.536996
2016-11-14  0.974915    -0.025405    -2.508538
2017-06-12  0.976104    -0.024186    -2.389574
2017-10-19  0.976339    -0.023946    -2.366112
2016-05-12  0.976543    -0.023737    -2.345711
2016-06-17  0.977243    -0.023020    -2.275738
             Returns  Log Returns  Returns (%)
Date                                          
2017-11-03  1

In [60]:
# Perform Shapiro-Wilk normality test on STOCK Log Returns
res_test = shapiro(stock_returns_sorted['Log Returns'])

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

Shapiro-Wilk Test - Log Returns_AAPL
Statistic: 0.9926077127456665
P-value: 0.019486144185066223


##### NOTE SUL SHAPIRO-WILK TEST DI NORMALITà (su META)
Togliere valori upward e downward simultaneamente (stesso numero, di pari passo) non dà ottimi risultati, perchè: ad esempio con -16 e -9 ho un pvalue di 0.04598 mentre con -16 e -10 ho un pvalue più basso di 0.0379. Quindi bisogna insistere sul togliere salti downward più che upward. Infatti se notiamo, i salti verso l'alto, a partire dal settimo in poi, non sono superiori al 3%. Invece quelli verso il basso sono inferiori al 3% a partire dal 12esimo.

### META:
pvalue "ottimo" = 0.0497 (not so good) per #downward jumps = 17, #upward jumps = 9. Questo ci porta ad accettare l'ipotesi nulla che i dati siano distribuiti secondo una normale e facciamo un'analisi di distanza sui downward jumps così ottenuti.
### TESLA:
pvalue ottimo = 0.0862 best values to remove are #downward jumps = 5, #upward jumps = 1
### AAPL:
pvalue ottimo = 0.020 (molto basso, possiamo imporre un significance level $\alpha = 0.02$) ma poi se tolgo altro la situa peggiora.
Trovato con #downward jumps = 15, #upward jumps = 14
### NVDA:
pvalue ottimo = 0.0822 che va benissimo ma si devono togliere #downward jumps = 16 e #upward jumps = 15 che sono comunque tanti (N.B. : ci sono stati grandi movimenti)

In [61]:
fig = px.histogram(stock_returns_sorted['Log Returns'], x='Log Returns', nbins=30)
mean = stock_returns_sorted['Log Returns'].mean()
std = stock_returns_sorted['Log Returns'].std()
x = np.linspace(stock_returns_sorted['Log Returns'].min(), stock_returns_sorted['Log Returns'].max(), 100)
y = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title=f'{stock_ticker} Log Returns after high-valued jumps cleaning',
                  xaxis_title='Log Returns',
                  yaxis_title='Frequency')

fig.show()

Filter the S&P data according do the just found downward jumps and then join the two dataframes finally, find the distance between the performances of stock and index.

In [62]:
# Filter s&p data to find jumps
sp500_jumps3 = sp500_data[sp500_data.index.isin(downward_jumps.index)]

# Join dataframes
joined_jumps3 = downward_jumps[['Returns', 'Log Returns', 'Returns (%)']].join(sp500_jumps3[['Returns', 'Log Returns', 'Returns (%)']],
                                                            lsuffix=f'_{stock_ticker}', rsuffix='_SP500')

print(joined_jumps3)

# Find average scale factor
joined_jumps3['Distance'] = joined_jumps3[f'Returns (%)_{stock_ticker}'] / joined_jumps3[f'Returns (%)_SP500']

avg_distance3 = round(joined_jumps3['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance3}' )

            Returns_AAPL  Log Returns_AAPL  Returns (%)_AAPL  Returns_SP500   
Date                                                                          
2016-01-27      0.934294         -0.067965         -6.570639       0.989137  \
2016-04-27      0.937422         -0.064622         -6.257795       1.001649   
2017-06-09      0.961223         -0.039549         -3.877694       0.999170   
2017-05-17      0.966424         -0.034152         -3.357550       0.981822   
2017-08-10      0.968148         -0.032370         -3.185178       0.985526   
2016-04-28      0.969434         -0.031043         -3.056648       0.990769   
2016-06-24      0.971904         -0.028498         -2.809566       0.964080   
2016-11-10      0.972132         -0.028264         -2.786805       1.001951   
2016-02-05      0.973292         -0.027071         -2.670773       0.981519   
2016-09-08      0.973791         -0.026559         -2.620906       0.997777   
2017-12-26      0.974630         -0.025697         -

In [63]:
print(f'Average {stock_ticker} - SP500 jumps distance for the three different methods:')
print(f'> Fixed threshold: {avg_distance}\n> 99th percentile: {avg_distance2}\n> Normality test: {avg_distance3}')


Average AAPL - SP500 jumps distance for the three different methods:
> Fixed threshold: -15.9463
> 99th percentile: 3.6977
> Normality test: 22.547


##### OPPURE: calcolo la media dei salti nello stock, la media dei salti nell'S&P e poi faccio il rapporto

In [64]:
# METODO 1
avg_stock_jumps = joined_jumps[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps[f'Returns (%)_SP500'].mean()
scale_factor1 = avg_stock_jumps / avg_spx_jumps

# METODO 2
avg_stock_jumps = joined_jumps2[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps2[f'Returns (%)_SP500'].mean()
scale_factor2 = avg_stock_jumps / avg_spx_jumps

# METODO 3
avg_stock_jumps = joined_jumps3[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps3[f'Returns (%)_SP500'].mean()
scale_factor3 = avg_stock_jumps / avg_spx_jumps

print(f'> Fixed threshold: {scale_factor1}\n> 99th percentile: {scale_factor2}\n> Normality test: {scale_factor3}')



> Fixed threshold: 13.922565184520373
> 99th percentile: 5.065809383674981
> Normality test: 4.614649335030596


##### NOTA: per quanto riguarda NVDA ho fatto il 98-th percentile e ottenuto 6.727. Con il 99th percentile invece ottengo 16.092. Come gestire???

In [65]:
result = (scale_factor1+scale_factor2+scale_factor3)/3
print(result)

7.86767463440865


The scale factor obtained with the normality test is the one used to multiply the option prices, to find the prices for stock options.

In [66]:
scale_factors = {
    'TSLA': 10.35,
    'META': 6.23,
    'AAPL': 7.84,
    'NVDA': 10.7
}