# Downward Jump Analysis
This script is used to analyze jumps on my stocks data and rescale them looking at the jumps on the SPX.

In [114]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
import warnings
from scipy.stats import shapiro
from scipy.stats import norm

from IPython.display import display
pio.templates.default = "seaborn"
plt.style.use('seaborn')
warnings.simplefilter(action='ignore', category=FutureWarning)
np.random.seed(27)


The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.



We want to analyze data for an underlying (**META**, **TSLA**, **AAPL**, **NVDA**) and the S&P500 index. This because option prices are available for the options on the SPX index, but they aren't for options on the stock. Thus, we will compare the jumps magnitude of the two financial objects to obtain plausible market prices for stock options.

Let's start by retrieving data from Google Finance, in the data range between 20 Jan 2016 and 20 Jan 2018.

In [115]:
# Define the instruments to download.
# #tickers = ['TSLA', '^GSPC']
stock_ticker = 'NVDA'           # AAPL, META, TSLA, MSFT, NVDA, GOOG
index_ticker = '^GSPC'

######## FIRST TIME, THEN SAVED INTO A CSV FILE ##########
# Take all available data from 20/01/2016 until 20/01/2018.
start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

# Use pandas_reader.data.DataReader to load the data.
stock_data = yf.download(stock_ticker, start=start_date, end=end_date, auto_adjust=True)
sp500_data = yf.download(index_ticker, start=start_date, end=end_date, auto_adjust=True)
#stock_data.to_csv(f'data/{stock_ticker}_data.csv', index=True)
#sp500_data.to_csv(f'data/SPX_data.csv', index=True)

#stock_data = pd.read_csv(f'data/{stock_ticker}_data.csv')
#sp500_data = pd.read_csv(f'data/SPX_data.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Let's add the **Returns** and **Log Returns** columns to both dataframes.

In [116]:
# Find daily returns St/St-1 and Log Returns ln(St/St-1)
stock_data['Returns'] = stock_data['Close'] / stock_data['Close'].shift()
stock_data['Log Returns'] = np.log(stock_data['Returns'])

sp500_data['Returns'] = sp500_data['Close'] / sp500_data['Close'].shift()
sp500_data['Log Returns'] = np.log(sp500_data['Returns'])

stock_data['Returns (%)'] = (stock_data['Returns'] -1) * 100
sp500_data['Returns (%)'] = (sp500_data['Returns'] -1) * 100

# Show the first rows for the stock data
print(stock_data.head(6))

# Show the first rows for the sp500 data
print(sp500_data.head(9))

# SPX value at 20/01/2016
SPX_S0 = sp500_data.loc['2016-01-20', 'Close']
SPX_St = sp500_data.loc['2017-01-18', 'Close']
print(f'\nSPX value on the 20th Jan, 2016: {SPX_S0}')
print(f'SPX value on the 18th Jan, 2017 (after 252 days): {SPX_St}')
#sp500_data.to_csv('data/sp500_data.csv', index=True)

                Open      High       Low     Close    Volume   Returns   
Date                                                                     
2016-01-20  6.544409  6.793394  6.456532  6.707958  48118000       NaN  \
2016-01-21  6.781187  6.891033  6.642048  6.786068  48145600  1.011644   
2016-01-22  6.915445  6.991117  6.900799  6.944737  25799600  1.023382   
2016-01-25  6.942297  7.022851  6.922769  6.937415  27072800  0.998946   
2016-01-26  6.952060  7.035055  6.881270  7.005763  22794400  1.009852   
2016-01-27  6.995998  7.017968  6.790952  6.922768  23170000  0.988153   

            Log Returns  Returns (%)  
Date                                  
2016-01-20          NaN          NaN  
2016-01-21     0.011577     1.164448  
2016-01-22     0.023112     2.338158  
2016-01-25    -0.001055    -0.105444  
2016-01-26     0.009804     0.985208  
2016-01-27    -0.011917    -1.184667  
                   Open         High          Low        Close      Volume   
Date             

### Interactive Stock Chart and Analysis
Using RangeSlider and Selectors we plot an interactive chart for the underlying stock considered.

In [117]:
# VISUALIZE STOCK PRICE OVER THE YEARS
fig = px.line(stock_data, x=stock_data.index, y = 'Close', title=f'{stock_ticker} stock price with Selectors')

# Add the selectors
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

In [118]:
fig = go.Figure(data=
    [go.Candlestick(x=stock_data.index,
                    open=stock_data["Open"],
                    high=stock_data["High"],
                    low=stock_data["Low"],
                    close=stock_data["Close"])]
)

fig.update_layout(
    title=f"{stock_ticker} adjusted stock price",
    yaxis_title="Price ($)",
)

fig.show()

In [119]:
fig = go.Figure(data=go.Scatter(
    x=stock_data.index,
    y=stock_data['Returns'],
    mode='lines',
    name='Stock Returns'
))

fig.update_layout(
    title=f'{stock_ticker} Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

#### Interactive S&P Index Chart and Analysis

In [120]:
# VISUALIZE STOCK PRICE OVER THE YEARS
fig = px.line(sp500_data, x=sp500_data.index, y = 'Close', title=f'S&P500 price with Selectors')

# Add the selectors
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

Now we find historical parameters of SP500 in order to compare results of the calibration process.

In [121]:
# historic volatility 1 year
start_date = pd.to_datetime('2016-01-20')
end_date = pd.to_datetime('2017-01-20')

selected_data = sp500_data.loc[start_date:end_date, 'Log Returns']
sp_vola_1y = selected_data.std() * np.sqrt(252)
print(f'\nHistorical volatility: {round(sp_vola_1y, 4)}, {round(sp_vola_1y*100,3)}%')

# historic volatility 2 years
start_date = pd.to_datetime('2016-01-20')
end_date = pd.to_datetime('2018-01-20')

selected_data = sp500_data.loc[start_date:end_date, 'Log Returns']
sp_vola_2y = selected_data.std() * np.sqrt(252*2)
print(f'\nHistorical volatility: {round(sp_vola_2y, 4)}, {round(sp_vola_2y*100,3)}%')


Historical volatility: 0.1203, 12.034%

Historical volatility: 0.1383, 13.826%


## Method 1. Set a threshold
We say that JUMPS are all returns worth less than a set threshold (here: 5%)

In [122]:
threshold = 0.95
stock_jumps = stock_data[stock_data['Returns'] < threshold].copy()
print(f'Days with less than {- (100 - threshold*100)}% returns:\n {stock_jumps}')

Days with less than -5.0% returns:
                  Open       High        Low      Close     Volume   Returns   
Date                                                                          
2016-02-05   6.839773   6.915445   6.432121   6.451649   60042800  0.936902  \
2016-06-24  11.421738  11.632979  11.126984  11.232604  101768400  0.943081   
2016-12-28  29.475398  29.556727  26.786631  26.924643  229576400  0.931214   
2017-02-23  26.033275  26.252893  24.597127  24.797003  159288800  0.907277   
2017-04-04  25.515071  25.766766  24.759982  24.868557  127128000  0.929876   
2017-05-17  33.090634  33.278171  31.474349  31.516298  127136800  0.933557   
2017-06-09  40.694198  41.622994  35.262210  36.954304  369292800  0.935351   
2017-08-11  38.816843  39.276302  37.771946  38.525360  149719600  0.946704   
2017-11-29  51.958572  51.985788  47.312213  48.596272  139678800  0.932182   
2017-12-04  49.494377  49.556229  45.647150  46.181557  124084400  0.944253   

            Log

In [123]:
fig = go.Figure(data=go.Scatter(
    x=stock_data.index,
    y=stock_data['Log Returns'],
    mode='lines',
    name='Stock Returns'
))

# Add the horizontal line
fig.add_shape(
    type='line',
    x0=0, x1=1,
    y0=np.log(threshold), y1=np.log(threshold),
    xref='paper',
    yref='y',
    line=dict(color='red', width=2),
    name='5% jumps threshold '
)


fig.update_layout(
    title=f'{stock_ticker} Log Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

Let's show returns for the SP500 index.

In [124]:
fig = go.Figure(data=go.Scatter(
    x=sp500_data.index,
    y=sp500_data['Returns'],
    mode='lines',
    name='Stock Returns'
))

fig.update_layout(
    title=f'{index_ticker} Returns',
    xaxis_title='Date',
    yaxis_title='Returns',
)

fig.show()

Then we filter the S&P500 data in the selected jump days

In [125]:
sp500_jumps = sp500_data[sp500_data.index.isin(stock_jumps.index)]
print(sp500_jumps)

                   Open         High          Low        Close      Volume   
Date                                                                         
2016-02-05  1913.069946  1913.069946  1872.650024  1880.050049  4929940000  \
2016-06-24  2103.810059  2103.810059  2032.569946  2037.410034  7597450000   
2016-12-28  2270.229980  2271.310059  2249.110107  2249.919922  2402750000   
2017-02-23  2367.500000  2368.260010  2355.090088  2363.810059  4021770000   
2017-04-04  2354.760010  2360.530029  2350.719971  2360.159912  3208340000   
2017-05-17  2382.949951  2384.870117  2356.209961  2357.030029  4164760000   
2017-06-09  2436.389893  2446.199951  2415.699951  2431.770020  4029860000   
2017-08-11  2441.040039  2448.090088  2437.850098  2441.320068  3161830000   
2017-11-29  2627.820068  2634.889893  2620.320068  2626.070068  4090630000   
2017-12-04  2657.189941  2665.189941  2639.030029  2639.439941  4025840000   

             Returns  Log Returns  Returns (%)  
Date          

and create a separate dataframe where **Returns** and **Log returns** of both stock and index (in the selected jump days) are stored.

In [126]:
joined_jumps = stock_jumps[['Returns (%)']].join(sp500_jumps[['Returns (%)']], lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps)

            Returns (%)_NVDA  Returns (%)_SP500
Date                                           
2016-02-05         -6.309796          -1.848125
2016-06-24         -5.691889          -3.591980
2016-12-28         -6.878634          -0.835653
2017-02-23         -9.272290           0.041899
2017-04-04         -7.012374           0.055952
2017-05-17         -6.644280          -1.817821
2017-06-09         -6.464935          -0.082999
2017-08-11         -5.329610           0.127557
2017-11-29         -6.781838          -0.036923
2017-12-04         -5.574661          -0.105216


*Let's analyze days in which we found jumps and compare the behaviour of the index wrt the stock.*
The following plot contains the Close prices for both objects, highlighting the days when jumps have been observed.

In [127]:
highlight_dates = joined_jumps.index.tolist()

start_date = datetime(2016, 1, 20)
end_date = datetime(2018, 1, 20)

resized_stock_df = stock_data.loc[start_date:end_date]
resized_sp500_df = sp500_data.loc[start_date:end_date]

# Create the figure
fig = go.Figure()

# Add STOCK data to the figure
fig.add_trace(go.Scatter(x=resized_stock_df.index, y=resized_stock_df['Close'], name=stock_ticker))

# Add S&P 500 index data to the figure
fig.add_trace(go.Scatter(x=resized_sp500_df.index, y=resized_sp500_df['Close'], name=index_ticker, yaxis='y2'))

# Set the layout with the secondary y-axis
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis=dict(
        title='TSLA Price',
        anchor='free',
        side='left',
        position=0.05
    ),
    yaxis2=dict(
        title='S&P 500 Price',
        overlaying='y',
        anchor='x',
        side='right',
        position=0.95
    ),
    legend_title='Symbol',
)

# Highlight the specific days
for date in highlight_dates:
    fig.add_annotation(
        x=date, y=resized_stock_df.loc[date, 'Close'],
        showarrow=True,
        arrowhead=1,
        arrowsize=1.5,
        arrowwidth=2,
        arrowcolor='red',
        ax=20,
        ay=-40,
        xanchor='center',
        font=dict(color='red')
    )

# Display the figure
fig.show()

In [128]:
# Create the figure
fig = go.Figure()

# Add STOCK data to the figure
fig.add_trace(go.Scatter(x=resized_stock_df.index, y=resized_stock_df['Returns'], name=stock_ticker))

# Add S&P 500 index data to the figure
fig.add_trace(go.Scatter(x=resized_sp500_df.index, y=resized_sp500_df['Returns'], name=index_ticker))

# Set the layout with the secondary y-axis
fig.update_layout(
    title=f'Comparison: {stock_ticker} vs S&P 500 Index',
    xaxis_title='Date',
    yaxis_title='Returns',
    legend=dict(
        title='Symbol',
        x=1,
        y=1,
        traceorder='normal',
        font=dict(size=12),
    )
)

# Highlight the specific days
for date in highlight_dates:
    fig.add_annotation(
        x=date, y=resized_stock_df.loc[date, 'Returns'],
        showarrow=True,
        arrowhead=1,
        arrowsize=1.5,
        arrowwidth=2,
        arrowcolor='red',
        ax=20,
        ay=-40,
        xanchor='center',
        font=dict(color='red')
    )

# Display the figure
fig.show()


By doing so, let's find the distance (difference of returns) between the stock returns and the SP500 returns in the selected days.

In [129]:
joined_jumps['Distance'] = joined_jumps[f'Returns (%)_{stock_ticker}'] / joined_jumps['Returns (%)_SP500']

avg_distance = round(joined_jumps['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance}' )

Average scale factor: -5.6975


## METHOD 2. 99-th percentile

In [130]:
# Calculate the 99th percentile
threshold = np.nanpercentile(stock_data['Returns (%)'].values, 1)

# Filter the returns above the threshold
stock_jumps2 = stock_data[stock_data['Returns (%)'] < threshold]

# Plot the distribution of returns
# Create a histogram trace for all returns
hist_trace_all = go.Histogram(x=stock_data['Returns (%)'], nbinsx=30, opacity=0.5, name='All Returns')

# Create a histogram trace for low returns
hist_trace_low = go.Histogram(x=stock_jumps2['Returns (%)'], nbinsx=30, opacity=0.9, name='Low Returns')

# Create the layout
layout = go.Layout(
    title='Distribution of Returns',
    xaxis=dict(title='Returns in (%)'),
    yaxis=dict(title='Frequency'),
    barmode='overlay'
)

# Create the figure
figure = go.Figure(data=[hist_trace_all, hist_trace_low], layout=layout)

# Display the histogram
figure.show()

In [131]:
print(threshold)
print(stock_jumps2.shape, stock_jumps2)

-6.460281289781944
(6, 8)                  Open       High        Low      Close     Volume   Returns   
Date                                                                          
2016-12-28  29.475398  29.556727  26.786631  26.924643  229576400  0.931214  \
2017-02-23  26.033275  26.252893  24.597127  24.797003  159288800  0.907277   
2017-04-04  25.515071  25.766766  24.759982  24.868557  127128000  0.929876   
2017-05-17  33.090634  33.278171  31.474349  31.516298  127136800  0.933557   
2017-06-09  40.694198  41.622994  35.262210  36.954304  369292800  0.935351   
2017-11-29  51.958572  51.985788  47.312213  48.596272  139678800  0.932182   

            Log Returns  Returns (%)  
Date                                  
2016-12-28    -0.071267    -6.878634  
2017-02-23    -0.097307    -9.272290  
2017-04-04    -0.072704    -7.012374  
2017-05-17    -0.068753    -6.644280  
2017-06-09    -0.066834    -6.464935  
2017-11-29    -0.070228    -6.781838  


In [132]:
print(f'99-th percentile of returns distribution: moves bigger than {round(threshold, 2)} % are jumps.')

99-th percentile of returns distribution: moves bigger than -6.46 % are jumps.


The code snippet below gets the current stock ticker and show its weight in the SP500 index.

In [133]:
# yf.pdr_override()
# start_date = "2020-01-01"
# end_date = "2023-01-31"
#
# # Fetch S&P 500 data from Yahoo Finance
# sp500_data = pdr.get_data_yahoo(index_ticker, start=start_date, end=end_date)
# reference_date = '2021-05-23'  # Choose a specific date in 2016
#
# # Retrieve the market capitalization data for each constituent company
# market_cap_data = pdr.get_data_yahoo(index_ticker, start=reference_date, end=reference_date)
#
# # Normalize the market capitalization to obtain the weights
# sp500_weights = market_cap_data['MarketCap'] / market_cap_data['MarketCap'].sum()
#
# # Sort the companies by weight in descending order
# sp500_weights = sp500_weights.sort_values(ascending=False)
#
# # Display the list of companies with their weights
# print(sp500_weights)

In [134]:
# Join jumps pf STOCK and INDEX in a table
sp500_jumps2 = sp500_data[sp500_data.index.isin(stock_jumps2.index)]
print(sp500_jumps2)

                   Open         High          Low        Close      Volume   
Date                                                                         
2016-12-28  2270.229980  2271.310059  2249.110107  2249.919922  2402750000  \
2017-02-23  2367.500000  2368.260010  2355.090088  2363.810059  4021770000   
2017-04-04  2354.760010  2360.530029  2350.719971  2360.159912  3208340000   
2017-05-17  2382.949951  2384.870117  2356.209961  2357.030029  4164760000   
2017-06-09  2436.389893  2446.199951  2415.699951  2431.770020  4029860000   
2017-11-29  2627.820068  2634.889893  2620.320068  2626.070068  4090630000   

             Returns  Log Returns  Returns (%)  
Date                                            
2016-12-28  0.991643    -0.008392    -0.835653  
2017-02-23  1.000419     0.000419     0.041899  
2017-04-04  1.000560     0.000559     0.055952  
2017-05-17  0.981822    -0.018345    -1.817821  
2017-06-09  0.999170    -0.000830    -0.082999  
2017-11-29  0.999631    -0.00036

In [135]:
joined_jumps2 = stock_jumps2[['Returns (%)']].join(sp500_jumps2[['Returns (%)']],
                                                            lsuffix=f'_{stock_ticker}', rsuffix='_SP500')
print(joined_jumps2)

            Returns (%)_NVDA  Returns (%)_SP500
Date                                           
2016-12-28         -6.878634          -0.835653
2017-02-23         -9.272290           0.041899
2017-04-04         -7.012374           0.055952
2017-05-17         -6.644280          -1.817821
2017-06-09         -6.464935          -0.082999
2017-11-29         -6.781838          -0.036923


Let's find the average distance between jumps in stock price and jumps in index value.

In [136]:
joined_jumps2['Distance'] = joined_jumps2[f'Returns (%)_{stock_ticker}'] / joined_jumps2[f'Returns (%)_SP500']

avg_distance2 = round(joined_jumps2['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance2}' )

Average scale factor: -12.1958


## Method 3. Perform a normality test

In [137]:
fig = px.histogram(stock_data['Log Returns'], x=stock_data['Log Returns'], nbins=30)
mean = stock_data['Log Returns'].mean()
std = stock_data['Log Returns'].std()
x = np.linspace(stock_data['Log Returns'].min(), stock_data['Log Returns'].max())
y = norm.pdf(x, mean, std)

y = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title='Histogram of Log Returns',
                  xaxis_title='Returns (%)',
                  yaxis_title='Frequency')

fig.show()

In [138]:
# Perform Shapiro-Wilk normality test on STOCK Log Returns
res_test = shapiro(stock_data['Log Returns'].dropna())

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

Shapiro-Wilk Test - Log Returns_NVDA
Statistic: 0.8272442817687988
P-value: 6.456865005646156e-23


Since the p-value is less than the conventional significance level of 0.05 (or 5%), we have sufficient evidence to reject the null hypothesis that the data follows a normal distribution. The **Shapiro-Wilk test** statistic indicates how data distribution departures from normality. A value closer to 1 suggests that the data closely follows a normal distribution, whereas a value closer to 0 suggests a significant deviation from normality.
Due to these reasons, we will remove extreme jumps in order to make the **LOG RETURNS** column pass the normality test.

In [145]:
stock_returns_sorted= stock_data[['Returns', 'Log Returns', 'Returns (%)']].dropna().sort_values('Log Returns')

print('Original dimensions stock dataframe: ', stock_returns_sorted.shape)
# print(stock_returns_sorted.head(5),'\n', stock_returns_sorted.tail(5))

# Remove rows affecting the normality of the Log Returns distribution
downward_jumps = stock_returns_sorted[:16]
upward_jumps = stock_returns_sorted[-15:]
stock_returns_sorted.drop(downward_jumps.index, inplace=True)
stock_returns_sorted.drop(upward_jumps.index, inplace=True)
print(downward_jumps)
print(upward_jumps)
print(stock_returns_sorted.head(10), '\n', stock_returns_sorted.tail(10))
print('Final dimensions stock dataframe: ', stock_returns_sorted.shape)

Original dimensions stock dataframe:  (504, 3)
             Returns  Log Returns  Returns (%)
Date                                          
2017-02-23  0.907277    -0.097307    -9.272290
2017-04-04  0.929876    -0.072704    -7.012374
2016-12-28  0.931214    -0.071267    -6.878634
2017-11-29  0.932182    -0.070228    -6.781838
2017-05-17  0.933557    -0.068753    -6.644280
2017-06-09  0.935351    -0.066834    -6.464935
2016-02-05  0.936902    -0.065177    -6.309796
2016-06-24  0.943081    -0.058603    -5.691889
2017-12-04  0.944253    -0.057361    -5.574661
2017-08-11  0.946704    -0.054769    -5.329610
2016-09-09  0.950191    -0.051092    -4.980865
2016-12-01  0.950542    -0.050722    -4.945755
2016-11-14  0.950779    -0.050474    -4.922142
2017-02-13  0.953881    -0.047216    -4.611877
2016-02-08  0.954219    -0.046862    -4.578137
2017-09-25  0.955307    -0.045722    -4.469264
             Returns  Log Returns  Returns (%)
Date                                          
2017-05-15  1

In [146]:
# Perform Shapiro-Wilk normality test on STOCK Log Returns
res_test = shapiro(stock_returns_sorted['Log Returns'])

print(f'Shapiro-Wilk Test - Log Returns_{stock_ticker}')
print("Statistic:", res_test.statistic)
print('P-value:', res_test.pvalue)

Shapiro-Wilk Test - Log Returns_NVDA
Statistic: 0.9944191575050354
P-value: 0.08219316601753235


##### NOTE SUL SHAPIRO-WILK TEST DI NORMALITà (su META)
Togliere valori upward e downward simultaneamente (stesso numero, di pari passo) non dà ottimi risultati, perchè: ad esempio con -16 e -9 ho un pvalue di 0.04598 mentre con -16 e -10 ho un pvalue più basso di 0.0379. Quindi bisogna insistere sul togliere salti downward più che upward. Infatti se notiamo, i salti verso l'alto, a partire dal settimo in poi, non sono superiori al 3%. Invece quelli verso il basso sono inferiori al 3% a partire dal 12esimo.

### META:
pvalue "ottimo" = 0.0497 (not so good) per #downward jumps = 17, #upward jumps = 9. Questo ci porta ad accettare l'ipotesi nulla che i dati siano distribuiti secondo una normale e facciamo un'analisi di distanza sui downward jumps così ottenuti.
### TESLA:
pvalue ottimo = 0.0862 best values to remove are #downward jumps = 5, #upward jumps = 1
### AAPL:
pvalue ottimo = 0.020 (molto basso, possiamo imporre un significance level $\alpha = 0.02$) ma poi se tolgo altro la situa peggiora.
Trovato con #downward jumps = 15, #upward jumps = 14
### NVDA:
pvalue ottimo = 0.0822 che va benissimo ma si devono togliere #downward jumps = 16 e #upward jumps = 15 che sono comunque tanti (N.B. : ci sono stati grandi movimenti)

In [147]:
fig = px.histogram(stock_returns_sorted['Log Returns'], x='Log Returns', nbins=30)
mean = stock_returns_sorted['Log Returns'].mean()
std = stock_returns_sorted['Log Returns'].std()
x = np.linspace(stock_returns_sorted['Log Returns'].min(), stock_returns_sorted['Log Returns'].max(), 100)
y = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Normal Distribution'))

fig.update_layout(title=f'{stock_ticker} Log Returns after high-valued jumps cleaning',
                  xaxis_title='Log Returns',
                  yaxis_title='Frequency')

fig.show()

Filter the S&P data according do the just found downward jumps and then join the two dataframes finally, find the distance between the performances of stock and index.

In [148]:
# Filter s&p data to find jumps
sp500_jumps3 = sp500_data[sp500_data.index.isin(downward_jumps.index)]

# Join dataframes
joined_jumps3 = downward_jumps[['Returns', 'Log Returns', 'Returns (%)']].join(sp500_jumps3[['Returns', 'Log Returns', 'Returns (%)']],
                                                            lsuffix=f'_{stock_ticker}', rsuffix='_SP500')

print(joined_jumps3)

# Find average scale factor
joined_jumps3['Distance'] = joined_jumps3[f'Returns (%)_{stock_ticker}'] / joined_jumps3[f'Returns (%)_SP500']

avg_distance3 = round(joined_jumps3['Distance'].mean(), 4)

print(f'Average scale factor: {avg_distance3}' )

            Returns_NVDA  Log Returns_NVDA  Returns (%)_NVDA  Returns_SP500   
Date                                                                          
2017-02-23      0.907277         -0.097307         -9.272290       1.000419  \
2017-04-04      0.929876         -0.072704         -7.012374       1.000560   
2016-12-28      0.931214         -0.071267         -6.878634       0.991643   
2017-11-29      0.932182         -0.070228         -6.781838       0.999631   
2017-05-17      0.933557         -0.068753         -6.644280       0.981822   
2017-06-09      0.935351         -0.066834         -6.464935       0.999170   
2016-02-05      0.936902         -0.065177         -6.309796       0.981519   
2016-06-24      0.943081         -0.058603         -5.691889       0.964080   
2017-12-04      0.944253         -0.057361         -5.574661       0.998948   
2017-08-11      0.946704         -0.054769         -5.329610       1.001276   
2016-09-09      0.950191         -0.051092         -

In [149]:
print(f'Average {stock_ticker} - SP500 jumps distance for the three different methods:')
print(f'> Fixed threshold: {avg_distance}\n> 99th percentile: {avg_distance2}\n> Normality test: {avg_distance3}')


Average NVDA - SP500 jumps distance for the three different methods:
> Fixed threshold: -5.6975
> 99th percentile: -12.1958
> Normality test: 24.9894


##### OPPURE: calcolo la media dei salti nello stock, la media dei salti nell'S&P e poi faccio il rapporto

In [150]:
# METODO 1
avg_stock_jumps = joined_jumps[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps[f'Returns (%)_SP500'].mean()
scale_factor1 = avg_stock_jumps / avg_spx_jumps

# METODO 2
avg_stock_jumps = joined_jumps2[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps2[f'Returns (%)_SP500'].mean()
scale_factor2 = avg_stock_jumps / avg_spx_jumps

# METODO 3
avg_stock_jumps = joined_jumps3[f'Returns (%)_{stock_ticker}'].mean()
avg_spx_jumps = joined_jumps3[f'Returns (%)_SP500'].mean()
scale_factor3 = avg_stock_jumps / avg_spx_jumps

print(f'> Fixed threshold: {scale_factor1}\n> 99th percentile: {scale_factor2}\n> Normality test: {scale_factor3}')



> Fixed threshold: 8.149980902089178
> 99th percentile: 16.091806556837042
> Normality test: 7.858196377695408


##### NOTA: per quanto riguarda NVDA ho fatto il 98-th percentile e ottenuto 6.727. Con il 99th percentile invece ottengo 16.092. Come gestire???

In [152]:
result = (scale_factor1+scale_factor2+scale_factor3)/3
print(result)

10.699994612207208


The scale factor obtained with the normality test is the one used to multiply the option prices, to find the prices for stock options.

In [None]:
scale_factors = {
    'TSLA': 10.35,
    'META': 6.23,
    'AAPL': 7.84,
    'NVDA': 10.7
}