In [None]:
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import yfinance as yf

In [None]:
# Ticker symbol to analyse (handed to Yahoo Finance via get_data).
ticker_Symbol = "URTH"
# Date window requested for the price history, as date strings.
start, end = "2010-01-01", "2024-03-01"

def get_data(ticker_Symbol, start, end, freq='1d'):
    """
    Download price history and ticker metadata from Yahoo Finance.

    :param ticker_Symbol: str: stock ticker symbol
    :param start: str: start date of the requested range
    :param end: str: end date of the requested range
    :param freq: str: frequency of the price bars, forwarded to yfinance as
        ``interval`` (e.g. '1d', '1wk', '1mo')
    :return: tuple ``(ticker_df, ticker_info)``: the price history returned by
        ``Ticker.history`` and the metadata returned by ``Ticker.info``
    """
    ticker_data = yf.Ticker(ticker_Symbol)
    # `freq` describes the bar frequency, so it must go to `interval`.
    # `period` is the look-back window and contradicts an explicit
    # start/end range, so it is intentionally not passed here.
    ticker_df = ticker_data.history(interval=freq, start=start, end=end)
    # Ticker metadata as exposed by yfinance.
    ticker_info = ticker_data.info
    return ticker_df, ticker_info

# Pull the price history and metadata for the configured ticker and window.
ticker_df, ticker_info = get_data(ticker_Symbol=ticker_Symbol, start=start, end=end)
print(ticker_info)
# Returns = percentage change of the closing price between consecutive rows.
close_prices = ticker_df["Close"]
ticker_df["returns"] = close_prices.pct_change()

In [None]:
# Line plot of the return series over time.
returns_series = ticker_df["returns"]
returns_series.plot(title=f"{ticker_Symbol} Returns", figsize=(12, 7))

In [None]:
# Histogram of returns with a KDE overlay.
# sns.distplot is deprecated; sns.histplot is its replacement.
# stat='density' keeps the normalised histogram that distplot drew when a KDE was shown.
sns.histplot(
    ticker_df['returns'],
    bins=30,
    stat='density',
    kde=True,
    color='blue',
    edgecolor='black',
    line_kws={'linewidth': 4},
)

In [None]:
# Summary statistics of the raw returns. Reads the column from ticker_df
# directly so the cell works on a fresh kernel (no `returns` variable exists yet).
ticker_df['returns'].describe()

In [None]:
# Winsorize the returns: cap both tails at the `outlier_threshold` quantiles
# (1st and 99th percentile for 0.01) instead of dropping observations.
returns = ticker_df['returns']
outlier_threshold = 0.01
# Both bounds come from the untouched series, so capping one tail cannot
# influence the bound used for the other.
lower_bound = returns.quantile(outlier_threshold)
upper_bound = returns.quantile(1 - outlier_threshold)
# clip() returns a new Series; the original returns column is left unmodified.
returns_clean = returns.clip(lower=lower_bound, upper=upper_bound)

# Boxplot and summary statistics of the winsorized returns.
sns.boxplot(data=returns_clean, orient='h', color='blue')
returns_clean.describe()

## are returns normally distributed?

scipy.stats.normaltest:
- H_0: The sample comes from a normal distribution

Returns:

    statistic: float or array

        s^2 + k^2, where s is the z-score returned by skewtest and k is the z-score returned by kurtosistest.
        Because the normal distribution has zero skewness and zero (“excess” or “Fisher”) kurtosis, the value of this statistic tends to be low for samples drawn from a normal distribution.
    
    pvalue: float or array

        A 2-sided chi squared probability for the hypothesis test.



In [None]:
# Normality test on the winsorized returns; missing values are removed first.
# (Pass ticker_df['returns'].dropna() instead to test the raw returns.)
winsorized_sample = returns_clean.dropna()
normaltest_result = stats.normaltest(winsorized_sample)
print(normaltest_result)

### interpretation

- The statistic is quite high, which indicates pronounced skewness and/or excess kurtosis in the sample distribution
- The p-value is very close to zero, i.e. the probability of observing a statistic at least as extreme as this one when sampling from the null distribution (a normal distribution) is practically zero!
=> The null hypothesis can be rejected: the sample is most likely not normally distributed

Even after capping the most extreme 1 percent at both ends of the distribution, the result still holds (although it is less extreme)