# Automated Functions

- ### get_stock_data(ticker, start, end)
- ### daily_returns(data) #only pass the df
- ### zscore_returns(data) #you must use daily_returns() before zscore_returns()
- ### ema_and_distances(data) #only pass the df
- ### ema_zscore_distances(data) #you must use ema_and_distances() before ema_zscore_distances()
- ### dist_stats_plots_returns(data) #pass a single column (e.g. the Series returned by daily_returns())
- ### zscore_stats_plots_returns(data) #pass a single column (e.g. the Series returned by zscore_returns())
- ### price_zscore_overtime(data) #pass only the df, but remember to run zscore_returns() and ema_zscore_distances() first, since it plots the 'zscore_returns' and 'EMA20_zscore_distance' columns

In [1]:
#=============================================================================
# GET STOCK DATA AND SUMMARY STATISTICS
#=============================================================================
def get_stock_data(ticker, start, end):
    """
    Download price history, print summary tables and show a candlestick chart.

    Parameters:
    - ticker: Symbol forwarded to yf.download; also used as the chart title
    - start: Start of the date range, forwarded to yf.download
    - end: End of the date range, forwarded to yf.download

    Returns:
    - The frame returned by yf.download (read here through its 'Open',
      'High', 'Low' and 'Close' columns and its index)
    """
    # The other helpers in this file read data['Adj Close'], so ask for the
    # unadjusted layout explicitly instead of relying on the library default.
    # NOTE(review): newer yfinance releases default auto_adjust to True, which
    # omits that column -- confirm against the installed version.
    stock_data = yf.download(ticker, start=start, end=end, auto_adjust=False)

    separator = '--------------------------'

    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() used to emit a stray "None" line.
    stock_data.info()
    print('\n', separator, '\n')

    for summary in (stock_data.head(), stock_data.tail(), stock_data.describe()):
        print(summary, '\n', separator, '\n')

    # Define the candlestick data
    candlestick = go.Candlestick(
        x=stock_data.index,
        open=stock_data['Open'],
        high=stock_data['High'],
        low=stock_data['Low'],
        close=stock_data['Close'],
    )

    # Create a candlestick figure titled with the ticker and display it
    fig = go.Figure(data=[candlestick])
    fig.update_layout(title=ticker)
    fig.show()

    return stock_data



#=============================================================================
# DAILY RETURNS (PCT.CHANGE())
#=============================================================================
def daily_returns(data):
    """
    Add a 'daily_returns' column with the percentage change of 'Adj Close'.

    Parameters:
    - data (DataFrame): Frame with an 'Adj Close' column; it is modified in
      place by adding/overwriting the 'daily_returns' column

    Returns:
    - (Series): data['daily_returns'], expressed in percent. The first row is
      NaN because it has no previous value to compare against.
    """
    # No dropna() here: assigning back into the frame re-aligns on the index,
    # so the dropped row came back as NaN anyway, and the index mismatch made
    # the assignment fail on frames whose index contains duplicate labels.
    data['daily_returns'] = data['Adj Close'].pct_change() * 100
    return data['daily_returns']



#=============================================================================
# Z-SCORE CALCULATION
#=============================================================================
def zscore_returns(data):
    """
    Standardise the 'daily_returns' column and store it as 'zscore_returns'.

    Parameters:
    - data (DataFrame): Frame that already has a 'daily_returns' column; it is
      modified in place by adding/overwriting 'zscore_returns'

    Returns:
    - (Series): data['zscore_returns'], i.e. (value - mean) / std of the
      'daily_returns' column, with mean and std taken from np.mean / np.std
    """
    returns = data['daily_returns']
    centre = np.mean(returns)
    spread = np.std(returns)

    data['zscore_returns'] = returns.sub(centre).div(spread)
    return data['zscore_returns']



#=============================================================================
# EMA CALCULATION
#=============================================================================
def ema_and_distances(data):
    """
    Add the 20-span EMA of 'Adj Close' and the price's distance from it.

    Parameters:
    - data (DataFrame): Frame with an 'Adj Close' column; it is modified in
      place by adding/overwriting 'EMA_20' and 'EMA_20_distance'

    Returns:
    - (Series): data['EMA_20_distance'], computed as
      (1 - EMA_20 / Adj Close) * 100, so for positive prices it is positive
      when the close sits above the EMA
    """
    close = data['Adj Close']
    ema_20 = close.ewm(span=20, adjust=False).mean()

    data['EMA_20'] = ema_20
    # Distance between EMA_20 and the close, relative to the close, in percent.
    data['EMA_20_distance'] = (1 - (ema_20 / close)) * 100

    return data['EMA_20_distance']



#=============================================================================
# Z-SCORE EMA CALCULATION
#=============================================================================
def ema_zscore_distances(data):
    """
    Standardise 'EMA_20_distance' and store it as 'EMA20_zscore_distance'.

    Parameters:
    - data (DataFrame): Frame that already has an 'EMA_20_distance' column; it
      is modified in place by adding/overwriting 'EMA20_zscore_distance'

    Returns:
    - (Series): data['EMA20_zscore_distance'], i.e. (value - mean) / std of
      the 'EMA_20_distance' column, with mean and std taken from
      np.mean / np.std
    """
    distances = data['EMA_20_distance']
    centre = np.mean(distances)
    spread = np.std(distances)

    data['EMA20_zscore_distance'] = distances.sub(centre).div(spread)
    return data['EMA20_zscore_distance']



#=============================================================================
# DIST STATS PLOTS
#=============================================================================
def dist_stats_plots_returns(data):
    """
    Show a histogram (with KDE) and a boxplot of *data* side by side.

    Parameters:
    - data: Values handed directly to sns.histplot and sns.boxplot

    Returns:
    - None; the figure is displayed with plt.show()
    """
    _, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: distribution of the values.
    sns.histplot(data, kde=True, bins=30, ax=hist_ax, color='lightblue')
    hist_ax.set(xlabel="Returns in %", ylabel="Frequency (count)")
    # Recolour the first line on the axes (presumably the KDE curve) so it
    # stands out against the light bars.
    hist_ax.get_lines()[0].set_color('black')
    hist_ax.set_title('Distribution Plot of Returns')

    # Right panel: boxplot of the same values.
    sns.boxplot(data, ax=box_ax, color='orange')
    box_ax.set_title('Boxplot of Returns')

    plt.show()



#=============================================================================
# Z-SCORE STATS PLOTS
#=============================================================================
def zscore_stats_plots_returns(data):
    """
    Show a histogram (with KDE) and a boxplot of Z-score values side by side.

    Parameters:
    - data: Values handed directly to sns.histplot and sns.boxplot

    Returns:
    - None; the figure is displayed with plt.show()
    """
    _, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: distribution of the Z-scores.
    sns.histplot(data, kde=True, bins=30, color='green', ax=hist_ax)
    # Recolour the first line on the axes (presumably the KDE curve).
    hist_ax.get_lines()[0].set_color('black')
    hist_ax.set(
        xlabel="X-values in terms of standard deviations, known as Z-Scores",
        ylabel="Frequency (count)",
    )

    # Right panel: boxplot of the same values.
    sns.boxplot(data, ax=box_ax, color='orange')
    box_ax.set_title('Boxplot of Z-Scores')

    plt.show()



#=============================================================================
# CONFIDENCE INTERVALS WITH 95% OF CONFIDENCE
#=============================================================================
def confidence_interval(sample, confidence=0.95):
    """
    Calculate a normal-approximation confidence interval for the sample mean.

    Parameters:
    - sample (array-like): The sample data points
    - confidence (float): The confidence level (default is 0.95 for 95% confidence).
      0.90, 0.95 and 0.99 use the rounded table values 1.645, 1.960 and 2.576;
      any other level strictly between 0 and 1 uses the exact standard-normal
      quantile. Anything else triggers a warning and falls back to 1.960.

    Returns:
    - (tuple): Lower and upper bounds of the confidence interval
    """
    # Imported locally so the function does not depend on imports elsewhere
    # in the file.
    import warnings
    from statistics import NormalDist

    # Calculate the sample mean and standard deviation
    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)  # Using ddof=1 for unbiased estimation

    # Get the sample size
    n = len(sample)

    # Rounded constants kept so results for the common levels do not change.
    tabulated = {
        0.90: 1.645,
        0.95: 1.960,
        0.99: 2.576,
    }
    z_value = tabulated.get(confidence)

    if z_value is None:
        try:
            usable = 0 < confidence < 1
        except TypeError:
            usable = False

        if usable:
            # Two-sided interval: put (1 - confidence) / 2 in each tail.
            z_value = NormalDist().inv_cdf((1 + confidence) / 2)
        else:
            # Previously every unrecognised level silently became 95%; keep
            # that fallback for invalid input only, and say so.
            warnings.warn(
                f"confidence={confidence!r} is not between 0 and 1; "
                "falling back to the 95% Z-value (1.960)",
                stacklevel=2,
            )
            z_value = 1.960

    # Calculate the margin of error
    moe = z_value * (sample_std / np.sqrt(n))

    # Calculate the lower and upper bounds of the confidence interval
    return sample_mean - moe, sample_mean + moe

# Test the function
#lower, upper = confidence_interval(sample_data, 0.95)
#print(f"The 95% confidence interval is from {lower} to {upper}")



#=============================================================================
# PRICE AND Z-SCORE OVERTIME
#=============================================================================
def price_zscore_overtime(data):
    """
    Plot price, return Z-scores and EMA-distance Z-scores on three stacked axes.

    Parameters:
    - data (DataFrame): Frame that must already contain the columns
      'Adj Close', 'zscore_returns' and 'EMA20_zscore_distance'

    Returns:
    - None; the figure is displayed with plt.show()
    """
    fig, axes = plt.subplots(3, 1, figsize=(20, 20))
    close = data['Adj Close']

    # Top panel: price, with the first and last values as reference lines.
    price_plot = sns.lineplot(close, ax=axes[0], color='orange')
    price_plot.set(ylabel='Closing Price')
    # .iloc selects by position regardless of index type; plain [0] / [-1] on
    # a Series relies on a positional fallback that pandas has deprecated.
    axes[0].axhline(close.iloc[0], color='green', linestyle='--', label='First Close of the Time Series')
    axes[0].axhline(close.iloc[-1], color='purple', linestyle='--', label='Last Close of the Time Series')
    axes[0].set_title('Price and Z-Score over time')
    axes[0].legend()

    # Middle and bottom panels share the same layout: a Z-score series plus
    # horizontal lines at the 95% and 99% two-sided normal thresholds.
    panels = (
        (axes[1], 'zscore_returns', 'Z-Score Returns'),
        (axes[2], 'EMA20_zscore_distance', 'EMA Z-Score Returns'),
    )
    thresholds = (
        (1.96, 'g', '95% CI'),
        (2.576, 'r', '99% CI'),
    )
    for ax, column, ylabel in panels:
        z_plot = sns.lineplot(data[column], ax=ax, color='orange')
        z_plot.set(ylabel=ylabel)
        for level, colour, label in thresholds:
            # Only the positive line carries a label so each threshold
            # appears once in the legend.
            ax.axhline(level, color=colour, linestyle='--', label=label)
            ax.axhline(-level, color=colour, linestyle='--')
        ax.legend()

    # Show the plot
    plt.show()