In [None]:
# Import python packages
from math import sqrt, pi, exp

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Snowpark session handle; later cells use it to read tables into pandas.
session = get_active_session()


In [None]:
-- Select the working schema. The unqualified table name "gas_daily" read in the
-- next cell presumably resolves against it.
-- NOTE(review): confirm that gas_daily actually lives in MOVIES.PUBLIC.
USE SCHEMA MOVIES.PUBLIC;

In [None]:
# Load the daily gas table in date order and attach the day-over-day log-return.
gas = (
    session.table("gas_daily")
    .to_pandas()
    .sort_values("DT")
    .assign(log_ret=lambda frame: np.log(frame["PRICE"]).diff())
)

# The first row has no previous price, so its log-return is NaN and is dropped.
rets = gas["log_ret"].dropna().to_numpy()

# Mean and sample (ddof=1) standard deviation of the observed log-returns.
mu = rets.mean()
sigma = rets.std(ddof=1)


In [None]:
def sample_means(n, B=5000, data=None, rng=None):
    """Draw ``B`` resamples of size ``n`` (with replacement) and return their means.

    Parameters
    ----------
    n : int
        Number of observations in each resample; must be at least 1.
    B : int, default 5000
        Number of resamples, i.e. the length of the returned array.
    data : array-like, optional
        Observations to resample from. When omitted, the notebook-level
        ``rets`` array is used.
    rng : None, int, or numpy.random.Generator, optional
        Source of randomness. ``None`` keeps the original behaviour of
        drawing from NumPy's global ``np.random`` state. Pass an integer
        seed or a ``Generator`` to make the draw reproducible.

    Returns
    -------
    numpy.ndarray
        Array holding the mean of each resample (shape ``(B,)`` for
        one-dimensional data).

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1, or if there are no observations to
        draw from.
    """
    values = rets if data is None else np.asarray(data)

    # Fail loudly instead of returning NaN means (n == 0) or surfacing an
    # opaque "low >= high" error from the index draw (empty data).
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    if len(values) == 0:
        raise ValueError("cannot resample from an empty set of observations")

    shape = (B, n)
    if rng is None:
        # Legacy global state, so an existing np.random.seed(...) still applies.
        idx = np.random.randint(0, len(values), size=shape)
    else:
        # default_rng accepts either a seed or an existing Generator.
        idx = np.random.default_rng(rng).integers(0, len(values), size=shape)

    # Each row of idx selects one resample; average across the row.
    return values[idx].mean(axis=1)

In [None]:
def hist(n):
    """Plot resampled means of size ``n`` against their normal approximation.

    Draws resample means via ``sample_means(n)`` and shows them as a density
    histogram on a new figure. The normal density with mean ``mu`` and
    standard deviation ``sigma / sqrt(n)`` (both notebook-level values) is
    overlaid across ``mu`` plus or minus four of those standard deviations.
    The figure is left as the current pyplot figure so the caller can
    display it with ``plt.show()``.

    Parameters
    ----------
    n : int
        Size of each resample.
    """
    means = sample_means(n)

    # Standard deviation of the mean of n draws; computed once and reused.
    std_err = sigma / np.sqrt(n)

    _, ax = plt.subplots()
    ax.hist(means, bins=40, density=True, alpha=0.6, label="Resampled means")

    xs = np.linspace(mu - 4 * std_err, mu + 4 * std_err, 400)
    pdf = np.exp(-((xs - mu) ** 2) / (2 * std_err**2)) / (std_err * np.sqrt(2 * np.pi))
    ax.plot(xs, pdf, label="Normal approximation")

    # Label everything so the figure can be read without the code.
    ax.set_title(f"Sampling distribution of mean log-return (n={n})")
    ax.set_xlabel("Mean log-return")
    ax.set_ylabel("Density")
    ax.legend()

In [None]:
# n=5: histogram of resampled means with the normal curve overlaid.
hist(5)
plt.show()

In [None]:
# n=10: same plot with twice the resample size.
hist(10)
plt.show()

In [None]:
# n=100: same plot for a much larger resample size.
hist(100)
plt.show()

In [None]:
# Keep one set of n=5 resample means for the Altair chart in the next cell.
means5 = sample_means(5)

In [None]:
# Streamlit-rendered Altair histogram of the n=5 resample means.
# Each entry of means5 is the mean of one resample, so the bar heights count resamples.
st.altair_chart(
    alt.Chart(pd.DataFrame({"means": means5}))
    .mark_bar()
    .encode(
        x=alt.X("means", bin=alt.Bin(maxbins=40), title="Mean log-return"),
        y=alt.Y("count()", title="Number of resamples"),
    )
    .properties(title="Sampling distribution of mean log-return (n=5)")
)