### 1. Basic Statistics 

In [None]:
import random
import pandas as pd

# Sample observations from some kind of hidden structure

def sample_from_hidden_structures(samples=100, structure='coin'):
    """Draw independent, uniformly random outcomes from a named structure.

    Parameters
    ----------
    samples : int
        Number of observations to draw.
    structure : str
        'coin' draws from 'Heads' / 'Tails'; 'dice' draws from 1..6.

    Returns
    -------
    pandas.DataFrame
        One row per observation, in a single 'Outcome' column.

    Raises
    ------
    ValueError
        If `structure` is not one of the supported names.
    """
    outcomes_by_structure = {
        # Coin has two outcomes: 'Heads' or 'Tails'
        'coin': ['Heads', 'Tails'],
        # Dice has six outcomes: 1, 2, 3, 4, 5, 6
        'dice': [1, 2, 3, 4, 5, 6],
    }

    if structure not in outcomes_by_structure:
        # An unknown name used to fall through both branches and crash with
        # UnboundLocalError on `data`; fail early with a clear message instead.
        raise ValueError(
            f"Unknown structure {structure!r}; "
            f"expected one of {sorted(outcomes_by_structure)}"
        )

    outcomes = outcomes_by_structure[structure]
    data = [random.choice(outcomes) for _ in range(samples)]
    return pd.DataFrame(data, columns=['Outcome'])


# Draw 100,000 dice rolls; the frame is reused by the plotting cell below.
df_samples = sample_from_hidden_structures(samples=100000, structure='dice')
# Preview the first rows via the notebook's rich display instead of print(),
# consistent with the .head() previews used in the rest of this notebook.
df_samples.head()

In [None]:
import matplotlib.pyplot as plt

def plot_distribution(df, structure='coin'):
    """Plot how often each outcome occurs in ``df['Outcome']``.

    Note: a later cell in this notebook (Normal Distribution section) defines
    another function with this same name and a different signature; once that
    cell has run, this version is shadowed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'Outcome' column.
    structure : str
        'coin' draws a bar chart of outcome counts; 'dice' draws a histogram
        with one bin per face (1..6).

    Raises
    ------
    ValueError
        If `structure` is neither 'coin' nor 'dice'. Previously an unknown
        name skipped both branches and silently displayed nothing.
    """
    if structure not in ('coin', 'dice'):
        raise ValueError(
            f"Unknown structure {structure!r}; expected 'coin' or 'dice'"
        )

    if structure == 'coin':
        # For coin flips, use a bar plot to show the frequency of each outcome.
        # rot=0 keeps the 'Heads'/'Tails' tick labels horizontal.
        counts = df['Outcome'].value_counts()
        ax = counts.plot(kind='bar', color=['blue', 'green'], rot=0)
        ax.set_title('Coin Flip Distribution')
    else:
        # For dice rolls, use a histogram. Bin edges 1..7 with align='left'
        # centre each bar on its integer face value.
        ax = df['Outcome'].plot(kind='hist', bins=range(1, 8), align='left', rwidth=0.5)
        ax.set_title('Dice Roll Distribution')
        ax.set_xticks(range(1, 7))

    # Axis labels are identical for both chart types.
    ax.set_xlabel('Outcome')
    ax.set_ylabel('Frequency')

    plt.show()


# Visualise the sampled dice rolls.
plot_distribution(df_samples, structure='dice')

### some insights
- statistical observations <- hidden structure + randomness (observations are samples)
- a distribution is a good way to describe random variables
- sometimes we know the structure, but most of the time we only have the samples (e.g. the stock market): samples -> (infer) structure -> (make predictions) -> new observations
- random sampling (how to lie with facts. narratives)

### 2. Normal Distribution


In [None]:
import random
import pandas as pd

# heights of population
def sample_normal_distribution(samples=100000, mu=170, sigma=10):
    """Draw `samples` values from a normal distribution N(mu, sigma).

    Values come from ``random.gauss`` one at a time and are returned as a
    DataFrame with a single 'Value' column.
    """
    draws = []
    for _ in range(samples):
        draws.append(random.gauss(mu, sigma))
    return pd.DataFrame(draws, columns=['Value'])

# 100,000 simulated heights with mean 165 and standard deviation 15.
df_normal = sample_normal_distribution(
    samples=100_000,
    mu=165,
    sigma=15,
)

import matplotlib.pyplot as plt

def plot_distribution(df, title='Data Distribution', bins=30):
    """Show a histogram of ``df['Value']`` on a 10x6 inch figure.

    Note: this reuses the name of the coin/dice ``plot_distribution(df, structure)``
    defined earlier in the notebook, so it replaces that function once this
    cell has run.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Value' column.
    title : str
        Figure title.
    bins : int
        Passed straight through to matplotlib's ``hist``.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(df['Value'], bins=bins, edgecolor='black', alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    plt.show()

# Histogram of the simulated heights, using 40 bins.
plot_distribution(
    df_normal,
    title='Normal Distribution',
    bins=40,
)

### some insights
- mean and std are enough to describe a bell curve
- mean ± 1 std covers ~68% of outcomes, mean ± 2 std covers ~95%. If Apple stock has a mean annual return of 8% and a std of 8%, what is the probability that I lose money this year if I buy Apple?
- risk vs std (narrow? wide? more risky)
- the normal distribution is a dangerous assumption (black swans)
- stable structure vs unstable vs non-stationary 
- balance return and risk - Sharpe ratio. risk/reward trade-off

### 3. std, Sharpe ratio - finding the stock with the best risk/reward trade-off
- activity 15

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

In [None]:
# Activity 15: calculate the std of daily (not annual) returns.
# Pipeline: stock prices -> daily returns -> std of daily returns
#           -> scale the daily std (a good proxy) up to a yearly std.
file_path = Path("./Resources/tech_stocks_closing_value_2018.csv")
tech_stocks = pd.read_csv(
    file_path,
    index_col='Date',
    parse_dates=True,
)
tech_stocks.head()

In [None]:
# Daily returns: percentage change of each column from one row to the next
# (the first row has no predecessor, so it is NaN).
daily_returns = tech_stocks.pct_change()
daily_returns.head(5)

In [None]:
# Standard deviation of the daily returns, one value per column.
daily_std = daily_returns.std()
daily_std.head(5)

In [None]:
# Scale the daily std to an annual figure: multiply by sqrt(252),
# where 252 is the number of trading days in a year.
annualized_std = daily_std.mul(np.sqrt(252))
annualized_std.head(5)

### Sharpe ratio
- activity 16

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

In [None]:
# Locations of the two portfolio price files and the risk-free rate series.
portfolio_a_path = Path("./Resources/tech_stocks_2018_a.csv")
portfolio_b_path = Path("./Resources/tech_stocks_2018_b.csv")
risk_free_rate_path = Path("./Resources/risk_free_rate.csv")

# Each file is indexed by its 'Date' column, parsed as datetimes.
portfolio_a = pd.read_csv(
    portfolio_a_path,
    index_col='Date',
    parse_dates=True,
)
portfolio_b = pd.read_csv(
    portfolio_b_path,
    index_col='Date',
    parse_dates=True,
)
risk_free_rate = pd.read_csv(
    risk_free_rate_path,
    index_col='Date',
    parse_dates=True,
)

portfolio_a.head(5)

In [None]:
# Daily returns for each portfolio; the leading NaN row produced by
# pct_change() is dropped.
portfolio_a_returns = portfolio_a.pct_change().dropna()
portfolio_b_returns = portfolio_b.pct_change().dropna()

# Put both portfolios and the risk-free rate side by side, keeping only
# the dates present in all three frames (inner join on the index).
all_portfolios_returns = pd.concat(
    [portfolio_a_returns, portfolio_b_returns, risk_free_rate],
    axis='columns',
    join='inner',
)
all_portfolios_returns.head(5)

In [None]:
# Calculate annualized Sharpe ratios:
#   (mean daily return - mean daily risk-free rate) * 252 / (daily std * sqrt(252))
# with 252 trading days per year.
#
# Only the asset columns are scored. 'rf_rate' is the benchmark being
# subtracted, not an investment; scoring it against itself produced a
# meaningless entry (0, or NaN when the rate never varies) that then showed
# up as an extra bar in the Sharpe ratio plot.
asset_returns = all_portfolios_returns.drop(columns='rf_rate')
mean_rf_rate = all_portfolios_returns['rf_rate'].mean()

sharpe_ratios = ((asset_returns.mean() - mean_rf_rate) * 252) / (asset_returns.std() * np.sqrt(252))
sharpe_ratios

In [None]:
# Bar chart comparing the Sharpe ratio of each column.
sharpe_ratios.plot.bar(title="Sharpe Ratios")

### 4. groupby object (the structure inside of a df, one to many)
- activity 5

### 5. multi-index