In [2]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import scipy as sp
from scipy import stats
import numpy as np


pio.renderers.default = 'iframe'
pio.templates.default = 'plotly'

In [3]:
aapl_df = pd.read_csv('AAPL.csv')
btc_df = pd.read_csv('BTC.csv')
gas_df = pd.read_csv('GAS.csv')
aapl_df['date'] = aapl_df['date'].apply(lambda x: pd.to_datetime(x))
btc_df['date'] = btc_df['date'].apply(lambda x: pd.to_datetime(x))
gas_df['Date'] = gas_df['Date'].apply(lambda x: pd.to_datetime(x))

In [4]:
aapl_df

Unnamed: 0,date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,2019-09-23,53.383072,53.600066,53.066113,53.326996,53.326996,76662000,0.0,0.0
1,2019-09-24,53.890204,54.246174,52.953959,53.073425,53.073425,124763200,0.0,0.0
2,2019-09-25,53.285552,54.004802,52.941773,53.890209,53.890209,87613600,0.0,0.0
3,2019-09-26,53.639082,53.868268,53.353820,53.612263,53.612263,75334000,0.0,0.0
4,2019-09-27,53.770740,53.873145,52.975908,53.351383,53.351383,101408000,0.0,0.0
...,...,...,...,...,...,...,...,...,...
863,2023-02-27,147.710007,149.169998,147.449997,147.919998,147.919998,44998500,0.0,0.0
864,2023-02-28,147.050003,149.080002,146.830002,147.410004,147.410004,50547000,0.0,0.0
865,2023-03-01,146.830002,147.229996,145.009995,145.309998,145.309998,55479000,0.0,0.0
866,2023-03-02,144.380005,146.710007,143.899994,145.910004,145.910004,52279800,0.0,0.0


In [5]:
fig = px.scatter(data_frame=aapl_df, x='date', y='Adj Close', trendline='ols', title='AAPL')
fig.show()

In [6]:
fig = px.scatter(data_frame=btc_df, x='date', y='Close', trendline='ols', title='BTC')
fig.show()

In [7]:
fig = px.scatter(data_frame=gas_df, x='Date', y='Close', trendline='ols', title='GAS(N.NG)')
fig.show()


- на основе Close извлеките логарифмическую доходность BTC task1, ответ – мю и сигма
- проверьте распределение получившихся данных BTC task2, ответ yes/no
- возможно попробуйте их (BTC) нормализовать task3, ответ – mu и sigma
- попробуйте предположить по подвыборке размером 634 дней среднее значение для доходностей (BTC) на уровне 90 95 99 – task 4, ответ – val + [a;b]
- проверьте корреляцию доходностей BTC & AAPL – task5, ответ – yes/no
- проверьте гипотезу о равенствен средних и дисперсии доходностей BTC & AAPL & GAS – task6, ответ – yes/no

In [8]:
# 1
btc_lr = pd.Series(btc_df['Close']/btc_df['Open']).apply(lambda x: np.log(x))
btc_lr_mean = btc_lr.mean()
btc_lr_std = btc_lr.std()
btc_lr_mean, btc_lr_std

(0.0006404770261511352, 0.03868217781397536)

In [9]:
fig = px.histogram(data_frame = btc_lr)
fig.show()

Для проверки на нормальность распределения воспользуемся тестом Шапиро

In [10]:
# 2

from scipy.stats import shapiro

stat, p = shapiro(btc_lr)

alpha = 0.05
print('normally distributed' if p > alpha else 'not normally distributed')

not normally distributed


In [11]:
# 3

# Приводим btc profit к стандартному распределению
btc_lr_standart = ((btc_lr - btc_lr_mean) / btc_lr_std).dropna()
btc_lrs_mean = np.mean(btc_lr_standart)
btc_lrs_std = np.std(btc_lr_standart)
btc_lrs_mean, btc_lrs_std

(-8.465560791422958e-18, 0.9996027805205745)

In [12]:
# 4

btc_lrr_634 = btc_lr.sample(634)
btc_lrr_size = len(btc_lr)
btc_lrr_634_mean = np.mean(btc_lrr_634)
btc_lrr_634_std = np.std(btc_lrr_634)

# критические значения t-распределения
t_values = stats.t.ppf([0.9, 0.95, 0.99], df=btc_lrr_size-1)
# доверительные интервалы
confidence_intervals = [(btc_lrr_634_mean - t_value * btc_lrr_634_std / np.sqrt(btc_lrr_size),
                         btc_lrr_634_mean + t_value * btc_lrr_634_std / np.sqrt(btc_lrr_size)) for t_value in t_values]

print("Среднее - ", btc_lrr_634_mean)
print("ДИ с уровнем значимости 90%: ", confidence_intervals[0])
print("ДИ с уровнем значимости 95%: ", confidence_intervals[1])
print("ДИ с уровнем значимости 99%: ", confidence_intervals[2])

Среднее -  0.0005505809592546728
ДИ с уровнем значимости 90%:  (-0.0007528143152754681, 0.0018539762337848138)
ДИ с уровнем значимости 95%:  (-0.0011226624717889074, 0.002223824390298253)
ДИ с уровнем значимости 99%:  (-0.001817193375920679, 0.0029183552944300247)


In [16]:
# 5
aapl_lr = pd.Series(aapl_df['Close']/aapl_df['Open']).apply(lambda x: np.log(x))
"Корелляция", btc_lr.corr(aapl_lr) # no

('Корелляция', 0.025941094330360036)

In [17]:
# 6
gas_lr = pd.Series(gas_df['Close']/gas_df['Open']).apply(lambda x: np.log(x))
print(btc_lr.mean(), aapl_lr.mean(), gas_lr.mean())
print(btc_lr.var(), aapl_lr.var(), gas_lr.var())

0.0006404770261511352 0.0011301782213586423 -0.0016151406711920951
0.0014963108804320073 0.00028863750173260545 0.0017172581186976831


In [14]:
ANSWERS = {
    'tasks' : {
        'task1_mu' : {
            "answer" : 0.0006404770261511352
        },
        'task1_sigma' : {
            "answer" : 0.03868217781397536
        },
        'task2_normal' : {
            'answer' : 'no'
        },
        'task3_mu' : {
            "answer" : -8.465560791422958e-18
        },
        'task3_sigma' : {
            "answer" : 0.9996027805205745
        },
        'task4_90' : {
            "answer" : -0.00026418159885703855
        },
        'task4_90_int' : {
            "answer" : [-0.0017678692314442987, 0.0012395060337302218]
        },
        'task4_95' : {
            "answer" : -0.00026418159885703855
        },
        'task4_95_int' : {
            "answer" : [-0.002194551842431159, 0.0016661886447170821]
        },
        'task4_99' : {
            "answer" : -0.00026418159885703855
        },
        'task4_99_int' : {
            "answer" : [-0.002995811100770171, 0.0024674479030560937]
        },
        'task5_corr' : {
            "answer" : "no"
        },
        'task6_corr_mu' : {
            "answer" : "no"
        },
        'task6_corr_sigma' : {
            "answer" : "no"
        }
    }
}

In [15]:
import yaml
yaml_result = yaml.dump(ANSWERS)

print(yaml_result)

with open(f"hw03.yaml", "w") as f:
    f.write(yaml_result)

tasks:
  task1_mu:
    answer: 0.0006404770261511352
  task1_sigma:
    answer: 0.03868217781397536
  task2_normal:
    answer: 'no'
  task3_mu:
    answer: -8.465560791422958e-18
  task3_sigma:
    answer: 0.9996027805205745
  task4_90:
    answer: -0.00026418159885703855
  task4_90_int:
    answer:
    - -0.0017678692314442987
    - 0.0012395060337302218
  task4_95:
    answer: -0.00026418159885703855
  task4_95_int:
    answer:
    - -0.002194551842431159
    - 0.0016661886447170821
  task4_99:
    answer: -0.00026418159885703855
  task4_99_int:
    answer:
    - -0.002995811100770171
    - 0.0024674479030560937
  task5_corr:
    answer: 'no'
  task6_corr_mu:
    answer: 'no'
  task6_corr_sigma:
    answer: 'no'

