In [1]:
%matplotlib inline
import json
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
# Base URL of the midterm API. The trailing slash matters: the endpoint
# paths are appended to it directly (url + "login", f"{url}data/...").
url = 'https://ies-midterm.soulution.rocks/'

In [45]:
# Log in to the API with a POST request. The decoded JSON body is kept in
# `req_json`; the following cells read the personal code and the dataset ids
# from req_json['data'].
# NOTE(review): the student id is hardcoded -- consider reading it from an
# environment variable before sharing the notebook.
req = requests.post(url + "login", json={"cuni": "96640370"}, timeout=30)
# Fail here with a clear HTTP error instead of a KeyError in a later cell.
req.raise_for_status()
req_json = req.json()

{"data": {"personal_code": "f498a093", "dataset_ids": ["775352ba11", "e2beb850b2", "81395feb08", "40138ff3b8", "ef0bccd7cc"]}, "message": "Hello, Karl\u00ed\u010dek Ond\u0159ej! Good luck with the midterm. GET your data set from /data/file_id endpoint; use the codes you received in the data response as the file_id parameter! Print the personal code in your notebook."}


In [47]:
# show the personal code returned by the login response
print(f"My personal code is: {req_json['data']['personal_code']}")

My personal code is: f498a093


In [49]:
# function for loading data
def load_data(dataset_id, timeout=30):
    """Fetch one dataset from the API and return the decoded JSON body.

    Parameters
    ----------
    dataset_id : str
        Identifier appended to the ``data/`` endpoint of ``url``.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so a stalled connection cannot hang the notebook.

    Returns
    -------
    The object produced by ``Response.json()`` for the reply.
    """
    # The HTTP status is deliberately not checked here: the loading cell
    # inspects the 'message' field of the body to detect a failed attempt.
    response = requests.get(f"{url}data/{dataset_id}", timeout=timeout)
    return response.json()

In [95]:
# loading data
# The API answers some requests with the message below instead of data, so
# every dataset is requested repeatedly until a real payload arrives.
warning_message = 'Error [503] - Sorry, I just slipped and fell. Thank God, I am made of silicon. You need to work this out.'
MAX_ATTEMPTS = 20          # upper bound so a dead API cannot hang the notebook
RETRY_DELAY_SECONDS = 0.5  # short pause so the server is not hammered

datasets = []
for idd in req_json['data']['dataset_ids']:
    for _ in range(MAX_ATTEMPTS):
        data = load_data(idd)
        # .get(): a response without a 'message' key counts as a success
        if data.get('message') != warning_message:
            # append the payload and stop retrying this dataset
            datasets.append(data)
            break
        time.sleep(RETRY_DELAY_SECONDS)
    else:
        # every attempt returned the warning message
        raise RuntimeError(f"Dataset {idd} could not be loaded after {MAX_ATTEMPTS} attempts")

In [174]:
# creating pandas dataframes
def _to_long_frame(dataset):
    """Reshape one downloaded dataset into a long frame indexed by (Date, type).

    Reads the table from ``dataset['data']['data']`` and the company name from
    ``dataset['data']['company']``. All columns other than 'Date' are melted
    into the 'type' index level; the values end up in a single column named
    after the company.
    """
    payload = dataset['data']
    company = payload['company']
    long_frame = (
        pd.DataFrame(payload['data'])
        .melt(id_vars=['Date'], var_name="type", value_name=company)
        .set_index(['Date', 'type'])
    )
    # downcast="float" stores the values in the smallest float dtype that
    # pandas offers for downcasting (float32)
    long_frame[company] = pd.to_numeric(long_frame[company], downcast="float")
    return long_frame


# one column per company, outer-joined on the shared (Date, type) index
df_all = _to_long_frame(datasets[0])
for dataset in datasets[1:]:
    df_all = df_all.join(_to_long_frame(dataset), how="outer")

df_all.tail(n=20)

Unnamed: 0_level_0,Unnamed: 1_level_0,SMIN,IAG,CCH,EVR,CRDA
Date,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-11-17,Open,37.41,3.47,18.92,87.879997,
2020-11-17,Volume,16600.0,6726700.0,491872.0,309400.0,
2020-11-18,Adj.Close,37.599998,3.36,18.82,88.309998,
2020-11-18,Close,37.599998,3.36,18.82,88.309998,
2020-11-18,High,37.900002,3.45,19.24,90.690002,
2020-11-18,Low,37.59,3.34,18.74,88.279999,
2020-11-18,Open,37.900002,3.45,18.95,89.18,
2020-11-18,Volume,31500.0,5219400.0,756785.0,334200.0,
2020-11-19,Adj.Close,37.970001,3.37,19.1,88.489998,
2020-11-19,Close,37.970001,3.37,19.1,88.489998,


In [None]:
# Part 2

In [175]:
# Turn the ('Date', 'type') index levels back into ordinary columns.
# Guarded so that re-running the cell is a no-op instead of adding a stray
# 'index' column on the second run.
if 'Date' in df_all.index.names:
    df_all = df_all.reset_index()

In [176]:
# keep only rows dated 2019-01-02 through 2020-11-20 (both ends inclusive)
df_all = df_all.loc[df_all['Date'].between("2019-01-02", "2020-11-20")]
df_all.head()

Unnamed: 0,Date,type,SMIN,IAG,CCH,EVR,CRDA
6,2019-01-02,Adj.Close,37.300087,3.69,9.623075,69.908577,
7,2019-01-02,Close,38.490002,3.69,9.65,73.839996,
8,2019-01-02,High,38.560001,3.76,9.65,74.230003,
9,2019-01-02,Low,38.330002,3.6,9.64,69.980003,
10,2019-01-02,Open,38.389999,3.68,9.65,70.110001,


In [177]:
# Do we have data for the whole period? Count missing Adj.Close values per company.
# The 'Date' and 'type' columns are dropped by name rather than skipped by
# position, so the result does not depend on the column order.
df_all[df_all['type'] == 'Adj.Close'].drop(columns=['Date', 'type']).isna().sum()
# we can see that we have 476 missing days for CRDA

SMIN      0
IAG       0
CCH       0
EVR       0
CRDA    476
dtype: int64

In [178]:
# highest and lowest adj. close
# String names are used instead of np.min / np.max: recent pandas versions
# warn about NumPy callables passed to agg, and the row labels are then
# plain 'min' / 'max'.
(
    df_all[df_all['type'] == 'Adj.Close']
    .drop(columns=['Date', 'type'])
    .agg(['min', 'max'], axis=0)
)
# first row is the minimum for each stock and the second row is the maximum

Unnamed: 0,SMIN,IAG,CCH,EVR,CRDA
amin,20.771763,1.88,9.603131,35.080837,5136.0
amax,39.713108,5.2,19.389999,93.306076,5153.0


In [186]:
# Find monthly returns. Which company has the highest recorded?
# A month corresponds approximately to 21 trading days.
TRADING_DAYS_PER_MONTH = 21

# one Adj.Close row per date, one column per company
df_adjclose = df_all[df_all['type'] == 'Adj.Close'].drop(columns=['Date','type'])

# fill_method=None: missing prices stay missing instead of being
# forward-filled (the default), which would manufacture returns from stale
# prices for the sparsely populated CRDA column.
df_returns = df_adjclose.pct_change(periods=TRADING_DAYS_PER_MONTH, fill_method=None)

# highest return per company, then keep the company (or tied companies)
# that reaches the overall maximum
max_returns = df_returns.max()
max_returns[max_returns == max_returns.max()]

IAG    0.62766
dtype: float32

In [191]:
# rolling standard deviation of Adj.Close over a 10-row window, last rows shown
# (CRDA is left out -- presumably because it is almost entirely missing; confirm)
(
    df_adjclose
    .drop(columns=['CRDA'])
    .rolling(window=10)
    .std()
    .tail()
)

Unnamed: 0,SMIN,IAG,CCH,EVR
2844,0.61327,0.173333,0.695142,2.765931
2850,0.514373,0.178076,0.743374,2.510661
2856,0.56896,0.191183,0.79096,2.326013
2862,0.655233,0.140238,0.86791,2.22376
2868,0.744163,0.075285,0.910748,1.435681
