**Frankfurt Stock Exchange: Carl Zeiss Meditec in 2017**

Importing modules and defining constants.
<br><i>NASDAQ_DATE_FMT added after inspecting sample data.</i>

In [1]:
import json
import pprint
from itertools import filterfalse
from datetime import datetime, timedelta
from urllib.request import urlopen, Request
from statistics import fmean, median
from collections import namedtuple, Counter
from textwrap import fill

from nasdaq_cred import api_key

API_KEY = api_key
NASDAQ_DATE_FMT = '%Y-%m-%d'


Defining functions
<br><br><i>TLDR:</i><br>
<b>nasdaq_json_dict</b> gets a JSON object from NASDAQ API and returns a dictionary. 
<br><b>dict_agg</b> returns the result of aggregate calculations performed on a dictionary grouped by a column.


In [2]:
def nasdaq_json_dict(
        market: str, stock_sym: str, **params) -> dict:
    '''Request a JSON object from NASDAQ API. Return it as a dictionary
    
    Arguments:
    market -- stock market abbreviation
    stock_sym -- stock symbol
    **params -- parameters. see below
    https://docs.data.nasdaq.com/docs/in-depth-usage
    
    Return a decoded JSON object as a dictionary
    '''
    q_url = f'https://data.nasdaq.com/api/v3/datasets/'
    q_url = q_url + f'{market}/{stock_sym}.json?'
    if params:
        q_url = q_url + '&'.join(
            f'{key}={val}' for key, val in params.items()
        )
    q_url = f'{q_url}&api_key={API_KEY}'
    with urlopen(Request(q_url)) as response:
         return json.load(response)
        
def hr(character: str = '_' ) -> str:
    '''Return a string of 79 * char for use as a text divider. Default
    character value is underscore to mimic html horizontal rule <hr>
    '''
    return 79 * character

def print_vars(**variables) -> None:
    '''Print linespaced variable name and variable separated by divider.
    Print the variable's type and length if applicable
    '''
    for var_name, var in variables.items():
        print(f'{var_name}:\n{hr()}')
        print(fill(f'{var}'))
        var_prop = f'type: {type(var).__name__}'
        if isinstance(var, (str, list, set, dict, tuple)):
            var_prop = f'{var_prop}, length: {len(var)}'
        print(var_prop)
    
def dict_struct(nested_dict: dict, lvl: int = 0) -> None:
    '''Print keys and corresponding value types at each level of nesting
    for an arbitrarily nested dictionary. Print structure of nested lists 
    and tuples within nested_dict. Sequences of elements of more than one
    type are described as sequences of object.
    '''
    spacing = 6 + max([len(key) for key in nested_dict.keys()])
    for key, val in nested_dict.items():
        val_type = ''
        obj_flag = 0
        while isinstance(val, (list, tuple)) and not obj_flag:
            val_type = (
                val_type + bool(val_type) * ': '
                + f'{type(val).__name__}'
            )
            if len(set(type(nested_val) for nested_val in val)) > 1:
                val_type = val_type + bool(val_type) * ': ' + 'object '
                obj_flag = 1
            val = val[0]
        if not obj_flag:
            val_type = val_type + bool(val_type) * ': ' + type(val).__name__
        print(''.join([
            2 * lvl * ' ', f'{lvl}[{key}]:'.ljust(spacing), f'{val_type}'
        ]))
        if isinstance(val, dict):
            dict_struct(val, lvl + 1)
            
def not_none(x: object) -> bool:
    return x is not None

def dict_agg(
        data: dict, col: str, *funcs: callable) -> tuple[float]:
    '''Return tuple of aggregate calculations performed on a dictionary
    storing numeric data in a tabular structure. If there are null values,
    aggregates are calculated on the non-null subset of data[col].
    
    Arguments:
    data -- dictionary of tabular data
    col -- string representing column over which aggregates are calculated
    *funcs -- list of callable functions that take a sequence as argument
    and have a numeric return type (min/max/sum/etc.)
    
    Return a tuple of floats
    '''
    return tuple(
        func(filter(not_none, data[col]))
        for func in funcs
    )


Calling the Nasdaq API to get a sample.

In [3]:
sample = nasdaq_json_dict('FSE', 'AFX_X', collapse = 'daily', limit = '1')
print(f'sample\n{hr()}')
pprint.pprint(sample)


sample
_______________________________________________________________________________
{'dataset': {'collapse': 'daily',
             'column_index': None,
             'column_names': ['Date',
                              'Open',
                              'High',
                              'Low',
                              'Close',
                              'Change',
                              'Traded Volume',
                              'Turnover',
                              'Last Price of the Day',
                              'Daily Traded Units',
                              'Daily Turnover'],
             'data': [['2020-12-01',
                       112.2,
                       112.2,
                       111.5,
                       112.0,
                       None,
                       51.0,
                       5703.0,
                       None,
                       None,
                       None]],
             'database_code': 'FSE

Inspecting the structure of the resultant object.

In [4]:
print(f'sample structure\nnest level[key]: val type(s)\n{hr()}')
dict_struct(sample)

sample structure
nest level[key]: val type(s)
_______________________________________________________________________________
0[dataset]:  dict
  1[id]:                     int
  1[dataset_code]:           str
  1[database_code]:          str
  1[name]:                   str
  1[description]:            str
  1[refreshed_at]:           str
  1[newest_available_date]:  str
  1[oldest_available_date]:  str
  1[column_names]:           list: str
  1[frequency]:              str
  1[type]:                   str
  1[premium]:                bool
  1[limit]:                  int
  1[transform]:              NoneType
  1[column_index]:           NoneType
  1[start_date]:             str
  1[end_date]:               str
  1[data]:                   list: list: object 
  1[collapse]:               str
  1[order]:                  NoneType
  1[database_id]:            int


In [5]:
sample = sample['dataset']


In [6]:
print(f'description:\n{hr()}')
print(sample['description'].replace('<br><br>', '\n'))


description:
_______________________________________________________________________________
Stock Prices for Carl Zeiss Meditec (2020-11-02) from the Frankfurt Stock Exchange.
Trading System: Xetra
ISIN: DE0005313704


Applying naming and styling conventions to the column names. Creating a dictionary pairing each column name to a verbose name.

In [7]:
column_names = [
    col.replace(' ', '_').lower()
    for col in sample['column_names']
]
verbose_names = dict(zip(column_names, [
    'date', 'opening price', 'highest price of the day', 
    'lowest price of the day', 'closing price', 'change', 'traded volume', 
    'turnover', 'last price of the day', 'daily traded units', 
    'daily turnover'
]))
print_vars(column_names = column_names)


column_names:
_______________________________________________________________________________
['date', 'open', 'high', 'low', 'close', 'change', 'traded_volume',
'turnover', 'last_price_of_the_day', 'daily_traded_units',
'daily_turnover']
type: list, length: 11


Inspecting the 'data' portion of the dataset.

In [8]:
sample_data = sample['data']
print_vars(sample_data = sample_data)


sample_data:
_______________________________________________________________________________
[['2020-12-01', 112.2, 112.2, 111.5, 112.0, None, 51.0, 5703.0, None,
None, None]]
type: list, length: 1


After inspecting the sample data, a string format constant was added to the imports and constants cell of this notebook. NASDAQ_DATE_FMT specifies the format used in converting the provided date strings to datetime.date objects.

Organizing sample data as a dictionary following the structure {field: list of values}

In [9]:
sample_data_dict = {
    column_names[i]: [row[i] for row in sample_data]
    for i in range(len(column_names))
}
print_vars(sample_data_dict = sample_data_dict)


sample_data_dict:
_______________________________________________________________________________
{'date': ['2020-12-01'], 'open': [112.2], 'high': [112.2], 'low':
[111.5], 'close': [112.0], 'change': [None], 'traded_volume': [51.0],
'turnover': [5703.0], 'last_price_of_the_day': [None],
'daily_traded_units': [None], 'daily_turnover': [None]}
type: dict, length: 11


Organizing a row of sample data as a list of namedtuples

In [10]:
fse_row = namedtuple('fse_row', column_names)
sample_data_row = fse_row(
    datetime.strptime(sample_data[0][0], NASDAQ_DATE_FMT).date(),
    *sample_data[0][1:]
)
print_vars(sample_data_row = sample_data_row)


sample_data_row:
_______________________________________________________________________________
fse_row(date=datetime.date(2020, 12, 1), open=112.2, high=112.2,
low=111.5, close=112.0, change=None, traded_volume=51.0,
turnover=5703.0, last_price_of_the_day=None, daily_traded_units=None,
daily_turnover=None)
type: fse_row, length: 11


These are your tasks for this mini project:

1. Collect data from the Franfurt Stock Exchange, for the ticker AFX_X, for the whole year 2017 (keep in mind that the date format is YYYY-MM-DD).
2. Convert the returned JSON object into a Python dictionary.
3. Calculate what the highest and lowest opening prices were for the stock in this period.
4. What was the largest change in any one day (based on High and Low price)?
5. What was the largest change between any two days (based on Closing Price)?
6. What was the average daily trading volume during this year?
7. (Optional) What was the median trading volume during this year. (Note: you may need to implement your own function for calculating the median.)

***
**Task 1**

Getting a json object from the NASDAQ api for Carl Zeiss Meditec on the Frankfurt Stock Exchange in 2017. Decoding this json object into a python dictionary assigned to the variable nasdaq_fse_afx_x_2017. Saving it to the "raw" folder in the "data" directory of this notebook

In [11]:
nasdaq_fse_afx_x_2017 = nasdaq_json_dict(
    'FSE', 'AFX_X', start_date = '2017-01-01', end_date = '2017-12-31'
)
with open('data/raw/nasdaq_fse_afx_x_2017.json', 'w+') as raw_data:
    json.dump(nasdaq_fse_afx_x_2017, raw_data)


Assigning the relevant portion of raw data to a working variable organized as a list of namedtuples. Converting the provided date strings to datetime.date objects.

In [12]:
afx_x_2017 = nasdaq_fse_afx_x_2017['dataset']['data']
afx_x_2017 = [
    fse_row(
        datetime.strptime(row[0], NASDAQ_DATE_FMT).date(),
        *row[1:]
    ) for row in afx_x_2017
]


Inspecting the percentage of null values for each column in column_names. Removing fields with more than 75% null values from the column_names list and adding them to the removed_columns list. Removing the corresponding key value pairs from the verbose_names dictionary.

In [13]:
print(f'{"column".ljust(30)}null values\n{hr()}')
removed_columns = []
for col in column_names:
    null_pct = fmean([int(getattr(row, col) is None) for row in afx_x_2017])
    print(f'{col.ljust(30)}{round(null_pct * 100, 1)}%')
    mostly_null = null_pct > 0.75
    if mostly_null:
        removed_columns.append(col)
        column_names.remove(col)
        verbose_names.pop(col)
print(f'{hr()}\n\nremoved columns:  {removed_columns}')      
print(f'retained columns: {column_names}')


column                        null values
_______________________________________________________________________________
date                          0.0%
open                          1.2%
high                          0.0%
low                           0.0%
close                         0.0%
change                        99.6%
turnover                      0.0%
last_price_of_the_day         100.0%
daily_turnover                100.0%
_______________________________________________________________________________

removed columns:  ['change', 'last_price_of_the_day', 'daily_turnover']
retained columns: ['date', 'open', 'high', 'low', 'close', 'traded_volume', 'turnover', 'daily_traded_units']


Creating a list of dates missing an opening price to fill from another source, if available.

In [14]:
missing_open_price = [row for row in afx_x_2017 if row.open is None]
print(f'dates missing opening prices:\n{hr()}')
for row in missing_open_price:
    print(row.date.isoformat())


dates missing opening prices:
_______________________________________________________________________________
2017-05-01
2017-04-17
2017-04-14


**Task 2**

Reorganizing data as a nested dictionary keyed by date. The structure is {date: {field: value}}

In [15]:
afx_x_2017_d = {
    row.date: {field: getattr(row, field) for field in column_names} 
    for row in afx_x_2017
}


In [16]:
pprint.pprint({
    key: afx_x_2017_d[key] 
    for key in list(afx_x_2017_d.keys())[:3]
})


{datetime.date(2017, 12, 27): {'close': 51.82,
                               'daily_traded_units': None,
                               'date': datetime.date(2017, 12, 27),
                               'high': 51.89,
                               'low': 50.76,
                               'open': 51.45,
                               'traded_volume': 57452.0,
                               'turnover': 2957018.0},
 datetime.date(2017, 12, 28): {'close': 51.6,
                               'daily_traded_units': None,
                               'date': datetime.date(2017, 12, 28),
                               'high': 51.82,
                               'low': 51.43,
                               'open': 51.65,
                               'traded_volume': 40660.0,
                               'turnover': 2099024.0},
 datetime.date(2017, 12, 29): {'close': 51.76,
                               'daily_traded_units': None,
                               'date': datetime.d

**Task 2**

Unsure of the requirements for task 2, reorganizing data as an alternate dictionary of lists keyed by field, following the structure {field: list of values}

In [17]:
afx_x_2017_f = {
    col: [getattr(row, col) for row in afx_x_2017]
    for col in column_names
}
print(f'afx_x_2017_f\n{hr()}')
pprint.pprint({key: afx_x_2017_f[key][:3] for key in afx_x_2017_f.keys()})


afx_x_2017_f
_______________________________________________________________________________
{'close': [51.76, 51.6, 51.82],
 'daily_traded_units': [None, None, None],
 'date': [datetime.date(2017, 12, 29),
          datetime.date(2017, 12, 28),
          datetime.date(2017, 12, 27)],
 'high': [51.94, 51.82, 51.89],
 'low': [51.45, 51.43, 50.76],
 'open': [51.76, 51.65, 51.45],
 'traded_volume': [34640.0, 40660.0, 57452.0],
 'turnover': [1792304.0, 2099024.0, 2957018.0]}


Creating a nested dictionary of summary statistics by column. The structure is {field : {statistic name : statistic value}}

In [18]:
afx_x_2017_summary = {
    field: dict(zip(
        ('min', 'max', 'mean', 'median'),
        dict_agg(afx_x_2017_f, field, *[min, max, fmean, median])
    )) for field in [
        'open', 'close', 'high', 'low', 'traded_volume', 'turnover'
    ]
}

for col, aggs in afx_x_2017_summary.items():
    print(f'{verbose_names[col]}')
    euro = ('price' in verbose_names[col]) * '€ '
    for name, val in aggs.items():
        print(f'{name}: '.ljust(10) + f'{euro}{round(val, 2):.2f}')
    print()
    

opening price
min:      € 34.00
max:      € 53.11
mean:     € 43.34
median:   € 43.45

closing price
min:      € 34.06
max:      € 53.09
mean:     € 43.36
median:   € 43.28

highest price of the day
min:      € 34.12
max:      € 53.54
mean:     € 43.70
median:   € 43.55

lowest price of the day
min:      € 33.62
max:      € 52.48
mean:     € 42.92
median:   € 42.62

traded volume
min:      45.00
max:      670349.00
mean:     89124.34
median:   76286.00

turnover
min:      1980.00
max:      25910543.00
mean:     3853589.45
median:   3292223.00



Checking work from previous steps. Assigning answers for remaining tasks to variables.

In [19]:
# enumerating dates in dataset for use as indices in window functions
trading_dates = sorted(afx_x_2017_d.keys())
trading_dates = dict(enumerate(trading_dates))

# minimum and maximum opening prices
min_open, max_open = dict_agg(afx_x_2017_f, 'open', *[min, max])

# largest price change over one day
price_deltas = [afx_x_2017_d[day]['high'] - afx_x_2017_d[day]['low'] for day in afx_x_2017_d]
max_price_delta = round(max(price_deltas), 2)
    
# maximum difference in closing value between any two dates
min_close, max_close = dict_agg(afx_x_2017_f, 'close', *[min, max])
max_close_difference = round(max_close - min_close, 2)

# maximum difference between closing prices of two consecutive calendar dates
max_close_diff_consec_calendar = round(max(
    [
        abs(afx_x_2017_d[day]['close'] 
        - afx_x_2017_d[day + timedelta(days = 1)]['close'])
        for day in
            [
                dy for dy in sorted(afx_x_2017_d.keys())[:-1]
                if dy + timedelta(days = 1) in afx_x_2017_d
            ]
    ]
), 2)

# maximium difference between closing prices of consecutive open trading dates
max_close_diff_consec_open = round(max([
    abs(
        afx_x_2017_d[trading_dates[i]]['close'] 
        - afx_x_2017_d[trading_dates[i + 1]]['close']
    )
    for i in range(len(trading_dates) - 1)
]), 2)


# average traded volume
avg_daily_traded_volume, median_traded_volume = dict_agg(
    afx_x_2017_f, 'traded_volume', *[fmean, median]
)


**Task 3, 4, 5, 6, 7**

Printing a list of reponses for tasks 3 through 7.

In [20]:
print(f'tasks 3-7\n{hr()}')
statements = [
    f'3. The lowest opening price in 2017 was €{afx_x_2017_summary["open"]["min"]:.2f}',
    f'3. The highest opening price was €{afx_x_2017_summary["open"]["max"]:.2f}',
    f'4. The largest change in price over the course of one day was €{max_price_delta:.2f}',
    f'5. The largest difference in closing price of two dates was €{max_close_difference:.2f}',
    f'5. The largest difference in closing price between two consecutive calendar dates was '
    + f'€{max_close_diff_consec_calendar:.2f}',
    f'5. The largest difference in closing price between two consecutive trading dates was '
    + f'€{max_close_diff_consec_open:.2f}',
    f'6. The average daily traded volume was {round(avg_daily_traded_volume, 2):.2f}',
    f'7. The median daily traded volume was {median_traded_volume:.2f}'
]
for statement in statements:
    print(fill(statement, subsequent_indent = '   '))


tasks 3-7
_______________________________________________________________________________
3. The lowest opening price in 2017 was €34.00
3. The highest opening price was €53.11
4. The largest change in price over the course of one day was €2.81
5. The largest difference in closing price of two dates was €19.03
5. The largest difference in closing price between two consecutive
   calendar dates was €2.56
5. The largest difference in closing price between two consecutive
   trading dates was €2.56
6. The average daily traded volume was 89124.34
7. The median daily traded volume was 76286.00
