# Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.patches import Patch, Rectangle, Polygon
import matplotlib.dates as mdates
import matplotlib.ticker as tkr
import seaborn as sns
import seaborn.objects as so
import mplfinance as mpf  # https://github.com/matplotlib/mplfinance
import numpy as np
from pprint import pprint as pp
import csv
from pathlib import Path
import itertools
from itertools import product, combinations, permutations, zip_longest, groupby
import random
from datetime import datetime, timedelta, date, timezone
from datetime import time as dtime
from collections import OrderedDict, defaultdict, Counter, namedtuple
import re
import requests
import statsmodels.api as sm
import calendar
import scipy
import json
from PIL import Image
from bs4 import BeautifulSoup as bs
from ast import literal_eval  # use to eval a string as a list df['column'] = df['column'].apply(literal_eval)
import math
import time  # messes up from datetime import time
import sys
import string
import json
import sklearn
import urllib
import urllib3
import psutil
from typing import List, Tuple, Union  # used for type hints
from gc import collect

import pandas_datareader as web
import natsort as ns
import yfinance as yf
# from stockstats import StockDataFrame as sdf  # dataframe wrapper for stock calculations

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#overview
# Display limits applied notebook-wide; edit the mapping to tune how frames are rendered.
display_options = {
    'display.max_columns': 700,
    'display.max_rows': 400,
    'display.min_rows': 10,
    'display.expand_frame_repr': True,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)
# pd.set_option('precision', 5)
# pd.reset_option('precision')

In [None]:
# plt.style.use('seaborn')
# plt.rcParams['figure.figsize'] = (16.0, 10.0)
# plt.rcParams["patch.force_edgecolor"] = True
# Saved figures get a white background regardless of the active style.
plt.rcParams.update({'savefig.facecolor': 'white'})
# sns.set_style("white")

[Matplotlib and Ipython-notebook: Displaying exactly the figure that will be saved][1]

  [1]: https://stackoverflow.com/questions/37864735/matplotlib-and-ipython-notebook-displaying-exactly-the-figure-that-will-be-save/37879281#37879281

In [None]:
%matplotlib inline
%config InlineBackend.print_figure_kwargs = {'bbox_inches': None}

In [None]:
# Silence every warning for the rest of the session.
# NOTE(review): "ignore" with no category also hides DeprecationWarning / FutureWarning
# from pandas, seaborn, etc.; consider filtering specific categories instead.
# NOTE(review): this import lives outside the top import cell.
import warnings
warnings.simplefilter("ignore")

# Colored Note Boxes

- [The Ultimate Markdown Guide](https://medium.com/analytics-vidhya/the-ultimate-markdown-guide-for-jupyter-notebook-d5e5abf728fd)

<div class="alert-success">
<b>Success:</b> This alert box indicates a successful or positive action.
</div>

<div class="alert-danger">
<b>Danger:</b> This alert box indicates a dangerous or potentially negative action.
</div>

<div class="alert-warning">
<b>Example:</b> Use yellow boxes for examples that are not inside code cells, or use for mathematical formulas if needed. Typically also used to display warning messages.
</div>

<div class="alert alert-block alert-info">
<b>Tip:</b> Use blue boxes (alert-info) for tips and notes.
</div>

# Synthetic Data:

## Use recursion to flatten the nested `dicts`

 - [Thinking Recursively in Python][1]
 - [Flattening JSON objects in Python][2]
 - [flatten package][3]
 - [How to flatten nested JSON recursively, with flatten_json?][4]
 - The `flatten_json` function will be used to flatten `data`.
 
  [1]: https://realpython.com/python-thinking-recursively/
  [2]: https://towardsdatascience.com/flattening-json-objects-in-python-f5343c794b10
  [3]: https://github.com/amirziai/flatten
  [4]: https://stackoverflow.com/questions/58442723/how-to-flatten-nested-json-recursively-with-flatten-json

In [None]:
def flatten_json(nested_json: dict, exclude: list=[''], sep='_') -> dict:
    """
    Flatten a nested structure of dicts and lists into a single-level dict.

    Each leaf value is stored under a key built by joining the path to it
    (dict keys and list positions) with ``sep``, e.g.
    ``{'a': {'b': [1, 2]}}`` -> ``{'a_b_0': 1, 'a_b_1': 2}``.

    Parameters
    ----------
    nested_json : dict
        The structure to flatten. Lists and non-container values are also
        accepted; a bare non-container value is returned under the key ``''``.
    exclude : list
        Dict keys to skip, at any nesting level. The default is never mutated.
    sep : str
        Separator placed between path components. May be any length,
        including empty.

    Returns
    -------
    dict
        Flat mapping of joined path -> leaf value. Empty dicts and empty lists
        contribute no entries.

    Notes
    -----
    Subclasses of ``dict`` / ``list`` (e.g. ``OrderedDict``) are descended into
    as well.
    """
    out = dict()

    def flatten(x, path: tuple=()):
        # `path` holds the already-stringified components leading to `x`;
        # joining only at the leaf keeps the key correct for any `sep` length
        # (slicing off one trailing character is only right when len(sep) == 1).
        if isinstance(x, dict):
            for key, value in x.items():
                if key not in exclude:
                    flatten(value, path + (f'{key}',))
        elif isinstance(x, list):
            for i, item in enumerate(x):
                flatten(item, path + (f'{i}',))
        else:
            out[sep.join(path)] = x

    flatten(nested_json)
    return out

# df = pd.DataFrame([flatten_json(x) for x in data['Return']])
# df = pd.DataFrame([flatten_json(x) for x in data[key]])

In [None]:
# Seed NumPy's legacy global RNG so the np.random.* draws below are repeatable.
np.random.seed(365)

## Date Ranges

In [None]:
# Ten consecutive daily datetimes, the first one being "now".
date_0 = list(pd.date_range(start=datetime.today(), periods=10).to_pydatetime())
date_0[:2]

In [None]:
# Ten consecutive daily datetimes, the last one being "now".
date_1 = list(pd.date_range(end=datetime.today(), periods=10).to_pydatetime())
date_1[:2]

In [None]:
# Ten business-day datetimes ending at (or just before) "now".
# Named `date_2` rather than `date` so it does not overwrite the `date` class
# imported with `from datetime import ... date ...` in the import cell.
date_2 = pd.bdate_range(end=datetime.today(), periods=10).to_pydatetime().tolist()
date_2[:2]

## Sinusoidal Sample Data

In [None]:
# Sine waves at 1x..14x the base frequency, sampled every 0.01 rad over [0, 2*pi).
sample_length = range(1, 14+1)
rads = np.arange(0, 2*np.pi, 0.01)
# One row per frequency multiplier: broadcast (14, 1) multipliers against (n,) angles.
data = np.sin(np.array(sample_length)[:, None] * rads)
column_labels = [f'freq: {i}x' for i in sample_length]
df = pd.DataFrame(data.T, index=pd.Index(rads, name='radians'), columns=column_labels)

## Date & Columns Random Numbers

In [None]:
# A 60-second datetime column plus five integer columns drawn from different ranges.
np.random.seed(365)
rows = 2000
# column name -> (low, high) passed to randint; insertion order fixes the draw order
int_ranges = {'a': (0, 3), 'b': (15, 25), 'c': (30, 40), 'd': (450, 550), 'e': (6000, 7000)}
data = {'date': pd.bdate_range('2021-06-09', freq='60s', periods=rows)}
for col_name, (low, high) in int_ranges.items():
    data[col_name] = np.random.randint(low, high, size=rows)
df = pd.DataFrame(data)

In [None]:
# `cols` columns of random floats in [0, 1000) (named a, b, ...) and a datetime index
# that starts now and advances in 5-minute steps. Raise `cols` for more columns.
np.random.seed(365)
rows = 30
cols = 1
df = pd.DataFrame(np.random.rand(rows, cols) * 1000, columns=list(string.ascii_lowercase[:cols]),
                  index=pd.bdate_range(datetime.today(), freq='5min', periods=rows))

In [None]:
# 30-minute datetime column, one integer column and two categorical columns.
np.random.seed(365)
rows = 1100
group_labels = ['1-5', '6-25', '26-100', '100-500', '500-1000', '>1000']
treatment_labels = ['Yes', 'No']
# keyword arguments are evaluated left to right, which keeps the random draw order fixed
data = dict(
    date=pd.bdate_range(datetime.today(), freq='30min', periods=rows),
    a=np.random.randint(10, size=rows),
    groups=np.random.choice(group_labels, size=rows),
    treatment=np.random.choice(treatment_labels, size=rows),
)
df = pd.DataFrame(data)

## Stock Data

In [None]:
# for getting stock data
# Full ticker list kept for reference; it is overridden two lines below, so only
# 'aapl' is downloaded as written. Comment out the override to fetch all of them.
tickers = ['msft', 'aapl', 'twtr', 'intc', 'tsm', 'goog', 'amzn', 'fb', 'nvda']
# tickers = ['^gspc']
tickers = ['aapl']
# One downloaded frame per ticker is collected here and concatenated after the loop.
df_list = list()
for ticker in tickers:
#     df = web.DataReader(ticker, data_source='yahoo', start='1975-01-01', end='2020-09-28')
    # df = web.DataReader(ticker, data_source='yahoo', start='1970-01-01', end='2022-06-13')
    # Network call: hourly bars for the given date window.
    df = yf.download(ticker, start='2023-01-05', end='2023-01-06', interval='1h')
    # Tag rows with their ticker so they stay identifiable after concatenation.
    df['tkr'] = ticker
    df_list.append(df)
    
# Stack all tickers and turn the index returned by the download into a regular column.
df = pd.concat(df_list).reset_index()

In [None]:
# Same idea as a download loop, written as one expression: download each ticker
# (network call), tag its rows with `tkr`, and stack the results.
tickers = ['msft', 'aapl', 'intc', 'tsm', 'goog', 'amzn', 'meta', 'nvda']
# tickers = ['^gspc']
# tickers = ['aapl']

# df = pd.concat((web.DataReader(ticker, data_source='yahoo', start='2020-01-01', end='2022-09-30').assign(tkr=ticker) for ticker in tickers), ignore_index=False)
# ignore_index=False keeps the index of each downloaded frame instead of renumbering rows.
df = pd.concat((yf.download(ticker, start='2022-02-01', end='2023-07-14').assign(tkr=ticker) for ticker in tickers), ignore_index=False)

In [None]:
# OHLC candles for the ETH/USDT market on Poloniex, fetched over HTTP.
# NOTE(review): confirm the api.cryptowat.ch endpoint is still in service before relying on this cell.
periods = '3600'  # sent as the `periods` query parameter and used as the key into `result` below
# A timeout keeps the cell from hanging forever on an unresponsive server.
resp = requests.get('https://api.cryptowat.ch/markets/poloniex/ethusdt/ohlc', params={'periods': periods}, timeout=30)
# Fail here with the HTTP status instead of a confusing KeyError on data['result'].
resp.raise_for_status()
data = resp.json()
df = pd.DataFrame(data['result'][periods], columns=['date', 'open', 'high', 'low', 'close', 'volume', 'amount'])
# The first field of each candle is parsed as epoch seconds.
df['date'] = pd.to_datetime(df['date'], unit='s')

# stock = sdf.retype(df)
# stock['macds']
# stock

## Read JSON Data

In [None]:
# Path of the file read by the cells below; swap in one of the commented alternatives as needed.
p = Path('data.json')  # if in current dir
# p = Path.cwd() / 'test.txt'
# p = Path.cwd() / 'data/nvdcve-1.1-2019.json/nvdcve-1.1-2019.json'

In [None]:
# Parse the UTF-8 JSON file at `p` straight from the open file handle.
with p.open('r', encoding='utf-8') as f:
    data = json.load(f)
#     data = json.loads(f.read())

In [None]:
# Use with a list of dicts
# For files holding a Python literal (e.g. single-quoted dicts) rather than strict JSON.
# literal_eval evaluates literals only, so it is the safe choice compared with eval.
# The encoding is pinned to UTF-8, matching the JSON cell, so the result does not
# depend on the platform's default encoding.
with p.open("r", encoding='utf-8') as f:
    data = literal_eval(f.read())

In [None]:
# Flatten the loaded records in `data` into a table.
df = pd.json_normalize(data)

## Read / Write Copied Data

In [None]:
# Build a DataFrame from text currently on the system clipboard.
# df = pd.read_clipboard(sep='\s*\|\s*').iloc[1:,1:-1]  # read markdown into dataframe
# '\\s+' is a regex: any run of whitespace separates fields.
df = pd.read_clipboard(sep='\\s+') #, header=None)

In [None]:
# Copy `df` to the clipboard as aligned, whitespace-separated text (the frame's string
# form), which pd.read_clipboard(sep='\\s+') can read back.
# excel=False requests that text form explicitly. `sep` here must be a single literal
# character for the CSV writer, so passing the regex '\\s+' only reached this output
# through pandas' fallback path (with a warning).
df.to_clipboard(excel=False, index=True)

In [None]:
# Copy a frame to the clipboard as comma-separated text, index included.
# NOTE(review): `trip` is not defined in any visible cell of this notebook; replace it
# with the frame to copy (e.g. `df`) before running.
trip.to_clipboard(sep=',', index=True)

In [None]:
# Load a CSV from the working directory.
df = pd.read_csv('test.csv')
# NOTE(review): the variant below runs `eval` on file contents; for files that are not
# fully trusted, use `literal_eval` (already imported) as the converter instead.
# df = pd.read_csv('test.txt', sep='|', header=None, converters={2: eval})  # converters={'file_path_lists': eval}

In [None]:
# Load an Excel workbook from the working directory (pandas reads the first sheet by default).
df = pd.read_excel('test.xlsx')

In [None]:
# Build a frame directly from whatever `data` currently holds (set by one of the cells above).
df = pd.DataFrame(data)

## Notebook Memory Usage

In [None]:
# These are the usual ipython objects, including this one you are creating
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']  # names to leave out of the report

# Get a list of (name, size in bytes) for the notebook's globals, largest first.
# Skipped: private names (leading underscore), names that match an imported module,
# and the IPython bookkeeping names listed above.
# sys.getsizeof reports the object's own footprint only, not objects it refers to.
# Keep this as a single top-level expression: dir() must run at cell scope to see the globals.
sorted([(x, sys.getsizeof(globals().get(x))) for x in dir() if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

## Move Legend

In [None]:
# move legend outside
# Two alternative snippets; use one. Both anchor the legend's centre-left edge at the
# right edge of the axes (x=1, y=0.5) and drop the frame.
# NOTE(review): `ax` is not created in any visible cell; it must be an existing Axes.
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left', frameon=False)  # plain matplotlib: (re)creates the legend
sns.move_legend(ax, bbox_to_anchor=(1, 0.5), loc='center left', frameon=False)  # seaborn: repositions an existing legend

## Seaborn Datasets

In [None]:
# Load seaborn's bundled 'tips' example dataset (fetched online unless already cached locally).
tips = sns.load_dataset('tips')

## Matplotlib Inline / Interactive

In [None]:
%matplotlib inline

In [None]:
# for interactive plots
%matplotlib qt

# Work Space

In [None]:
# Load the workout export.
# NOTE(review): absolute local path; consider a configurable data directory.
df = pd.read_csv('d:/data/TrentonMcKinney_workouts.csv')

# Drop the trailing timezone labels so the remaining text parses as naive datetimes.
timestamps = df['Workout Timestamp']
for tz_label in (' (PST)', ' (-08)', ' (-07)'):
    timestamps = timestamps.str.replace(tz_label, '', regex=False)
df['Workout Timestamp'] = pd.to_datetime(timestamps)

# Replace the numeric workout length with its interval bucket (right-closed bins).
bins = [0, 9, 20, 30, 45, 60, 120]
df['Length (minutes)'] = pd.cut(df['Length (minutes)'], bins=bins)

# df = df[~df['Length (minutes)'].le(9)].copy()
# Chronological order with a fresh 0..n-1 index.
df = df.sort_values('Workout Timestamp', ignore_index=True)
# df['Length (minutes)'] = df['Length (minutes)'].astype(str)
df.head()

In [None]:
# After the chronological sort, the tail shows the most recent workouts.
df.tail()

In [None]:
# Distinct length buckets present after pd.cut; NaN marks lengths outside (0, 120].
df['Length (minutes)'].unique()

In [None]:
# Column dtypes and non-null counts, to confirm the timestamp and bucket conversions took effect.
df.info()

In [None]:
# Distinct discipline labels; these become the keys of the per-discipline dict built next.
df['Fitness Discipline'].unique()

In [None]:
# Split the workouts into one independent, re-indexed frame per fitness discipline.
dd = {}
for disc in df['Fitness Discipline'].unique():
    is_disc = df['Fitness Discipline'].eq(disc)
    dd[disc] = df[is_disc].reset_index(drop=True).copy()

In [None]:
# Peek at the cycling subset (requires a 'Cycling' label in 'Fitness Discipline').
dd['Cycling'].head()

In [None]:
# Average watts over a trailing 3-day time window, indexed by workout time.
cycling_watts = dd['Cycling'].set_index('Workout Timestamp')[['Avg. Watts']]
rm = cycling_watts.rolling('3D').mean()
rm.plot()

In [None]:
# Average watts over time for cycling workouts, one line per workout-length bucket.
sns.relplot(kind='line', data=dd['Cycling'], x='Workout Timestamp', y='Avg. Watts', hue='Length (minutes)', aspect=1.5, height=10)

In [None]:
# Same plot as for 'Avg. Watts', with 'Total Output' on the y-axis.
sns.relplot(kind='line', data=dd['Cycling'], x='Workout Timestamp', y='Total Output', hue='Length (minutes)', aspect=1.5, height=10)

matplotlib-3d

Removed tag(s) from the title, as per https://meta.stackexchange.com/help/tagging. Removed ? because the title is not a question; it's missing an auxiliary verb (e.g. How do I ...?).