In [16]:
import pandas as pd
import numpy as np
from yfinance import download as stock_price_download
from math import floor

### Create conversion dicts to go between a note and a midi value

In [31]:
notes: list[str] = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
octaves = [i for i in range(-1, 10)]

note_to_midi = {
    note: midi_code for midi_code, note in \
    enumerate([note + str(octave) for octave in octaves for note in notes]) \
    if midi_code < 128
    }

midi_to_note = {v: k for k,v in note_to_midi.items()}

# print(note_to_midi)

def to_midi(val):
    
    if 0 <= val <= 127:
        return midi_to_note[val]


#TODO: think of a better name for the note vs tone paradigm
# Currently: Notes have a tone and an octave, tones are just a letter for a scale value
def to_tone(note):
    return note[:2] if "#" in note else note[0]




### Generate patterns for major and minor scales

In [32]:
major_steps = ["W", "W", "H", "W", "W", "W", "H"]
major_indices = [0, 2, 4, 5, 7, 9, 11]

minor_steps = ["W", "H", "W", "W", "H", "W", "W"]
minor_indices = [0, 2, 3, 5, 7, 8, 10]

scales = {}

def firstToLast(l: list):
    l.append(l.pop(0))
    return l

for i in range(len(notes)):
    maj_tag = " Major"
    min_tag = " Minor"
    notes = firstToLast(notes)

    scales[notes[0] + maj_tag] = set([notes[i] for i in major_indices])
    scales[notes[0] + min_tag] = set([notes[i] for i in minor_indices])


    # print(f"Round {i}:")
    # print(notes)

# scales


### Read Financial Data

In [28]:
ticker: str = "PTON"
start_date: str = "2020-01-01"
end_date: str = "2022-01-01"

df: pd.DataFrame = stock_price_download(ticker, start_date, end_date)

steps = 32

num_entries: int = len(df)
step_factor: int = floor(num_entries / steps) 

# filtered_df = df.iloc[::step_factor]
filtered_df = df


min_value: float = df.min()
max_value: float = df.max()
value_range: float = max_value - min_value

normalized_df = (filtered_df - min_value) / value_range

normalized_df.head()


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,0.069578,0.062596,0.069055,0.069164,0.069164,0.041218
2020-01-03,0.073579,0.070769,0.079242,0.074978,0.074978,0.030132
2020-01-06,0.081175,0.073792,0.082182,0.069231,0.069231,0.01901
2020-01-07,0.07324,0.068044,0.076097,0.073626,0.073626,0.007751
2020-01-08,0.077513,0.064124,0.076234,0.068555,0.068555,0.059549


### Convert to midi notes and count note occurrences

In [29]:
midi_df = normalized_df * 127
midi_df = midi_df.apply(np.ceil)

midi_df['Notes'] = midi_df['Open'].apply(to_midi)

midi_df['Tones'] = midi_df['Notes'].apply(
    lambda s: s[:2] if "#" in s else s[0]
)

midi_df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Notes,Tones
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-02,9.0,8.0,9.0,9.0,9.0,6.0,A-1,A
2020-01-03,10.0,9.0,11.0,10.0,10.0,4.0,A#-1,A#
2020-01-06,11.0,10.0,11.0,9.0,9.0,3.0,B-1,B
2020-01-07,10.0,9.0,10.0,10.0,10.0,1.0,A#-1,A#
2020-01-08,10.0,9.0,10.0,9.0,9.0,8.0,A#-1,A#


In [35]:
tone_series = midi_df.groupby('Tones')['Tones'].count().sort_values(ascending=False).iloc[0:30]

tone_series

Tones
F#    50
A     49
B     48
A#    44
D     44
F     43
C     42
G#    42
D#    40
G     39
E     33
C#    31
Name: Tones, dtype: int64

In [36]:
# Greedy Algorithm to find closest scale

ScaleDict = dict[str, set[str]]

def greedy_closest_scale(df: pd.DataFrame, scales: ScaleDict):

    closest_scale: list[str] = []
    max_metric = 0
    
    for name, notes in scales.items():
        metric = sum([ df.loc(note) for note in notes])

        if metric > max_metric:
            closest_scale = [name]
            max_metric = metric
        elif metric == max_metric:
            closest_scale.append(name)

    return closest_scale

greedy_closest_scale(tone_series, scales)

ValueError: too many values to unpack (expected 2)