In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import math
from datetime import datetime
from music21 import *

Reward function:  
if the segmentation is correct: output +1 (change of roughness)  
if the segmentation is incorrect: output -1 (correct change of roughness in the next segmentation)  
if doing nothing is correct: output +1  
if doing nothing is incorrect: output -1 (correct change of roughness)  
if the action is illegal: output 0  

In [7]:
class SegmentationEnv(Env):
    """Gym environment for segmenting pieces of music.

    The agent chooses between two actions: keep the current segment (0) or
    start a new segment (1). The observation is a (12, 7) array indexed by
    pitch class and octave whose values are total durations, capped at 20.

    Attributes filled by ``__init__`` (one entry per piece, each entry holding
    one element per sounding pitch):
        notes: pitch names.
        offset: offset of the note/chord the pitch belongs to.
        beat: beat of the note/chord the pitch belongs to.
        duration: ``duration`` attribute of the note/chord.
        octave: octave of the pitch.
        beatchanges: dict mapping a time-signature offset to 2, 1 or 0.5 for
            denominators 2, 4 and 8 respectively.
    """

    def __init__(self, pieces):
        """Parse every piece and set up the spaces and the internal cursors.

        Args:
            pieces: iterable of sources, each handed to ``converter.parse``.
                NOTE(review): the argument is iterated directly, so a bare
                string is walked character by character; wrap a single piece
                in a list.

        Raises:
            AssertionError: if a time signature has a denominator other than
                2, 4 or 8.
        """
        # Preprocess the pieces
        self.notes = []
        self.offset = []
        self.beat = []
        self.duration = []
        self.octave = []
        self.beatchanges = []
        for piece in pieces:
            xnotes = []
            xoffset = []
            xbeat = []
            xduration = []
            xoctave = []
            c = converter.parse(piece)
            post = c.flattenParts().flat
            for note in post.notes:
                duration = note.duration
                offset = note.offset
                beat = note.beat
                # A chord contributes one entry per pitch, all sharing the
                # chord's offset, beat and duration.
                allnotes = list(note.pitches)
                for note1 in allnotes:
                    xnotes.append(note1.name)
                    xoffset.append(offset)
                    xbeat.append(beat)
                    xduration.append(duration)
                    xoctave.append(note1.octave)
            self.notes.append(xnotes)
            self.offset.append(xoffset)
            self.beat.append(xbeat)
            self.duration.append(xduration)
            self.octave.append(xoctave)
            # Stored value equals 4 / denominator; presumably the beat length
            # in quarter notes -- TODO confirm.
            xbeatchange = {}
            for ts in post.recurse().getElementsByClass(meter.TimeSignature):
                assert ts.denominator in [2,4,8]
                if ts.denominator == 2:
                    xbeatchange[ts.offset] = 2
                elif ts.denominator == 4:
                    xbeatchange[ts.offset] = 1
                else:
                    xbeatchange[ts.offset] = 0.5
            self.beatchanges.append(xbeatchange)
        # Actions: Remain segment (0), segment (1)
        self.action_space = Discrete(2)
        # Observations: First dim 12 pitch classes, Second dim Octave (1-7), Value is total duration.
        self.observation_space = Box(
            low=np.zeros((12,7)),
            high=np.ones((12,7))*20, # Set the maximum duration to 20. If exceed, then just keep at 20.
        )
        # Internal state: check where the time currently is
        self.current_state = 0
        self.current_noteidx = 0
        self.notelistfirst = 0
        self.notelistlast = 0
        self.latestbeatfirst = 0
        self.latestbeatlast = 0
        self.state = np.zeros((12,7))

    def step(self, action):
        """Advance the environment by one step.

        NOTE(review): this body appears to be carried over from a trading
        environment and has not been adapted to segmentation yet. It reads
        ``self.timestamp``, ``self.df``, ``self.dfnorm``, ``self.leverage``,
        ``self.dp_usdt``, ``self.dp_coin``, ``self.pastroi``, ``self.pastbal``,
        ``self.starttime``, ``self.endtime`` and ``self.latest_price`` and
        calls ``round_up`` / ``round_down``; none of these are set up by
        ``__init__`` or defined in the visible part of this notebook. It also
        indexes ``self.state`` as a flat vector rather than a (12, 7) array.

        Args:
            action: sequence whose first element is itself indexed at
                positions 0-3 (two direction/fraction pairs).

        Returns:
            Tuple ``(observation, reward, done, info)``.
        """
        action = action[0]
        # Skip forward minute by minute until a row exists for the timestamp.
        latest_data = []
        while len(latest_data) == 0:
            self.timestamp += 60000  # 1 minute currently
            latest_data = self.df.loc[self.df["timestamp"] == self.timestamp].values
        latest_data = latest_data[0]
        latest_data2 = self.dfnorm.loc[self.df["timestamp"] == self.timestamp].values[0]
        # timestamp,open,high,low,close,volume,funding_rate,funding_time
        funding_time = latest_data[7]
        funding_rate = latest_data[6]
        open_price = latest_data[1]
        self.latest_price = open_price
        # Check liquidate condition and update balance
        pnl_long = (open_price - self.state[4]) * self.state[2]
        if pnl_long < 0 and self.state[3] + pnl_long <= 0:
            self.state[3] = 0
            self.state[2] = 0
            pnl_long = 0
        pnl_short = (self.state[7] - open_price) * self.state[5]
        if pnl_short < 0 and self.state[6] + pnl_short <= 0:
            self.state[5] = 0
            self.state[6] = 0
            pnl_short = 0
        self.state[0] = (
            self.state[1] + self.state[3] + self.state[6] + pnl_long + pnl_short
        )
        # Remember a depleted balance: this flag must survive until the
        # return instead of being overwritten by the end-of-data check below.
        done = False
        if self.state[0] <= 0:
            done = True
        if (
            funding_time <= self.timestamp and funding_time > self.timestamp - 60000
        ):  # It's funding time!
            net_position = self.state[2] - self.state[5]
            funding = net_position * open_price * funding_rate
            self.state[1] -= funding
            self.state[0] -= funding
        # Apply action
        if action[0] // 1 == 0:  # Buy long
            # Snap near-extreme fractions to exactly 0 or 1.
            if action[1] >= 0.98:
                action[1] = 1
            elif action[1] <= 0.02:
                action[1] = 0
            amount_to_buy = min(
                self.state[1] * self.leverage,
                round_up(self.state[1] * action[1] * self.leverage, self.dp_usdt),
            )  # in USDT
            amount_coin = round_down((amount_to_buy / open_price), self.dp_coin)
            if amount_coin > 0:
                self.state[1] -= amount_to_buy / self.leverage
                self.state[4] = (self.state[4] * self.state[2] + amount_to_buy) / (
                    self.state[2] + amount_to_buy / open_price
                )
                self.state[2] += amount_coin
                self.state[3] += amount_to_buy / self.leverage
        elif action[0] // 1 == 1:  # Sell long
            if action[1] >= 0.98:
                action[1] = 1
            elif action[1] <= 0.02:
                action[1] = 0
            amount_to_sell = min(
                self.state[2], round_up(self.state[2] * action[1], self.dp_coin)
            )  # in target coin
            if amount_to_sell > 0:
                return_leverage = round_up(self.state[3] * action[1], self.dp_usdt)
                pnl = (open_price - self.state[4]) * amount_to_sell
                self.state[3] -= return_leverage
                self.state[3] = max(0, self.state[3])
                self.state[2] -= amount_to_sell
                self.state[1] += return_leverage + pnl
            if self.state[2] == 0:
                self.state[4] = 0
        if action[2] // 1 == 0:  # Short sell
            if action[3] >= 0.98:
                action[3] = 1
            elif action[3] <= 0.02:
                action[3] = 0
            amount_to_buy = min(
                self.state[1] * self.leverage,
                round_up(self.state[1] * action[3] * self.leverage, self.dp_usdt),
            )  # in USDT
            amount_coin = round_up((amount_to_buy / open_price), self.dp_coin)
            if amount_coin > 0:
                self.state[1] -= amount_to_buy / self.leverage
                self.state[7] = (self.state[7] * self.state[5] + amount_to_buy) / (
                    self.state[5] + amount_to_buy / open_price
                )
                self.state[5] += amount_coin
                self.state[6] += amount_to_buy / self.leverage
        elif action[2] // 1 == 1:  # Short buy
            if action[3] >= 0.98:
                action[3] = 1
            elif action[3] <= 0.02:
                action[3] = 0
            amount_to_sell = min(
                self.state[5], round_up(self.state[5] * action[3], self.dp_coin)
            )  # in target coin
            if amount_to_sell > 0:
                return_leverage = round_up(self.state[6] * action[3], self.dp_usdt)
                pnl = (self.state[7] - open_price) * amount_to_sell
                self.state[6] -= return_leverage
                self.state[6] = max(0, self.state[6])
                self.state[5] -= amount_to_sell
                self.state[1] += return_leverage + pnl
            if self.state[5] == 0:
                self.state[7] = 0

        # Calculate reward from the ROI history recorded so far.
        sharpe_roi = self.pastroi.copy()
        if len(sharpe_roi) == 1:
            reward = sharpe_roi[0]
        else:
            reward = np.mean(sharpe_roi) / np.std(sharpe_roi)  # sharpe ratio
            # A zero standard deviation (or empty history) yields nan/inf.
            reward = 0 if np.isnan(reward) or np.isinf(reward) else reward
        self.state[-1] = reward
        # Every 86400000 ms after the start, record the balance and its ROI.
        if (self.timestamp - self.starttime) % 86400000 == 0:
            self.pastbal.append(self.state[0])
            self.pastroi.append(
                (self.pastbal[-1] - self.pastbal[-2]) / self.pastbal[-2]
            )
        # The episode also ends once the final timestamp is reached.
        if self.timestamp == self.endtime:
            done = True

        # Set placeholder for info
        info = {}
        stateinfo = latest_data2.tolist()[1:]
        stateinfo.extend(
            self.state[0:3]
        )  # TODO: consider also normalize the position amount (e.g. as a percentage to total?)
        stateinfo.append(self.state[5])
        # Return step information
        return np.array(stateinfo), reward, done, info

    def render(self):
        """Rendering is not implemented; does nothing."""
        return

    def reset(self, df=None):
        """Rewind all cursors and return the initial observation.

        Args:
            df: unused; kept so existing callers keep working.

        Returns:
            A fresh (12, 7) array of zeros (a copy of ``self.state``).
        """
        self.current_state = 0
        self.current_noteidx = 0
        self.notelistfirst = 0
        self.notelistlast = 0
        self.latestbeatfirst = 0
        self.latestbeatlast = 0
        # Restore the same zero observation that __init__ starts from.
        self.state = np.zeros((12,7))
        return np.array(self.state)

In [5]:
# Scratch check: read the `octave` attribute of a pitch built from the name 'C4'.
c = pitch.Pitch('C4')
c.octave

4

In [10]:
# Smoke test: build the environment and print ten samples from its action space.
# NOTE(review): SegmentationEnv.__init__ iterates over `pieces`, so the bare
# string "sdf" is walked character by character -- presumably a placeholder;
# pass a list of real score paths instead. Sampling is unseeded, so the
# printed values are not expected to repeat across runs.
env = SegmentationEnv("sdf")
for i in range(10):
    print(env.action_space.sample())

1
1
0
0
1
0
1
1
0
1
