In [8]:
import numpy as np
import pandas as pd

class PortfolioEnv:
    """Step-based portfolio allocation environment with inflation-adjusted rewards.

    Each row of ``stock_data`` is consumed by ``step`` as a vector of
    per-asset fractional variations (the dot product with the allocation is
    used as a return), and the first column of ``ipca_data`` is consumed as
    the inflation rate for the same row position.

    NOTE(review): the two tables are matched by integer position, not by
    date -- this assumes both CSVs contain exactly the same dates in the same
    order; confirm against the data preparation step.
    """

    def __init__(self, stock_data_csv, ipca_csv, window_size):
        """Load both CSV files and put the environment in its initial state.

        Args:
            stock_data_csv: Path to a CSV with a ``Date`` column plus one
                column per asset.
            ipca_csv: Path to a CSV with a ``Date`` column; its first
                remaining column is read as the inflation rate.
            window_size: Number of past rows exposed as the observation.
        """
        self.stock_data = pd.read_csv(stock_data_csv, parse_dates=['Date'])
        self.ipca_data = pd.read_csv(ipca_csv, parse_dates=['Date'])

        self.stock_data.set_index('Date', inplace=True)
        self.ipca_data.set_index('Date', inplace=True)

        # Missing observations are treated as "no variation" for that row.
        self.stock_data.fillna(0, inplace=True)
        self.ipca_data.fillna(0, inplace=True)

        self.window_size = window_size
        self.cumulative_wallet_value = 1  # Starts with 1, representing 100% of initial investment

        self.reset()

    def reset(self):
        """Rewind to the first step that has a full window and return its state.

        The allocation is reset to 100% in the first asset and the cumulative
        wallet value to 1.
        """
        self.current_step = self.window_size
        self.portfolio_allocation = np.zeros(self.stock_data.shape[1])
        self.portfolio_allocation[0] = 1  # All initial investment in the first asset
        self.cumulative_wallet_value = 1

        # Kept for backward compatibility; not read anywhere in this class.
        # NOTE(review): despite the name, this holds the data row at the
        # starting step, which `step` treats as variations -- confirm intent.
        self.initial_prices = self.stock_data.iloc[self.current_step].values
        self.portfolio_value = 1

        return self._get_state()

    def _get_state(self):
        """Return the last ``window_size`` rows of both tables, side by side.

        The window ends just before ``current_step`` (exclusive); stock
        columns come first, followed by the inflation columns.
        """
        start = self.current_step - self.window_size
        stock_window = self.stock_data.iloc[start:self.current_step].values
        inflation_window = self.ipca_data.iloc[start:self.current_step].values
        return np.concatenate((stock_window, inflation_window), axis=1)

    def step(self, action):
        """Apply an allocation for the next row and advance one step.

        Args:
            action: Array-like of per-asset weights (one per stock column)
                that must sum to 1.

        Returns:
            Tuple ``(state, reward, done)`` where ``reward`` is the
            inflation-adjusted portfolio variation for the step and ``done``
            is True once no further row is available after the new step.

        Raises:
            ValueError: If ``action`` has the wrong number of weights or the
                weights do not sum to 1.
            IndexError: If called when there is no row after the current step.
        """
        # Copy into a private float array so later mutation of the caller's
        # object cannot silently change the stored allocation.
        weights = np.array(action, dtype=float).ravel()

        n_assets = self.stock_data.shape[1]
        if weights.size != n_assets:
            raise ValueError(
                f"Action must have {n_assets} weights, got {weights.size}"
            )
        # Explicit raise instead of `assert`, which is removed under -O.
        if not np.isclose(np.sum(weights), 1):
            raise ValueError("Action must sum to 1")

        next_step = self.current_step + 1
        if next_step >= len(self.stock_data):
            raise IndexError(
                "No data left after the current step; call reset() to start over"
            )

        next_changes = self.stock_data.iloc[next_step].values
        portfolio_var_t1 = np.dot(weights, next_changes)

        # Deflate the nominal variation by the inflation rate of the same row.
        inflation_rate = self.ipca_data.iloc[next_step].values[0]
        real_portfolio_var_t1 = (1 + portfolio_var_t1) / (1 + inflation_rate) - 1

        self.cumulative_wallet_value *= (1 + real_portfolio_var_t1)

        reward = real_portfolio_var_t1

        self.current_step = next_step
        done = self.current_step >= len(self.stock_data) - 1

        self.portfolio_allocation = weights

        return self._get_state(), reward, done

    def get_cumulative_value(self):
        """Return the compounded inflation-adjusted wallet value (starts at 1)."""
        return self.cumulative_wallet_value

    def render(self):
        """Print the current-row portfolio variation, wallet value and allocation."""
        current_prices = self.stock_data.iloc[self.current_step].values
        portfolio_var = np.dot(self.portfolio_allocation, current_prices)
        print(f"Portfolio variation: {portfolio_var}")
        print(f"Cumulative wallet value: {self.cumulative_wallet_value}")
        print(f"Portfolio allocation: {self.portfolio_allocation}")



# Input locations and observation window length for the demo run.
stock_data = "data/clean/total-return-variation.csv"
ipca_data = "data/clean/ipca.csv"
window_size = 30

# Build the environment and show its state right after reset.
env = PortfolioEnv(
    stock_data_csv=stock_data,
    ipca_csv=ipca_data,
    window_size=window_size,
)
env.render()

print("-" * 30)

# Take one step while keeping the current allocation unchanged.
# NOTE: `down` receives the episode-finished flag returned by `step`.
curent_state, reward, down = env.step(action=env.portfolio_allocation)
print(f"Reward: {reward}")
env.render()

Portfolio variation: 0.068585
Cumulative wallet value: 1
Portfolio allocation: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
------------------------------
Reward: 0.06857112857233383
Portfolio variation: 0.068619
Cumulative wallet value: 1.0685711285723338
Portfolio allocation: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


ModuleNotFoundError: No module named 'sklearn'