### Portfolio Dataset

In [5]:
class PortfolioDataset(torch.utils.data.Dataset):
    """Torch dataset of portfolios (feature rows) and their Sharpe ratios (targets).

    The underlying table ``self.data`` is expected to end with three metric
    columns; in ``load_portfolios`` these are ``risk``, ``return`` and
    ``sharpe``. Every column before them is treated as an integer feature.

    Attributes set by the loaders:
        data: DataFrame holding the features and the three metric columns.
        mu, sigma: return vector and covariance matrix of the assets.
        fields: integer feature matrix (all columns but the last three).
        targets: last column of ``data``.
        field_dimensions: per-feature ``max + 1``, never smaller than 2.
    """

    def __init__(self):
        super().__init__()

        self.sharpe = Sharpe()

    @staticmethod
    def _file_path(data_folder, file_name):
        """Join folder and file name, with or without a trailing separator."""
        # Imported locally because the module-level imports of this file are
        # not guaranteed to include ``os``.
        import os

        return os.path.join(data_folder, file_name)

    def load(self, data_folder):
        """Load ``dataset.csv``, ``mu.csv`` and ``sigms.csv`` from a folder.

        Args:
            data_folder: Folder written by ``save``; a trailing path separator
                is optional.

        Returns:
            This instance, so the call can be chained.
        """
        self.data = pd.read_csv(self._file_path(data_folder, 'dataset.csv'))

        self.mu = np.genfromtxt(self._file_path(data_folder, 'mu.csv'))
        # The file name spelling ('sigms') is kept so that existing folders
        # stay loadable.
        self.sigma = np.genfromtxt(self._file_path(data_folder, 'sigms.csv'))

        self.parse_dataset()

        return self

    def load_portfolios(self, portfolios, asset_prices):
        """Build the dataset from raw portfolios and historical asset prices.

        Args:
            portfolios: 2-D collection with one portfolio per row.
            asset_prices: Price table handed to ``Sharpe.get_mu_sigma``.

        Returns:
            This instance, so the call can be chained (mirrors ``load``).
        """
        mu, sigma = self.sharpe.get_mu_sigma(asset_prices)

        data = pd.DataFrame(portfolios)

        # data['decimal'] = self.sharpe.samples_to_decimal(portfolios)

        data['risk'] = self.sharpe.get_risks(portfolios, sigma)
        data['return'] = self.sharpe.get_returns(portfolios, mu)
        data['sharpe'] = data['return'] / data['risk']

        # 0 / 0 (e.g. an empty portfolio) yields NaN; store it as 0.
        data = data.fillna(value=0)

        self.mu = mu
        self.sigma = sigma
        self.data = data

        self.parse_dataset()

        return self

    def parse_dataset(self):
        """Derive ``fields``, ``targets`` and ``field_dimensions`` from ``data``."""
        array = self.data.to_numpy()

        # The last three columns are metrics; everything before is a feature.
        self.fields = array[:, :-3].astype(int)
        self.targets = array[:, -1]

        self.field_dimensions = np.max(self.fields, axis=0).astype(int) + 1

        # A feature that only ever takes the value 0 still gets two categories.
        self.field_dimensions[self.field_dimensions < 2] = 2

    def save(self, data_folder):
        """Write ``dataset.csv``, ``mu.csv`` and ``sigms.csv`` into a folder.

        Args:
            data_folder: Existing target folder; a trailing path separator is
                optional.
        """
        self.data.to_csv(self._file_path(data_folder, 'dataset.csv'), index=False)

        np.savetxt(self._file_path(data_folder, 'mu.csv'), self.mu)
        np.savetxt(self._file_path(data_folder, 'sigms.csv'), self.sigma)

    def append_records(self, new_fields, new_targets, record_repetitions_count):
        """Append new records to ``fields`` / ``targets``, each repeated.

        Only the training arrays grow; ``data`` and ``field_dimensions`` are
        left untouched.

        Args:
            new_fields: 2-D array with one new feature row per record.
            new_targets: Targets matching ``new_fields`` row by row.
            record_repetitions_count: How many copies of the whole batch to
                append.
        """
        # Both arrays are tiled batch-wise, so rows and targets stay aligned.
        new_fields_array = np.tile(new_fields, (record_repetitions_count, 1))
        new_targets_array = np.tile(new_targets, (record_repetitions_count, 1)).ravel()

        self.fields = np.vstack((self.fields, new_fields_array))
        self.targets = np.concatenate((self.targets, new_targets_array))

    def __len__(self):
        """Return the number of records."""
        return self.fields.shape[0]

    def __getitem__(self, index):
        """Return the ``(fields, target)`` pair stored at ``index``."""
        fields = self.fields[index]
        target = self.targets[index].squeeze()

        return fields, target

    # Plots

    def plot_scatter(self, samples_to_plot=100000):
        """Scatter risk against return, coloured by Sharpe ratio.

        Args:
            samples_to_plot: Approximate number of points to draw; the table
                is subsampled with a constant stride to get there.
        """
        index_step = max(1, len(self.data) // samples_to_plot)

        # The best portfolio is looked up in the full table, not the subsample.
        top_sharpe_index = self.data.sharpe.idxmax()
        top_sharpe_row = self.data.loc[top_sharpe_index]

        top_return = top_sharpe_row['return']
        top_risk = top_sharpe_row['risk']

        plt.figure(figsize=(12, 8))

        plt.title('Markowitz portfolio (Combinations of all portfolio selections)')
        plt.xlabel('Volatility - standard deviation')
        plt.ylabel('Return')

        sharpe_scatter = plt.scatter(x=self.data.risk[::index_step],
                                     y=self.data['return'][::index_step],
                                     c=self.data.sharpe[::index_step],
                                     cmap='viridis')

        # Highlight the portfolio with the highest Sharpe ratio.
        plt.scatter(top_risk, top_return, c='red', s=50, marker=5)

        plt.colorbar(sharpe_scatter, label='Sharpe Ratio')

        plt.show()

    def plot_histogram(self, samples_to_plot=100000):
        """Plot the distribution of Sharpe ratios as a step histogram.

        Args:
            samples_to_plot: Approximate number of rows to include; the table
                is subsampled with a constant stride to get there.
        """
        index_step = max(1, len(self.data) // samples_to_plot)

        # sorted_sharpe_dataset = self.data[::index_step].sort_values('sharpe', ignore_index=True)

        plt.figure(figsize=(15, 5))

        plt.hist(self.data[::index_step].sharpe, bins=100,
                 label="Sharpe Ratios",
                 histtype='step',
                 # color='steelblue',
                 linewidth=5,
                 alpha=0.7)

        plt.title("Monte-Carlo generated Sharpe Ratio Distribution")
        plt.xlabel("Sharpe Ratio")
        plt.ylabel("Samples Count")
        plt.ticklabel_format(style='plain')
        plt.legend()
        plt.show()

### Decoding and Ranking

In [None]:
def portfolio_get_top_results(results, top_size=10, sort_by='sharpe'):
    """Pick the best rows of ``results`` and return them as training arrays.

    Rows are ranked by ``sort_by``: lowest first when ranking by ``'energy'``,
    highest first for any other column.

    Args:
        results: DataFrame with at least ``portfolio``, ``sharpe`` and the
            ``sort_by`` column.
        top_size: Number of leading rows to keep.
        sort_by: Column used for ranking.

    Returns:
        Tuple ``(fields, targets)``: the selected portfolios stacked row-wise
        and their Sharpe ratios.
    """
    ranked = results.sort_values(sort_by, ascending=(sort_by == 'energy'))
    best = ranked[:top_size]

    stacked_portfolios = np.vstack(best['portfolio'].to_numpy())
    sharpe_values = best['sharpe'].to_numpy()

    return stacked_portfolios, sharpe_values

In [None]:
def portfolio_decode_sampling_results(sampling_results):
    """Turn sampler output into a DataFrame with risk, return and Sharpe ratio.

    Relies on the module-level ``qubo`` helper and the module-level
    ``dataset`` (for ``mu``, ``sigma`` and the ``Sharpe`` helpers).

    Args:
        sampling_results: Object whose ``data()`` yields records exposing
            ``sample`` (a mapping), ``energy`` and ``num_occurrences``
            (looks like a dimod ``SampleSet`` - TODO confirm).

    Returns:
        DataFrame with one row per record and the columns ``decimal``,
        ``sample``, ``portfolio``, ``energy``, ``num_occurrences``, ``risk``,
        ``return`` and ``sharpe``.
    """
    results = []

    for sampling_result in sampling_results.data():

        sample = np.array(list(sampling_result.sample.values()))

        # presumably collapses the one-hot encoded sample into one bit per
        # asset; verify against qubo.one_hot_to_bits
        portfolio = qubo.one_hot_to_bits(sample)

        # Read the portfolio as a binary number, first element most significant.
        decimal = sum(number * 2 ** position
                      for position, number in enumerate(reversed(portfolio)))

        results.append({'decimal': np.array(decimal),
                        'sample': sample,
                        'portfolio': portfolio,
                        'energy': sampling_result.energy,
                        'num_occurrences': sampling_result.num_occurrences})

    results = pd.DataFrame(results)

    results['risk'] = dataset.sharpe.get_risks(results.portfolio, dataset.sigma)
    results['return'] = dataset.sharpe.get_returns(results.portfolio, dataset.mu)

    # Avoid division by zero below. The column is assigned back explicitly:
    # an in-place replace on `results.risk` works on a temporary Series and
    # is not guaranteed to reach the DataFrame (pandas Copy-on-Write).
    results['risk'] = results['risk'].replace(0, 0.000001)

    results['sharpe'] = results['return'] / results['risk']

    return results

### Sharpe

In [None]:
# 1) Sharpe ratio

class Sharpe:
    """Stateless helpers for portfolio risk, return and Sharpe-ratio maths."""

    @staticmethod
    def get_risks(portfolios, sigma):
        """Return the quadratic form ``x^T * sigma * x`` for every portfolio.

        NOTE(review): this is the quadratic form itself; no square root is
        taken here.

        Args:
            portfolios: Iterable of 1-D portfolio vectors.
            sigma: Square matrix matching the portfolio length.

        Returns:
            List with one scalar per portfolio.
        """
        risks = []

        for portfolio in portfolios:

            x_t = np.array(portfolio)
            x = np.array([portfolio]).T

            # (n,) . (n, n) . (n, 1) -> array of shape (1,)
            risk = x_t.dot(sigma).dot(x)

            risks.append(risk[0])

        return risks

    @staticmethod
    def get_returns(portfolios, mu):
        """Return the dot product ``mu . x`` for every portfolio.

        Args:
            portfolios: Iterable of 1-D portfolio vectors.
            mu: 1-D vector matching the portfolio length.

        Returns:
            List with one scalar per portfolio.
        """
        returns = []

        for portfolio in portfolios:

            x = np.array([portfolio]).T

            # (n,) . (n, 1) -> array of shape (1,)
            return_value = mu.dot(x)

            returns.append(return_value[0])

        return returns

    @staticmethod
    def get_sharpe(portfolios, sigma, mu):
        """Return ``return / risk`` for every portfolio as a NumPy array."""
        risks = Sharpe.get_risks(portfolios, sigma)
        returns = Sharpe.get_returns(portfolios, mu)

        return np.array(returns) / np.array(risks)

    # 2) Mu Sigma

    @staticmethod
    def divide(a, b):
        """Divide two scalars, tolerating a zero denominator.

        Returns:
            ``1.0`` for ``0 / 0``, ``nan`` (after printing a message) for any
            other division by zero, ``a / b`` otherwise.
        """
        if b == 0 and a == 0:
            # Returned as a float so that np.vectorize never infers an
            # integer output type from this branch.
            return 1.0

        if b == 0 and a != 0:
            print(f"Division by 0 on values {a} and {b}")
            return np.nan

        return a / b

    @staticmethod
    def get_mu_sigma(asset_prices):
        """Compute mean period returns and their covariance matrix.

        Args:
            asset_prices: DataFrame with one column per asset and one row per
                period.

        Returns:
            Tuple ``(mu, sigma)``: per-asset mean of the period returns and
            the covariance of the period returns across assets (``np.cov``).
        """
        # Without `otypes`, np.vectorize takes the output dtype from the first
        # call; a leading 0 / 0 pair would then truncate all ratios to ints.
        vectorized_divide = np.vectorize(Sharpe.divide, otypes=[float])

        # One row per asset, one column per period.
        prices = asset_prices.T.to_numpy()

        period_returns = vectorized_divide(prices[:, 1:],
                                           prices[:, :-1]) - 1

        mu = np.mean(period_returns, axis=1)
        sigma = np.cov(period_returns)

        return mu, sigma

    # 3) Samples to Decimal

    @staticmethod
    def samples_to_decimal(samples):
        """Read every bit vector as a binary number, first bit most significant.

        Args:
            samples: Array whose last axis holds the bits.

        Returns:
            Object-dtype array of Python integers, one per bit vector.
        """
        sample_length = samples.shape[-1]

        # Object dtype keeps the powers as Python ints, so long bit vectors
        # cannot overflow a fixed-width integer.
        indices = np.arange(sample_length,
                            dtype='object')[::-1]

        terms = samples * 2 ** indices

        return terms.sum(axis=-1)

### Plots

In [None]:
def plot_dataset():
    """Draw the targets of the module-level ``dataset`` as a line, row by row."""
    plt.figure(figsize=(15, 5))

    plt.plot(dataset.targets)

    # Labelling is independent of the drawn line, so it is applied afterwards.
    plt.ylabel("Sharpe Ratio")
    plt.xlabel("Row")
    plt.title("Dataset")

    plt.show()

In [None]:
def plot_top_samples():
    """Compare the Sharpe ratios of the best samples by energy and by Sharpe.

    Reads the module-level ``results`` DataFrame and ``TOP_SIZE``.
    """
    top_count = TOP_SIZE

    by_energy = results.sort_values('energy', ascending=True)
    by_sharpe = results.sort_values('sharpe', ascending=False)

    # NOTE(review): both curves share the index labels of the energy-sorted
    # frame as x values - confirm this is intended for the Sharpe-sorted curve.
    x_values = by_energy.index[:top_count]

    plt.figure(figsize=(15, 5))
    plt.plot(x_values, by_energy.sharpe[:top_count], label='Top by energy', c='C5')
    plt.plot(x_values, by_sharpe.sharpe[:top_count], label='Top by Sharpe')

    plt.ylabel("Sharpe Ratio")
    plt.xlabel("Sample")
    plt.title(f"Top Energies and Sharpe ratios of top {top_count} samples")

    plt.legend()
    plt.show()

In [None]:
def plot_sharpe_convergence():
    """Plot the recorded maximum, mean and minimum Sharpe ratio per iteration.

    Reads the module-level ``sharpe_maxs``, ``sharpe_means`` and
    ``sharpe_mins`` sequences.
    """
    curves = (
        (sharpe_maxs, "Sharpe Ratio Maximum"),
        (sharpe_means, "Sharpe Ratio Mean"),
        (sharpe_mins, "Sharpe Ratio Minimum"),
    )

    plt.figure(figsize=(15, 5))

    for values, curve_label in curves:
        plt.plot(values, label=curve_label)

    plt.ylabel("Sharpe Ratio")
    plt.xlabel("Iteration")
    plt.title("Sharpe Ratio Convergence")

    plt.legend()
    plt.show()

In [None]:
def plot_dataset_histograms():
    """Overlay Sharpe-ratio histograms of the updated and the initial dataset.

    Reads the module-level ``dataset`` and ``initial_dataset_sharpe``.
    """
    # Both histograms share the same look; only data and label differ.
    shared_style = dict(bins=100, linewidth=2, alpha=0.7, histtype='step')

    histograms = (
        (dataset.targets, 'Updated dataset'),
        (initial_dataset_sharpe, 'Initial dataset'),
    )

    plt.figure(figsize=(15, 5))

    for values, histogram_label in histograms:
        plt.hist(values, label=histogram_label, **shared_style)

    plt.ylabel("Count")
    plt.xlabel("Sharpe Ratio")
    plt.title("Dataset Sharpe Ratio Histogram")

    plt.legend()
    plt.show()

In [None]:
def plot_samples(column, label=None, color='C5'):
    """Plot one column of the module-level ``results`` against ``decimal``.

    Each sample is drawn as a marker whose size grows steeply with its
    ``num_occurrences``; a faint line connects the samples in ``decimal``
    order.

    Args:
        column: Name of the ``results`` column shown on the y axis.
        label: Legend and y-axis label; defaults to the column name with
            underscores replaced by spaces and the first letter capitalised.
        color: Matplotlib colour used for line and markers.
    """
    label = label or column.replace('_', ' ').capitalize()

    sorted_results = results.sort_values('decimal')

    decimals = sorted_results['decimal']
    samples = sorted_results[column]

    # Work in floats: raising int64 counts to the 10th power silently wraps
    # around once the spread of occurrences exceeds 78.
    sizes = sorted_results['num_occurrences'].astype(float)

    sizes = sizes - min(sizes)
    # The 10th power makes frequently seen samples stand out sharply.
    sizes = sizes ** 10
    # Scale to 0..100; `or 1` guards against all samples having equal counts.
    sizes = sizes / (max(sizes) or 1) * 100
    sizes[sizes < 1] = 1

    plt.figure(figsize=(15, 5))

    plt.plot(decimals, samples, color=color, alpha=0.5)

    plt.scatter(decimals, samples,
                label=label,
                color=color,
                s=sizes + 1)

    plt.ticklabel_format(style='scientific', useOffset=False)

    plt.title("Annealing Samples")
    plt.xlabel("Sample")
    plt.ylabel(label)
    plt.legend()

    # plt.gca().invert_yaxis()

    plt.show()

In [None]:
# def plot_samples():

#     sorted_results = results.sort_values('decimal')

#     portfolios = sorted_results['portfolio'].to_numpy()
#     decimals = sorted_results['decimal'].to_numpy()
#     energies = sorted_results['energy'].to_numpy()
#     sizes = sorted_results['num_occurrences'].to_numpy()

#     sizes = sizes - min(sizes)
#     sizes = sizes ** 10
#     sizes = sizes / (max(sizes) or 1) * 100 
#     sizes[sizes < 1] = 1

#     plt.figure(figsize=(15, 5))

#     plt.plot(decimals, energies, color='C5', alpha=0.5)

#     plt.scatter(decimals, energies,
#                 label=f"Occurrences",
#                 color='#fdb462',
#                 s=sizes + 1)

#     plt.ticklabel_format(style='scientific', useOffset=False)

#     plt.title("Annealing Samples")
#     plt.xlabel("Portfolio")
#     plt.ylabel("Energy")
#     plt.legend()

#     # plt.gca().invert_yaxis()

#     plt.show()