<a href="https://colab.research.google.com/github/microprediction/micromonte_colab_examples/blob/main/sklearn_empirical_qmc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance
!pip install pandas
!pip install scikit-learn
!pip install scipy


In [76]:
# Email address sent to the upload server as the 'email' query parameter of every
# chunk; replace this value with your own address before running the upload.
YOUR_EMAIL = 'nobody@nowhere.com'

In [77]:
import yfinance as yf
import pandas as pd
from sklearn.covariance import EmpiricalCovariance
from scipy.stats.qmc import MultivariateNormalQMC
import numpy as np
import json
import sys
import requests
import pandas as pd
from io import StringIO


# Sample budget: 2**20 points in total, sent to the server in 8 equal chunks
num_chunks = 8
num_samples_per_chunk = 2**20 // num_chunks
num_samples = num_samples_per_chunk * num_chunks


# List of SPDR ETF symbols in alphabetical order
spdr_etfs = ['XLB', 'XLC', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLRE', 'XLU', 'XLV', 'XLY']

# Fetch weekly bars. auto_adjust=False is passed explicitly because the
# 'Adj Close' column read below is only present when prices are NOT auto-adjusted.
data = yf.download(spdr_etfs, start="2018-01-01", interval="1wk", auto_adjust=False)

# Adjusted closes, with columns forced into the same order as spdr_etfs so that
# anything labelled positionally from that list stays aligned with the data.
weekly_prices = data['Adj Close'][spdr_etfs]

# Keep only the weeks for which every ETF has a price
weekly_prices = weekly_prices.dropna()

# Week-over-week simple returns; the first row has no predecessor and is dropped
weekly_returns = weekly_prices.pct_change().dropna()

# Fail here with a clear message rather than deep inside the covariance fit
if weekly_returns.empty:
    raise ValueError("No weekly returns available: the download returned no complete rows.")

# Estimate the covariance matrix of the weekly returns
cov_matrix = EmpiricalCovariance().fit(weekly_returns).covariance_

# Initialize a zero-mean QMC sampler; the dimension is read from the covariance
# matrix itself so it can never disagree with the data it was estimated from.
qmc_engine = MultivariateNormalQMC(mean=np.zeros(cov_matrix.shape[0]), cov=cov_matrix)

# Generate samples (one row per sample, one column per ETF)
samples = qmc_engine.random(num_samples)

# Take a peek. Columns are labelled from weekly_returns (the frame the covariance
# was fitted on) so labels follow the actual column order of the data.
df = pd.DataFrame(data=samples, columns=list(weekly_returns.columns))
print(df.head(3))


# Imports used by the chunked upload helper below
import numpy as np
import requests
from io import StringIO
import pandas as pd
import time

def send_in_chunks(df, num_chunks, max_retries=3, timeout=60):
    """Upload ``df`` to the micromonte server as ``num_chunks`` CSV chunks.

    The rows of ``df`` are cut into ``num_chunks`` contiguous pieces whose sizes
    differ by at most one row (the earlier pieces receive the extra rows). Each
    piece is serialised to CSV without the index and POSTed with the module-level
    ``YOUR_EMAIL``, the chunk number and ``num_chunks`` as query parameters.
    Progress and failures are reported with ``print``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload.
    num_chunks : int
        Number of chunks to split ``df`` into; must be positive.
    max_retries : int, optional
        Maximum number of POST attempts per chunk.
    timeout : float, optional
        Seconds to wait for the server on each attempt before giving up.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``num_chunks`` is not positive.
    """
    if num_chunks <= 0:
        raise ValueError("number sections must be larger than 0.")

    URL = 'https://micromonte.pythonanywhere.com/upload'

    # Slice rows positionally instead of calling np.array_split on the frame,
    # which is deprecated for DataFrames in recent pandas releases.
    base_rows, extra_rows = divmod(len(df), num_chunks)
    start = 0
    for chunk_no in range(num_chunks):
        stop = start + base_rows + (1 if chunk_no < extra_rows else 0)
        chunk_df = df.iloc[start:stop]
        start = stop

        metadata = {'email': YOUR_EMAIL, 'chunk': chunk_no, 'num_chunks': num_chunks}

        # Serialise once per chunk; every retry re-sends the same bytes
        payload = chunk_df.to_csv(index=False).encode('utf-8')

        for attempt in range(max_retries):
            try:
                # A timeout keeps a stalled connection from hanging the notebook
                response = requests.post(URL, params=metadata, data=payload, timeout=timeout)

                if response.ok:
                    print(f"Chunk {chunk_no} of {num_chunks} sent successfully.")
                    break  # Chunk delivered; stop retrying
                print(f"Failed to send chunk {chunk_no}, attempt {attempt + 1}. Response: {response.content}")

            except requests.RequestException as e:
                # Network-level failure (connection error, timeout, ...): report and retry
                print(f"An error occurred: {e}")

            # Pause before the next attempt, but not after the final one
            if attempt + 1 < max_retries:
                time.sleep(1)

        else:
            # Reached only when no attempt hit the break above
            print(f"Failed to send chunk {chunk_no} after {max_retries} attempts.")


# Upload the generated samples, split into the chunk count configured at the top of this cell
send_in_chunks(df, num_chunks=num_chunks)


[*********************100%%**********************]  11 of 11 completed


        XLB       XLC       XLE       XLF       XLI       XLK       XLP  \
0  0.011365 -0.019495  0.020266 -0.003191  0.012859  0.001234  0.009961   
1 -0.004935  0.007295 -0.011247 -0.003412 -0.023124 -0.014551 -0.023456   
2 -0.027570 -0.018378  0.004815 -0.040816 -0.031558 -0.043898  0.003677   

       XLRE       XLU       XLV       XLY  
0 -0.035866 -0.030097  0.003595 -0.026203  
1 -0.013525 -0.000718 -0.016031  0.012829  
2  0.006206  0.015475 -0.027322 -0.026078  
Chunk 0 of 8 sent successfully.
Chunk 1 of 8 sent successfully.
Chunk 2 of 8 sent successfully.
Chunk 3 of 8 sent successfully.
Chunk 4 of 8 sent successfully.
Chunk 5 of 8 sent successfully.
Chunk 6 of 8 sent successfully.
Chunk 7 of 8 sent successfully.


In [120]:
# Scoring example

def compute_score(samples, z, h=300.0):
    """Sum of exponentially decaying weights of ``samples`` around the point ``z``.

    Each row of ``samples`` contributes ``exp(-h * d)``, where ``d`` is the
    Euclidean distance between that row and ``z``.

    Parameters
    ----------
    samples : array-like
        Points, one per row.
    z : array-like
        Reference point, broadcast against the rows of ``samples``.
    h : float, optional
        Decay rate applied to the distances.

    Returns
    -------
    The summed weights over all rows.
    """
    offsets = samples - z
    radii = np.linalg.norm(offsets, axis=1)
    weights = np.exp(-h * radii)
    return np.sum(weights)

# Score the samples at a random test point. The generator is seeded so the example
# is repeatable, and the point's dimension follows the sample matrix instead of
# being hard-coded.
rng = np.random.default_rng(0)
z = 0.03 * rng.random(samples.shape[1])
score = compute_score(samples=samples, z=z)
print(f"Total Score: {score}")


Total Score: 0.5817120867246773
