<a href="https://colab.research.google.com/github/microprediction/monteprediction_colab_examples/blob/main/monteprediction_entry.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies into the environment of the running kernel.
# %pip (unlike !pip) is guaranteed to target the kernel's own Python.
%pip install -q yfinance pandas scikit-learn scipy monteprediction





### Utilities
Just run this. No need to modify.

In [None]:
import yfinance as yf
import pandas as pd
from scipy.stats.qmc import MultivariateNormalQMC
import numpy as np
import json
import sys
import requests
import pandas as pd
from io import StringIO
from datetime import datetime, timedelta
import requests
from io import StringIO
import pandas as pd
import time



# Submission size: 2**20 rows in total, uploaded as 8 equally sized chunks
num_chunks = 8
num_samples_per_chunk = 1048576 // num_chunks
num_samples = num_samples_per_chunk * num_chunks


# Ticker symbols of the SPDR sector ETFs, kept in alphabetical order
spdr_etfs = [
    'XLB', 'XLC', 'XLE', 'XLF', 'XLI', 'XLK',
    'XLP', 'XLRE', 'XLU', 'XLV', 'XLY',
]


def get_last_wednesday():
    """Return the date of the most recent Wednesday.

    If today is itself a Wednesday, today's date is returned.
    """
    wednesday = 2  # datetime.weekday() counts Monday as 0
    now = datetime.now()
    days_since_wednesday = (now.weekday() - wednesday) % 7
    return (now - timedelta(days=days_since_wednesday)).date()


def send_in_chunks(df, email, num_chunks, max_retries=3, timeout=60):
    """Upload a DataFrame to the upload endpoint as a series of CSV chunks.

    The rows of ``df`` are divided into ``num_chunks`` consecutive pieces whose
    sizes differ by at most one row (larger pieces first). Each piece is
    serialised to CSV without the index and POSTed separately, with the
    submitter's email and the chunk position passed as query parameters.

    Args:
        df: pandas DataFrame to upload.
        email: Email address sent along with every chunk.
        num_chunks: Number of pieces to split ``df`` into; must be >= 1.
        max_retries: Maximum number of upload attempts per chunk.
        timeout: Seconds to wait for the server on each request, so that a
            stalled connection cannot hang the notebook forever.

    Returns:
        bool: True if every chunk was accepted by the server, False if at
        least one chunk could not be sent within ``max_retries`` attempts.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        raise ValueError("num_chunks must be a positive integer")

    URL = 'https://micromonte.pythonanywhere.com/upload'

    # Split by position with iloc rather than np.array_split, which relies on
    # the deprecated DataFrame.swapaxes when handed a DataFrame.
    rows_per_chunk, leftover = divmod(len(df), num_chunks)
    all_sent = True
    start = 0
    for chunk_no in range(num_chunks):
        # The first `leftover` chunks take one extra row each
        stop = start + rows_per_chunk + (1 if chunk_no < leftover else 0)
        chunk_df = df.iloc[start:stop]
        start = stop

        metadata = {'email': email, 'chunk': chunk_no, 'num_chunks': num_chunks}

        # Serialise once per chunk; retries reuse the same payload
        csv_string = chunk_df.to_csv(index=False)

        for attempt in range(max_retries):
            try:
                # A fresh stream is needed on each attempt because a previous
                # attempt may already have consumed it
                with StringIO(csv_string) as f:
                    response = requests.post(URL, params=metadata, data=f, timeout=timeout)

                if response.ok:
                    print(f"Chunk {chunk_no} of {num_chunks} sent successfully.")
                    break  # Break the retry loop if successful
                print(f"Failed to send chunk {chunk_no}, attempt {attempt + 1}. Response: {response.content}")

            except Exception as e:
                # Best effort: report the problem and fall through to a retry
                print(f"An error occurred: {e}")

            # Pause before the next attempt, but not after the final one
            if attempt < max_retries - 1:
                time.sleep(1)

        else:
            # Retry loop finished without a successful upload
            print(f"Failed to send chunk {chunk_no} after {max_retries} attempts.")
            all_sent = False

    return all_sent



## Step 1. Create a dataframe with just over one million hypothetical weekly returns for each sector.   

Do this however you like; this is just an example. Use one column per sector.

In [None]:
# This example uses Quasi-Monte Carlo on the empirical covariance
# There is absolutely no requirement you follow this pattern
from sklearn.covariance import EmpiricalCovariance         # See sklearn for many alternatives

# Fit on a randomly chosen look-back window of one to five years ending last Wednesday
last_wednesday = get_last_wednesday()
num_weeks = int(52+4*52*np.random.rand())
start_date = last_wednesday - timedelta(weeks=num_weeks)

# auto_adjust=False keeps the 'Adj Close' column; newer yfinance releases
# adjust prices by default and then no longer return that column
data = yf.download(spdr_etfs, start=start_date, end=last_wednesday, interval="1wk", auto_adjust=False)

# Select the columns explicitly so the covariance matrix is ordered like spdr_etfs,
# which is also the column order of the generated samples below
weekly_prices = data['Adj Close'][spdr_etfs]
weekly_returns = weekly_prices.pct_change().dropna()

# Zero-mean multivariate normal samples drawn with the empirical covariance
cov_matrix = EmpiricalCovariance().fit(weekly_returns).covariance_
qmc_engine = MultivariateNormalQMC(mean=np.zeros(len(spdr_etfs)), cov=cov_matrix)
samples = qmc_engine.random(num_samples)
df = pd.DataFrame(columns=spdr_etfs, data=samples)
print(df[:3])




[*********************100%%**********************]  11 of 11 completed


        XLB       XLC       XLE       XLF       XLI       XLK       XLP  \
0 -0.033980  0.007851 -0.127086 -0.033556 -0.021845  0.019712 -0.006874   
1  0.025773  0.008576  0.027525  0.014640  0.010720  0.005156  0.025163   
2  0.009357  0.010007 -0.000190  0.028045  0.007075 -0.013294 -0.014388   

       XLRE       XLU       XLV       XLY  
0  0.044473  0.002325  0.018386  0.022192  
1  0.010593  0.047170  0.011911  0.005901  
2 -0.012570 -0.013203 -0.012293 -0.027354  


## Step 2. Submit the dataframe

In [None]:
# Be sure to change this to your own address before submitting
YOUR_EMAIL = 'empirical@nowhere.com'

# Upload the samples built in Step 1
send_in_chunks(df, email=YOUR_EMAIL, num_chunks=num_chunks)

Chunk 0 of 8 sent successfully.
Chunk 1 of 8 sent successfully.
Chunk 2 of 8 sent successfully.
Chunk 3 of 8 sent successfully.
Chunk 4 of 8 sent successfully.
Chunk 5 of 8 sent successfully.
Chunk 6 of 8 sent successfully.
Chunk 7 of 8 sent successfully.


### Just for interest...
The paths will be interpreted as a mixture of Gaussians.
Your P/L will depend on your computed score versus that of everyone else.

In [None]:
def compute_score(samples, z, h=300.0):
    """Sum the kernel exp(-h * distance) over every row of ``samples``.

    Args:
        samples: 2-D array with one sample per row.
        z: Point the samples are compared against (broadcast against each row).
        h: Decay rate of the kernel; larger values reward only very close samples.

    Returns:
        The sum over rows of exp(-h * Euclidean distance from the row to z).
    """
    offsets = samples - z
    row_distances = np.linalg.norm(offsets, axis=1)
    kernel_values = np.exp(-h * row_distances)
    return np.sum(kernel_values)

def back_to_weekday(d):
    """Roll a Saturday or Sunday back to the preceding Friday.

    Dates that already fall on Monday through Friday are returned unchanged.
    """
    days_past_friday = d.weekday() - 4  # weekday() == 4 is Friday
    if days_past_friday <= 0:
        return d
    return d - timedelta(days=days_past_friday)


def get_most_recent_truth():
    """Return the latest weekly return of every sector ETF as a 1-D array.

    Downloads weekly prices for the three weeks leading up to yesterday
    (rolled back to Friday when yesterday falls on a weekend) and returns the
    last row of week-over-week percentage changes, ordered like ``spdr_etfs``.
    """
    end_date = back_to_weekday(datetime.now() - timedelta(days=1))
    start_date = end_date - timedelta(weeks=3)

    # auto_adjust=False keeps the 'Adj Close' column; newer yfinance releases
    # adjust prices by default and then no longer return that column
    recent_data = yf.download(
        spdr_etfs,
        start=start_date.date(),
        end=end_date.date(),
        interval="1wk",
        auto_adjust=False,
    )

    # Use the freshly downloaded prices (not the notebook-level history used
    # for fitting) and pin the column order to spdr_etfs
    recent_weekly_prices = recent_data['Adj Close'][spdr_etfs]
    return recent_weekly_prices.pct_change().dropna().iloc[-1].values

# Score the generated samples against the most recent sector returns
z = get_most_recent_truth()
score = compute_score(df.values, z)
print(f"Total Score: {score}")


[*********************100%%**********************]  11 of 11 completed


Total Score: 3.313742353178533
