# Data collection from quantrocket

In [None]:
# first create an empty database
from quantrocket.history import create_usstock_db
create_usstock_db("usstock-free-1d", bar_size="1 day", free=True)

In [None]:
# from the collect_histroy library, fill the database with values
from quantrocket.history import collect_history
collect_history("usstock-free-1d")

In [None]:
#get the securities
from quantrocket.master import get_securities
securities = get_securities(vendors="usstock", sec_types="STK")
securities.head()

In [None]:
# create a universe for securities
from quantrocket.master import create_universe
create_universe("usstock-free", sids=securities.index.tolist())

In [None]:
# create a security called filtered_securities which is a subset of securities
filtered_securities = securities[securities.Delisted==False]
filtered_securities = filtered_securities[["Symbol", "Exchange", "Name", "Delisted"]]
filtered_securities.head()

In [None]:
#create universe for filtered securities
create_universe("usstock-free-active", sids=filtered_securities.index.tolist())


In [None]:
#retrieve sid for AAPL and download the data as a CSV file from start to end date with only close stock prices
from quantrocket.history import download_history_file
sid_aapl = "FIBBG000B9XRY4"
download_history_file("usstock-free-1d",start_date="2023-01-01",end_date="2023-12-31",sids=sid_aapl,fields=["Close"],filepath_or_buffer="stockPrices.csv")

# Portfolio calculation - pre specified logic





## Install necessary libraries

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

## Pre-processing

In [27]:
df = pd.read_csv('/content/stockPrices.csv')
print(df)

                Sid        Date     Close
0    FIBBG000B9XRY4  03-01-2023  124.2163
1    FIBBG000B9XRY4  04-01-2023  125.4975
2    FIBBG000B9XRY4  05-01-2023  124.1666
3    FIBBG000B9XRY4  06-01-2023  128.7352
4    FIBBG000B9XRY4  09-01-2023  129.2616
..              ...         ...       ...
245  FIBBG000B9XRY4  22-12-2023  193.3533
246  FIBBG000B9XRY4  26-12-2023  192.8040
247  FIBBG000B9XRY4  27-12-2023  192.9038
248  FIBBG000B9XRY4  28-12-2023  193.3333
249  FIBBG000B9XRY4  29-12-2023  192.2846

[250 rows x 3 columns]


In [28]:
df = df.drop(['Sid'],axis=1)

In [29]:
df.shape

(250, 2)

The dataset contains 2 columns (Dates and stock prices) and 250 rows in total

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    250 non-null    object 
 1   Close   250 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.0+ KB


As we can see there are no null values

### Checking for outliers - inter quartile range

In [31]:
# Check for outliers using IQR
Q1 = df['Close'].quantile(0.25)
Q3 = df['Close'].quantile(0.75)
IQR = Q3 - Q1
lower_limit = Q1 - (1.5 * IQR)
upper_limit = Q3 + (1.5 * IQR)
outliers = df[(df['Close'] < lower_limit) | (df['Close'] > upper_limit)]
print('Number of outliers using IQR (Inter-quartile range):', len(outliers))

Number of outliers using IQR (Inter-quartile range): 0


## Defining the states and finding maximum portfolio value

In [34]:
import numpy as np

def state_classification(returns):
    if returns >= 0.01:
        return 1  # Bull state
    elif returns > -0.01:
        return 0  # Flat state
    else:
        return -1  # Bear state

def calculate_transition_distribution(states):
    transition_counts = np.zeros((3, 3))  # 3 states: Bull, Flat, Bear
    for i in range(1, len(states)):
        prev_state = states[i - 1]
        current_state = states[i]
        transition_counts[prev_state + 1, current_state + 1] += 1

    transition_probs = transition_counts / np.sum(transition_counts, axis=1, keepdims=True)
    return transition_probs

def calculate_portfolio_value(states, prices):
    V = [0]  # initial Portfolio value
    buy_indices = []  # Indices where buy order is placed

    for i in range(1, len(states)):
        prev_state = states[i - 1]
        current_state = states[i]

        # Calculate transition probabilities and make decision
        transition_probs = calculate_transition_distribution(states[:i])
        value_additive_prob = transition_probs[1, 2]  # Transition to Bull state
        value_reductive_prob = transition_probs[1, 0]  # Transition to Bear state

        if value_additive_prob > value_reductive_prob:
            V.append(V[-1] + 1)  # Place buy order
            buy_indices.append(i)
        elif value_reductive_prob > value_additive_prob:
            V.append(V[-1] - 1)  # Do not place buy order
        else:
            V.append(V[-1])  # No change in portfolio value

    return V, buy_indices

In [36]:
prices = df['Close'].to_list()
returns = [(prices[i] - prices[i - 1]) / prices[i - 1] for i in range(1, len(prices))]
states = [state_classification(r) for r in returns]

# Calculate portfolio value and buy indices
portfolio_values, buy_indices = calculate_portfolio_value(states, prices)
final_portfolio_value = portfolio_values[-1]

print("Portfolio Value:", final_portfolio_value)
print("Buy Indices:", buy_indices)

Portfolio Value: 243
Buy Indices: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218

  transition_probs = transition_counts / np.sum(transition_counts, axis=1, keepdims=True)
