<a href="https://colab.research.google.com/github/nazike/dataoptions/blob/main/OptionsAnalysisPricing191021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
#read file from the github repo
columns = ['asset_price', 'maturity', 'rate', 'div', 'ivol', 'eu_price', 'am_price'] 
df1 = pd.read_csv('/Users/nazymazimbayev/Desktop/VS Code projects/dataoptions/data.csv')
df1.columns = columns
df2 = pd.read_csv('/Users/nazymazimbayev/Desktop/VS Code projects/dataoptions/data2.csv')
df2.columns = columns

In [3]:
#merge two dataframes
df = pd.concat([df1, df2], axis=0)


In [4]:
df.describe()

Unnamed: 0,asset_price,maturity,rate,div,ivol,eu_price,am_price
count,19026.0,19026.0,19026.0,19026.0,19026.0,19026.0,19026.0
mean,99.040981,1.196045,0.049347,0.040105,0.358929,14.756278,15.419963
std,16.932147,1.244231,0.028933,0.032698,0.141156,9.980747,10.366195
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,84.517517,0.41662,0.024252,0.006123,0.239931,6.250488,6.514011
50%,98.763641,0.717913,0.049229,0.038077,0.363941,13.356029,13.966866
75%,113.161929,1.176921,0.074103,0.06909,0.479842,22.073175,23.301069
max,129.99822,4.998285,0.099997,0.099987,0.599896,58.741167,58.839442


In [5]:
# Read the data
X_full = df.copy()
columns = ['asset_price', 'maturity', 'rate', 'div', 'ivol', 'eu_price', 'am_price'] 
X_full.columns = columns
X_full['target'] = (X_full['am_price']-X_full['eu_price']).copy()


# Remove rows with missing target, separate target from predictors
X_full.dropna(axis=0, subset=['target'], inplace=True)
y = X_full.target
y_amer = X_full['am_price']
X_full.drop(['am_price', 'target'], axis=1, inplace=True)


# Break off validation set from training data
X_train_full, X_valid_full, y_train, y_valid = train_test_split(X_full, y, 
                                                                train_size=0.8, test_size=0.2,
                                                                random_state=0)

# Select categorical columns with relatively low cardinality 
categorical_cols = [cname for cname in X_train_full.columns if
                    X_train_full[cname].nunique() < 10 and 
                    X_train_full[cname].dtype == "object"]

# Select numerical columns
numerical_cols = [cname for cname in X_train_full.columns if 
                X_train_full[cname].dtype in ['int64', 'float64']]

# Keep selected columns only
my_cols = categorical_cols + numerical_cols
X_train = X_train_full[my_cols].copy()
X_valid = X_valid_full[my_cols].copy()



In [6]:
X_train.head()

Unnamed: 0,asset_price,maturity,rate,div,ivol,eu_price
9066,86.772793,0.776712,0.016783,0.0,0.464685,21.73953
3309,106.976961,3.701752,0.051707,0.001839,0.229701,7.231691
3572,71.220933,1.764234,0.03408,0.084616,0.175687,33.049968
4068,95.36204,0.949529,0.083446,0.011833,0.582503,19.912005
9352,119.899509,0.263941,0.010202,0.0,0.572266,5.162801


In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

# Preprocessing for numerical data
numerical_transformer = SimpleImputer(strategy='constant')

# Preprocessing for categorical data
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Bundle preprocessing for numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Define model
model = XGBRegressor(n_estimators=5000, random_state=0, learning_rate=0.07, n_jobs=4)

# Bundle preprocessing and modeling code in a pipeline
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('model', model)
                     ])

# Preprocessing of training data, fit model 
clf.fit(X_train, y_train)

# Preprocessing of validation data, get predictions
preds = clf.predict(X_valid)

print('MAE:', mean_absolute_error(y_valid, preds))

MAE: 0.07346267971168954


In [9]:
# Making final price as a sum of max(0,target) and eu_price 

y_comp = np.maximum(np.zeros(len(preds)),preds) + X_valid['eu_price']
y_valid_amer = y_amer[y_comp.index]
data_error = y_comp - y_valid_amer
mean_error = data_error.mean() / y_comp.mean()
print(abs(100*mean_error), ' %')

0.8488966134935994  %


array([2.7922006], dtype=float32)

In [None]:
import timeit
def test_nn():
    return clf.predict(X_sample)

loop = 1000

result = timeit.timeit('test_nn()', globals=globals(), number=loop)
print(result, 'seconds per loop times of test function')
print(result/loop, 'seconds per each clf.predict()')

2.4242466179999838 seconds per loop times of test function
0.0024242466179999838 seconds per each clf.predict()


In [None]:
import numpy as np

def binomial_put(S, K, T, R, div, sig, n):
    
    h = T/n
    Rinv = np.exp(-R*h)
    a = np.exp((R-div)*h)
    b2 = (a**2)*(np.exp(h*sig**2)-1)
    tmp = a**2 + b2 + 1
    u = (tmp + (tmp**2 - 4*a**2)**0.5)/(2*a)
    d = 1/u
    p = (a-d)/(u-d)
    q = 1-p
    pp = Rinv*p
    qq = Rinv*q
    x = 2*n + 1
    
    s = np.zeros(x)
    s[n] = S
    
    for j in range(1,n+1):
        s[n+j] = s[n-1+j]*u
        s[n-j] = s[n+1-j]*d
        
    v = np.zeros(x)
    
    for j in range(1,x+2,2):
        v[j-1] = max(K-s[j-1],0)
        
    for i in range(n-1,0,-1):
        for j in range(-i,i+2,2):
            j1 = j+n+1
            v[j1-1] = max(K-s[j1-1],pp*v[j1]+qq*v[j1-2])
    
    y=pp*v[n+1]+qq*v[n-1]
    
    return y

In [None]:
S = 71.016737
K = 100.0
T = 2.136379
R = 0.024927
div = 0.0
sig = 0.216758
n=250


result = binomial_put(S, K, T, R, div, sig, n)


In [None]:
def test_bt():
    return binomial_put(S, K, T, R, div, sig, n)

loop = 1000

result = timeit.timeit('test_bt()', globals=globals(), number=loop)
print(result, 'seconds per loop times of test function')
print(result/loop, 'seconds per each binomial_put')

41.63978371899998 seconds per loop times of test function
0.04163978371899998 seconds per each binomial_put
