<a href="https://colab.research.google.com/github/nazike/dataoptions/blob/main/OptionsAnalysisPricing191021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [4]:
#read file from the github repo
columns = ['asset_price', 'maturity', 'rate', 'div', 'ivol', 'eu_price', 'am_price', 'lower_bound', 'upper_bound', 'normalized_am_price'] # define the columns
df = pd.read_csv('/Users/nazymazimbayev/Documents/git-projects/dataoptions/V3.csv')
#merge two dataframes
df.columns = columns

In [5]:
# Read the data
X_full = df.copy()
X_full.columns = columns
# Remove rows with missing target, separate target from predictors
X_full.dropna(axis=0, subset=['normalized_am_price'], inplace=True)

In [6]:
# Creating target column as a [0,1]
X_full['target']=np.round(X_full['normalized_am_price'],0)
y = X_full.target
y_amer = X_full['am_price']

# Dropping data we don't need for training
X_full.drop(['target'], axis=1, inplace=True)
X_full.drop(['am_price'], axis=1, inplace=True)
X_full.drop(['normalized_am_price'], axis=1, inplace=True)

In [7]:
# Break off validation set from training data
X_train_full, X_valid_full, y_train, y_valid = train_test_split(X_full, y, 
                                                                train_size=0.8, test_size=0.2,
                                                                random_state=0)

# Select categorical columns with relatively low cardinality 
categorical_cols = [cname for cname in X_train_full.columns if
                    X_train_full[cname].nunique() < 10 and 
                    X_train_full[cname].dtype == "object"]

# Select numerical columns
numerical_cols = [cname for cname in X_train_full.columns if 
                X_train_full[cname].dtype in ['int64', 'float64']]

# Keep selected columns only
my_cols = categorical_cols + numerical_cols
X_train = X_train_full[my_cols].copy()
X_valid = X_valid_full[my_cols].copy()

In [8]:
X_full.describe()

Unnamed: 0,asset_price,maturity,rate,div,ivol,eu_price,lower_bound,upper_bound
count,100226.0,100226.0,100226.0,100226.0,100226.0,100226.0,100226.0,100226.0
mean,98.156193,1.212714,0.03484,0.03518738,0.340181,15.496518,15.895336,15.967396
std,16.862843,1.255173,0.024512,0.0246722,0.156052,10.416161,10.626516,10.668001
min,70.00066,0.10001,1e-06,5.427117e-07,1.5e-05,0.0,0.484509,0.50017
25%,83.683658,0.42339,0.015599,0.01581464,0.214289,6.684839,6.870645,6.908132
50%,97.392227,0.725215,0.031126,0.03149386,0.346146,14.217318,14.630224,14.698572
75%,112.166268,1.238937,0.046696,0.04701458,0.473961,23.10087,23.813362,23.92819
max,129.999578,4.999891,0.099998,0.09999347,0.599988,63.146201,63.146202,63.146473


In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from xgboost import XGBClassifier

# Preprocessing for numerical data
numerical_transformer = SimpleImputer(strategy='constant')

# Preprocessing for categorical data
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Bundle preprocessing for numerical and categorical data
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Define model
model = XGBClassifier(n_estimators=10000, learning_rate=0.05, max_depth=6, min_child_weight=1, gamma=0, subsample=0.8, colsample_bytree=0.8, objective='binary:logistic', nthread=4, scale_pos_weight=1, seed=27)

# Bundle preprocessing and modeling code in a pipeline
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('model', model)
                     ])

# Preprocessing of training data, fit model 
clf.fit(X_train, y_train)



Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  SimpleImputer(strategy='constant'),
                                                  ['asset_price', 'maturity',
                                                   'rate', 'div', 'ivol',
                                                   'eu_price', 'lower_bound',
                                                   'upper_bound']),
                                                 ('cat',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  [])])),
                ('model',
           

In [12]:
# Preprocessing of validation data, get predictions
preds = clf.predict(X_valid)

print('MSE',mean_squared_error(y_valid, preds))

print('MAE:', mean_absolute_error(y_valid, preds))

MSE 0.10166616781402774
MAE: 0.10166616781402774


In [14]:
preds_full=clf.predict(X_full)
preds_full

array([0., 1., 1., ..., 1., 0., 1.])

In [15]:
#create a dataframe with the predicted values and write it to csv   
predicted_df=df.copy()
predicted_df.drop(['normalized_am_price'], axis=1, inplace=True)
predicted_df['predicted_target']=preds_full
predicted_df['predicted_am_price']=predicted_df['predicted_target']*(predicted_df['upper_bound']-predicted_df['lower_bound'])+predicted_df['lower_bound']
predicted_df.to_csv('/Users/nazymazimbayev/Documents/git-projects/dataoptions/class_full_predicted_dataoptions.csv', index=False)

In [16]:
print('MSE',mean_squared_error(predicted_df['am_price'], predicted_df['predicted_am_price']))

MSE 0.0004104461427478377


In [None]:
#calculation of the neural net time 
X_sample = X_full.iloc[0:1].copy()


import timeit
def test_nn():
    return clf.predict(X_sample)

loop = 1000

result_nn = timeit.timeit('test_nn()', globals=globals(), number=loop)
print(result_nn, 'seconds per loop times of test function')
print(result_nn/loop, 'seconds per each clf.predict()')

3.019512332999966 seconds per loop times of test function
0.003019512332999966 seconds per each clf.predict()


In [None]:
#function to calculate binomial option price

import numpy as np

def binomial_put(S, K, T, R, div, sig, n):
    
    h = T/n
    Rinv = np.exp(-R*h)
    a = np.exp((R-div)*h)
    b2 = (a**2)*(np.exp(h*sig**2)-1)
    tmp = a**2 + b2 + 1
    u = (tmp + (tmp**2 - 4*a**2)**0.5)/(2*a)
    d = 1/u
    p = (a-d)/(u-d)
    q = 1-p
    pp = Rinv*p
    qq = Rinv*q
    x = 2*n + 1
    
    s = np.zeros(x)
    s[n] = S
    
    for j in range(1,n+1):
        s[n+j] = s[n-1+j]*u
        s[n-j] = s[n+1-j]*d
        
    v = np.zeros(x)
    
    for j in range(1,x+2,2):
        v[j-1] = max(K-s[j-1],0)
        
    for i in range(n-1,0,-1):
        for j in range(-i,i+2,2):
            j1 = j+n+1
            v[j1-1] = max(K-s[j1-1],pp*v[j1]+qq*v[j1-2])
    
    y=pp*v[n+1]+qq*v[n-1]
    
    return y

In [None]:
#parameters for the binomial put option price calcualtion , similar to X_full.iloc[0:1]

S = 91.063192
K = 100.0
T = 0.64276
R = 0.073981	
div = 0.069325	
sig = 0.312864
n=250


result = binomial_put(S, K, T, R, div, sig, n)


In [None]:
#binomial option price calcualtion time
def test_bt():
    return binomial_put(S, K, T, R, div, sig, n)

loop = 1000

result_bt = timeit.timeit('test_bt()', globals=globals(), number=loop)
print(result_bt, 'seconds per loop times of test function')
print(result_bt/loop, 'seconds per each binomial_put')

21.317060082999888 seconds per loop times of test function
0.021317060082999887 seconds per each binomial_put


In [None]:
#result, which shows how much faster the neural net is than the binomial tree
print(result_bt/result_nn, 'times nn faster than bt')

NameError: name 'result_bt' is not defined

1276