<a href="https://colab.research.google.com/github/nazike/dataoptions/blob/main/OptionsAnalysisPricing191021.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:

# Load the dataset through a path relative to the notebook instead of an
# absolute path on one developer's machine, so the cell also runs elsewhere
# (e.g. on Colab). NOTE(review): presumably this is the same data.csv that the
# next cell reads - confirm.
df = pd.read_csv('data.csv')



In [None]:
# Load the raw data and give the columns descriptive names
X_full = pd.read_csv('data.csv')
columns = ['asset_price', 'maturity', 'rate', 'div', 'ivol', 'eu_price', 'am_price']
X_full.columns = columns

# Regression target: how much the American price exceeds the European price
X_full['target'] = X_full['am_price'] - X_full['eu_price']

# Discard rows without a target, then split the targets off from the predictors
X_full = X_full.dropna(axis=0, subset=['target'])
y = X_full['target']
y_amer = X_full['am_price']  # kept aside to benchmark the reconstructed price later
X_full = X_full.drop(['am_price', 'target'], axis=1)

# Hold out 20% of the rows for validation (fixed seed for reproducibility)
X_train_full, X_valid_full, y_train, y_valid = train_test_split(
    X_full, y, train_size=0.8, test_size=0.2, random_state=0
)

# Low-cardinality text columns are treated as categorical features
categorical_cols = [
    cname
    for cname in X_train_full.columns
    if X_train_full[cname].dtype == "object" and X_train_full[cname].nunique() < 10
]

# Integer and float columns are treated as numerical features
numerical_cols = [
    cname
    for cname in X_train_full.columns
    if X_train_full[cname].dtype in ['int64', 'float64']
]

# Restrict both splits to the selected columns (copies, so later edits stay local)
my_cols = categorical_cols + numerical_cols
X_train = X_train_full.loc[:, my_cols].copy()
X_valid = X_valid_full.loc[:, my_cols].copy()



In [None]:
# Preview the first rows of the training features to sanity-check the selected columns
X_train.head()

Unnamed: 0,asset_price,maturity,rate,div,ivol,eu_price
3972,116.025713,0.820349,0.032117,0.081244,0.562491,15.587932
5932,122.579343,0.374312,0.094859,0.0,0.41172,2.499845
4787,114.890931,0.374259,0.016274,0.087227,0.355693,4.400367
2380,78.600814,3.123196,0.020283,0.006259,0.439053,34.982935
675,97.571564,0.929457,0.05167,0.073215,0.528091,20.883309


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

# Numerical features: missing values are replaced by a constant
# (the imputer's default fill value for the 'constant' strategy)
numerical_transformer = SimpleImputer(strategy='constant')

# Categorical features: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at prediction time
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

# Gradient-boosted regressor for the target
model = XGBRegressor(
    n_estimators=5000,
    random_state=0,
    learning_rate=0.07,
    n_jobs=4,
)

# Chain preprocessing and model so both are fitted and applied together
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model),
])

# Fit on the training split
clf.fit(X_train, y_train)

# Predict on the validation split and report the mean absolute error
preds = clf.predict(X_valid)

print('MAE:', mean_absolute_error(y_valid, preds))

MAE: 0.0915285808521583


In [None]:
# Reconstruct the final price as eu_price + max(0, predicted target):
# negative predictions are floored at zero before being added.

y_comp = X_valid['eu_price'] + np.maximum(preds, 0)
# Benchmark am_price values for the same rows (aligned by index label)
y_valid_amer = y_amer.loc[y_comp.index]
data_error = y_comp - y_valid_amer
# Signed mean error relative to the mean reconstructed price, printed in percent
mean_error = data_error.mean() / y_comp.mean()
print(mean_error * 100, ' %')

0.05545840069168395  %


In [None]:
# Export the validation features next to the benchmark and predicted prices
df = pd.concat([X_valid, y_valid_amer, y_comp], axis=1)
df.columns = [
    'f1_asset_price',
    'f2_maturity',
    'f3_rate',
    'f4_div',
    'f5_ivol',
    'f6_eu_price',
    'benchmark_am_price',
    'predicted_am_price',
]
df.to_csv('/content/drive/MyDrive/data_research/prediciton_valid.csv', index=False)

In [None]:
# Sample vector (X_sample) for prediction

# Take an explicit copy of the first validation row as a template. Assigning
# columns on the bare slice X_valid[:1] is chained assignment on a slice of
# X_valid (pandas raises SettingWithCopyWarning, which the notebook's global
# warnings filter hides); the copy keeps X_valid untouched for certain.
X_sample = X_valid.iloc[:1].copy()

X_sample['asset_price'] = 71.016737
X_sample['maturity'] = 2.136379
X_sample['rate'] = 0.024927
X_sample['div'] = 0.0
X_sample['ivol'] = 0.216758
X_sample['eu_price'] = 26.33074

X_sample

Unnamed: 0,asset_price,maturity,rate,div,ivol,eu_price
2932,71.016737,2.136379,0.024927,0.0,0.216758,26.33074


In [None]:
# Run the fitted pipeline on the single sample row; the prediction is the
# modelled target (am_price - eu_price). Timing is measured in a later cell.
y_predicted = clf.predict(X_sample)

In [None]:
# Display the predicted target for X_sample
y_predicted

array([2.7922006], dtype=float32)

In [None]:
import timeit
def test_nn():
    """Run one prediction of the fitted pipeline ``clf`` on ``X_sample``.

    This is the unit of work being timed below.
    """
    return clf.predict(X_sample)

# Number of repetitions used for the timing
loop = 1000

# Total wall time for `loop` calls, then the average per call
result = timeit.timeit(test_nn, number=loop)
print(result, 'seconds per loop times of test function')
print(result / loop, 'seconds per each clf.predict()')

2.4242466179999838 seconds per loop times of test function
0.0024242466179999838 seconds per each clf.predict()


In [None]:
import numpy as np

def binomial_put(S, K, T, R, div, sig, n):
    """Price an American put on a recombining binomial lattice.

    The up factor ``u`` is the larger root of the quadratic obtained by
    matching the one-step mean growth ``a = exp((R - div) * h)`` and the
    term ``b2 = a**2 * (exp(h * sig**2) - 1)`` with ``d = 1 / u``. Values
    are rolled back from maturity, taking at every node the larger of the
    exercise value ``K - s`` and the discounted expected continuation value.

    Parameters
    ----------
    S : float
        Current price of the underlying asset.
    K : float
        Strike price.
    T : float
        Time to maturity.
    R : float
        Continuously compounded interest rate (used for discounting).
    div : float
        Continuous dividend yield (reduces the growth rate to ``R - div``).
    sig : float
        Volatility of the underlying.
    n : int
        Number of time steps in the lattice; must be at least 1.

    Returns
    -------
    float
        Lattice value of the put at time zero, never below ``K - S``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive number of time steps")

    h = T / n                                  # length of one time step
    discount = np.exp(-R * h)                  # one-step discount factor
    a = np.exp((R - div) * h)                  # one-step mean growth factor
    b2 = (a**2) * (np.exp(h * sig**2) - 1)
    m = a**2 + b2 + 1
    u = (m + (m**2 - 4 * a**2) ** 0.5) / (2 * a)
    d = 1 / u
    p = (a - d) / (u - d)                      # up-move probability
    pp = discount * p                          # discounted up weight
    qq = discount * (1 - p)                    # discounted down weight
    size = 2 * n + 1

    # s[n + k] holds S * u**k for k = -n..n; the array is shared by all steps.
    s = np.zeros(size)
    s[n] = S
    for j in range(1, n + 1):
        s[n + j] = s[n + j - 1] * u
        s[n - j] = s[n - j + 1] * d

    # Payoff at maturity: only every other index is reachable after n moves.
    v = np.zeros(size)
    for k in range(0, size, 2):
        v[k] = max(K - s[k], 0)

    # Backward induction over steps n-1 .. 1. Step i writes the indices with
    # the parity of n + i and reads the neighbouring indices written by step
    # i + 1, so the in-place update never overwrites a value it still needs.
    for i in range(n - 1, 0, -1):
        for k in range(n - i, n + i + 1, 2):
            v[k] = max(K - s[k], pp * v[k + 1] + qq * v[k - 1])

    continuation = pp * v[n + 1] + qq * v[n - 1]

    # Apply the early-exercise test at the root as well; without it a deep
    # in-the-money put is valued below its immediate exercise value.
    # `continuation` is passed first so that a NaN from degenerate inputs
    # still propagates to the caller instead of being masked.
    return max(continuation, K - s[n])

In [None]:
# Inputs for the lattice benchmark; the market values mirror the X_sample row
S = 71.016737    # underlying asset price
K = 100.0        # strike
T = 2.136379     # time to maturity
R = 0.024927     # interest rate
div = 0.0        # dividend yield
sig = 0.216758   # volatility
n = 250          # number of lattice time steps


result = binomial_put(S=S, K=K, T=T, R=R, div=div, sig=sig, n=n)


In [None]:
def test_bt():
    """Run one binomial lattice valuation with the notebook-level inputs.

    This is the unit of work being timed below.
    """
    return binomial_put(S, K, T, R, div, sig, n)

# Number of repetitions used for the timing
loop = 1000

# Total wall time for `loop` calls, then the average per call
result = timeit.timeit(test_bt, number=loop)
print(result, 'seconds per loop times of test function')
print(result / loop, 'seconds per each binomial_put')

41.63978371899998 seconds per loop times of test function
0.04163978371899998 seconds per each binomial_put
