# Load modules and external files

You need to import four Python scripts used for the implied volatility calibration:
- *newton.py*
- *BSImplVol.py*
- *BS.py*
- *Bisect.py*

In [None]:
import pandas as pd
import numpy as np
import scipy.integrate as integrate
from scipy import interpolate
import math
import matplotlib.pyplot as plt
import tensorflow as tf
from io import StringIO

plt.style.use('ggplot')
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import sklearn as skl
from sklearn import preprocessing
import importlib
import scipy.stats as st
import numpy as np
import math
import scipy.stats as st
import matplotlib.ticker as mtick
import time
from scipy import interpolate

In [None]:
%load_ext autoreload
%autoreload 2


In [None]:
#Upload the helper Python files into the Google Colaboratory environment
from google.colab import files

uploaded = files.upload()

#Report the name and the size of every uploaded file
for uploadedName, uploadedContent in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=uploadedName, length=len(uploadedContent)))

In [None]:
from BS import bsformula
from Bisect import bisect
from newton import newton
from BSImplVol import bsimpvol

# Load data with google colab 

The GitHub repository contains six days of data.
For each day, you need to load six CSV files:
- *underlying.csv* for the stock value.
- *locvol.csv* for the local volatility calibrated with tree pricing and tikhonov volatility (see Crépey (2002)).
- *dividend.csv* for dividend extracted from put-call parity.
- *discount.csv* for zero-coupon curve. 
- *dataTrain.csv* for prices and/or implied volatility used in training set.
- *dataTest.csv* for prices and/or implied volatility used in testing set.

In [None]:
#Read csv files as dataFrames
#Every column is converted to a numeric dtype with pd.to_numeric.
#NOTE: the "From Dat files" cells further down reassign these same variables
#from the .dat exports, so only one of the two loading paths is needed.
zeroCouponCurve = pd.read_csv("discount.csv",decimal=".").apply(pd.to_numeric)
dividendCurve = pd.read_csv("dividend.csv",decimal=".").apply(pd.to_numeric)
trainingData = pd.read_csv("dataTrain.csv",decimal=".").apply(pd.to_numeric)
testingData = pd.read_csv("dataTest.csv",decimal=".").apply(pd.to_numeric)
underlyingNative = pd.read_csv("underlying.csv",decimal=".").apply(pd.to_numeric)
localVolatilityNative = pd.read_csv("locvol.csv",decimal=".").apply(pd.to_numeric)


In [None]:
def parseDatFile(fileName):
  """Extract the type-2 option quotes from the ``[option]`` records of a .dat file.

  Only the text between the first ``[option]`` tag and the first
  ``[dividend]`` tag is parsed. Records are separated by a blank line and
  every line of a record is written as ``key = value``. Values are read by
  position inside the record: line 1 -> maturity, line 2 -> type,
  line 3 -> strike, line 4 -> price (line 0 is the tag itself).

  NOTE(review): parseDatFiles in this notebook labels positions 3 and 4 as
  Price and Strike, i.e. the opposite order - confirm against the file layout.

  fileName: path of the .dat file.
  Returns a DataFrame with columns "Maturity" (rounded to 3 decimals),
  "Strike" (rounded to the nearest integer), "Price" and "Type" (int32),
  restricted to the rows whose "Type" equals 2.
  """
  #The context manager releases the file handle as soon as the text is read
  with open(fileName) as datFile:
    s = datFile.read()

  defPos=s.find("[option]")
  finPos=s.find("[dividend]")
  #Turn each blank-line separated record into one comma separated csv row
  #terminated by ";"
  csvText = s[defPos:finPos].replace("\n\n",";").replace("\n",",").replace(";",";\n")
  df = pd.read_csv(StringIO(csvText), decimal=".", sep=",", header=None)

  def fieldValue(column):
    #Keep the right-hand side of "key = value"
    return column.str.split(pat="= ", expand=True)[1]

  matC = pd.to_numeric(fieldValue(df[1])).round(3)
  strikeC = pd.to_numeric(fieldValue(df[3])).round()
  #The last field of a row still carries the ";" record terminator
  priceC = pd.to_numeric(fieldValue(df[4].str.replace(";","", regex=False)))
  typeC = pd.to_numeric(fieldValue(df[2]))
  formattedDat = pd.DataFrame([matC, strikeC, priceC, typeC], index = ["Maturity", "Strike", "Price", "Type"]).transpose().astype({"Type":"int32"})

  filteredDat = formattedDat[formattedDat["Type"]==2]
  return filteredDat
#Parse the type-2 option quotes of the raw .dat file.
#NOTE: filteredDat is reassigned later by the parseDatFiles call.
filteredDat = parseDatFile("7_8_2001__filterdax.dat")

#### From Dat files

In [None]:
def parseModelParamDatFile(fileName):
    """Parse the local volatility grid stored in a model-parameter .dat file.

    The file is split on blank lines. The first fourteen sections are header
    data that this parser does not use (presumably repo, dates, interest and
    dividend rates, sigma bounds and the underlying - TODO confirm against the
    file format); every remaining non-empty line must hold tab separated
    numbers.

    fileName: path of the ``*.modelparam.dat`` file.
    Returns a DataFrame with columns "date", "stock(%)" and "vol", one row per
    triplet of parsed values.
    Raises ValueError when a remaining line holds a non-numeric field.
    """
    #The context manager releases the file handle as soon as the text is read
    with open(fileName) as datFile:
        s = datFile.read()

    parts = s.split("\n\n")

    def splitRow(row):
        #np.float was removed from NumPy; the builtin float is the same dtype
        return np.array(row.split("\t")).astype(float)

    #Drop every empty line, whether or not the file ends with a newline
    tree = [row for row in ("\n".join(parts[14:])).split("\n") if row != ""]
    formattedTree = np.reshape(np.array(list(map(splitRow, tree))), (-1,3))

    return pd.DataFrame(formattedTree, columns = ["date", "stock(%)", "vol"])
    

In [None]:
def parseImpliedVolDatFile(fileName):
    """Parse an implied volatility grid stored as tab separated rows.

    Empty lines are ignored; every other line must hold tab separated numbers.

    fileName: path of the ``*.impliedvol.dat`` file.
    Returns a DataFrame with columns "Strike", "Maturity", "Implied vol." and
    "Option price", one row per group of four parsed values.
    Raises ValueError when a line holds a non-numeric field.
    """
    #The context manager releases the file handle as soon as the text is read
    with open(fileName) as datFile:
        s = datFile.read()

    def splitRow(row):
        #np.float was removed from NumPy; the builtin float is the same dtype
        return np.array(row.split("\t")).astype(float)

    #Drop every empty line, whether or not the file ends with a newline
    testGrid = [row for row in s.split("\n") if row != ""]
    formattedTestGrid = np.reshape(np.array(list(map(splitRow, testGrid))), (-1,4))

    return pd.DataFrame(formattedTestGrid, columns=["Strike","Maturity","Implied vol.","Option price"])

In [None]:
def parseCalibrOutDatFile(fileName):
    """Parse the calibration output table of a ``*.calibr.out.dat`` file.

    Only the lines made of exactly ten tab separated fields are kept; the
    tenth field is parsed and then discarded.

    fileName: path of the calibration output file.
    Returns a DataFrame with the nine columns "Active", "Option\\ntype",
    "Maturity", "Strike", "Moneyness", "Option\\nprice", "Implied\\nvol.",
    "Calibrated\\nvol." and "Market vol. -\\nCalibrated vol."; "Active" and
    "Option\\ntype" are integers, the other columns are floats.
    Raises ValueError when a ten-field line holds a non-numeric field.
    """
    #The context manager releases the file handle as soon as the text is read
    with open(fileName) as datFile:
        s = datFile.read()

    splitRows = [row.split("\t") for row in s.split("\n")]
    filteredTrainingData = [row for row in splitRows if len(row) == 10]
    #np.float was removed from NumPy; the builtin float is the same dtype.
    #The reshape keeps the array two-dimensional when no line matches.
    formattedTrainingData = np.array(filteredTrainingData).astype(float).reshape(-1, 10)

    colNames = ["Active", "Option\ntype", "Maturity", "Strike", "Moneyness", 
                "Option\nprice", "Implied\nvol.", "Calibrated\nvol.","Market vol. -\nCalibrated vol."]
    dfTrainingData = pd.DataFrame(formattedTrainingData[:,:-1], columns = colNames)
    dfTrainingData["Active"] = dfTrainingData["Active"].astype(int)
    dfTrainingData["Option\ntype"] = dfTrainingData["Option\ntype"].astype(int)
    return dfTrainingData

In [None]:
def parseDatFiles(fileName):
    """Parse the four tagged sections of a market data .dat file.

    The file is expected to hold, in this order, ``[underlying]``,
    ``[zero_coupon]``, ``[option]`` and ``[dividend]`` records. Each record is
    a tag line followed by ``key = value`` lines, and records are separated by
    a blank line. Values are labelled by position inside a record, not by key.

    NOTE(review): option fields are labelled Maturity, Type, Price, Strike
    here, whereas parseDatFile in this notebook reads position 3 as the strike
    and position 4 as the price - confirm against the file layout.

    fileName: path of the .dat file.
    Returns the tuple (underlying, zeroCoupon, dividend, option) of DataFrames
    with columns ["S", "Repo"], ["Maturity", "Price"], ["Maturity", "Amount"]
    and ["Maturity", "Type", "Price", "Strike"] respectively.
    """
    #The context manager releases the file handle as soon as the text is read
    with open(fileName) as datFile:
        s = datFile.read()

    posUnderlying = s.find("[underlying]")
    posZeroCoupon = s.find("[zero_coupon]")
    posOption = s.find("[option]")
    posDividend = s.find("[dividend]")

    underlyingString = s[posUnderlying:posZeroCoupon]
    zeroCouponString = s[posZeroCoupon:posOption]
    optionString = s[posOption:posDividend]
    #The last two characters are dropped - presumably the blank line ending
    #the file; a file ending differently would lose data here (TODO confirm)
    dividendString = s[posDividend:-2] 

    def extractData(subStr, tag):
        #Strip the tag lines, then split the section into records
        parts = subStr.replace(tag + "\n", "").split("\n\n")
        try :
            parts.remove("")
        except ValueError:
            #Not found, we continue
            pass

        def parseRow(row):
            key, value = row.split(" = ")[0], row.split(" = ")[1]
            return int(value) if key == "type" else float(value)

        def splitRow(row):
            table = np.array(row.split("\n"))
            parseTable = np.array(list(map(parseRow, table)))
            return np.reshape(parseTable, (-1))

        return np.array(list(map(splitRow, parts)))

    #Apart from [underlying], the tags are followed by a trailing space
    underlying = pd.DataFrame(extractData(underlyingString, "[underlying]"), 
                              columns=["S","Repo"])
    zeroCoupon = pd.DataFrame(extractData(zeroCouponString, "[zero_coupon] "), 
                              columns=["Maturity","Price"])
    option = pd.DataFrame(extractData(optionString, "[option] "), 
                          columns=["Maturity","Type", "Price", "Strike"])
    #np.int was removed from NumPy; the builtin int is the equivalent dtype
    option["Type"] = option["Type"].astype(int)
    dividend = pd.DataFrame(extractData(dividendString, "[dividend] "), 
                            columns=["Maturity","Amount"])
    return underlying, zeroCoupon, dividend, option

In [None]:
#Local volatility grid read from the model-parameter file (replaces the csv version)
localVolatilityNative = parseModelParamDatFile("./esx/8_8_2001__filterdax.dat.modelparam.dat")
#localVolatilityNative = parseModelParamDatFile("./esx/30_11_1999__filteresx.dat.modelparam.dat")

In [None]:
#Testing grid (strike, maturity, implied vol., option price) read from the implied-vol file
testingData = parseImpliedVolDatFile("./esx/8_8_2001__filterdax.dat.impliedvol.dat")
#testingData = parseImpliedVolDatFile("./esx/30_11_1999__filteresx.dat.impliedvol.dat")

In [None]:
#Training quotes read from the calibration output file
trainingData = parseCalibrOutDatFile("./esx/8_8_2001__filterdax.dat.calibr.out.dat")
#trainingData = parseCalibrOutDatFile("./esx/30_11_1999__filteresx.dat.calibr.out.dat")

In [None]:
#Underlying, zero-coupon curve, dividends and option quotes read from the raw .dat file.
#This overwrites the filteredDat produced earlier by parseDatFile.
underlyingNative, zeroCouponCurve, dividendCurve, filteredDat = parseDatFiles("./esx/8_8_2001__filterdax.dat")
#underlyingNative, zeroCouponCurve, dividendCurve, filteredDat = parseDatFiles("./esx/30_11_1999__filteresx.dat")

#### Cleaning datasets

In [None]:
#Format dividend curve as a DataFrame indexed by maturity
dividendDf = dividendCurve.set_index('Maturity').sort_index()
#Add (or overwrite) a zero dividend at maturity 1.0, then restore the index order
dividendDf.loc[1.0] = 0.0
dividendDf.sort_index(inplace=True)
dividendDf.tail()

In [None]:
#Format zero coupon curve as a DataFrame indexed by maturity
rateCurveDf = zeroCouponCurve.set_index('Maturity').sort_index()
# keep only the points whose maturity is at most 1.01 (about one year)
rateCurveDf = rateCurveDf.loc[rateCurveDf.index <= 1.01]
rateCurveDf.head()

In [None]:
localVolatilityNative.head()

In [None]:
#Format local volatility: absolute strike, rounded maturity, (Strike, Maturity) index
renameDict = {"date": "Maturity", 
              "vol" : "LocalVolatility", 
              "stock(%)" : "StrikePercentage"}
localVolatility = (
    localVolatilityNative.dropna()
    .assign(Strike=lambda frame: frame["stock(%)"] * underlyingNative["S"].values,
            date=lambda frame: frame["date"].round(decimals=3))
    .rename(columns=renameDict)
    .set_index(["Strike", "Maturity"])
)
localVolatility.head()

In [None]:
localVolatility.head()

In [None]:
underlyingNative.head()

In [None]:
testingData.head()

In [None]:
#Treatment for testing data
#Keep the quotes with strictly positive implied volatility and price. The
#explicit copy makes the column assignment below act on an independent frame
#instead of on a slice of testingData.
filteredTestingData = testingData[(testingData["Implied vol."] > 0) & (testingData["Option price"] > 0)].copy()
filteredTestingData["Maturity"] = filteredTestingData["Maturity"].round(decimals=3)
#Entries whose key is not a column of the frame are ignored by rename
renameDict = {"Implied vol.": "ImpliedVol", 
              "Option price" : "Price", 
              "Implied delta" : "ImpliedDelta", 
              "Implied gamma" : "ImpliedGamma",
              "Implied theta" : "ImpliedTheta",
              "Local delta" : "LocalDelta",
              "Local gamma" : "LocalGamma"}
#Implied volatility Series indexed by (Strike, Maturity)
formattedTestingData = filteredTestingData.rename(columns=renameDict).set_index(["Strike", "Maturity"])["ImpliedVol"]
formattedTestingData.head()

In [None]:
trainingData.head()

In [None]:
#Treatment for training data
#Keep the type-2 quotes with strictly positive calibrated volatility and
#price. The explicit copy makes the column assignment below act on an
#independent frame instead of on a slice of trainingData.
filteredTrainingData = trainingData[(trainingData["Calibrated\nvol."] > 0)
                                    & (trainingData["Option\nprice"] > 0)
                                    & (trainingData["Option\ntype"] == 2)].copy()
filteredTrainingData["Maturity"] = filteredTrainingData["Maturity"].round(decimals=3)
#The calibrated volatility is exposed as "ImpliedVol" and the market implied
#volatility as "LocalImpliedVol"
renameDict = {"Option\ntype" : "OptionType", 
              "Option\nprice" : "Price", 
              "Calibrated\nvol." : "ImpliedVol",#"LocalImpliedVol", 
              "Implied\nvol." : "LocalImpliedVol"}#"ImpliedVol"}
formattedTrainingData = filteredTrainingData.drop(["Active", "Market vol. -\nCalibrated vol."],axis=1).rename(columns=renameDict).set_index(["Strike","Maturity"])
formattedTrainingData.head()

In [None]:
formattedTrainingData.shape

# Formatting data

### Bootstrapping rate curve


- To bootstrap the short rate $r$ and the dividend rate $q$, we assume that both are piecewise constant, i.e. 
$\exp{(-\int_{0}^{T} r_t dt)} = \exp{(-\sum_{i} r_i h)}$ and $\exp{(\int_{0}^{T} q_t dt)} = \exp{(\sum_{i} q_i h)}$.
- $\forall i \in \{0,..,N-1\}$ with $t_0 = 0$, $t_N = T$ and $h = t_{i+1} - t_i$, we have $-\frac{\log{B(0,t_{i+1})} - \log{B(0,t_i)}}{h} = r_i$, where $B(0,t_i)$ is the price of a zero-coupon bond expiring at time $t_i$. 
- For dividends, we just substitute $B(0,t_i)$ with the spot price net of the dividend cash flows paid until time $t_i$, i.e. $S_{t_0} - \sum\limits_{j \leq i} Div_{t_j}$ (this is what `bootstrapDividend` computes).
- Then we linearly interpolate $r$ and $q$.
-  Linear interpolation is also used for integrals $\int_{0}^{T} q_t d_t$ and $\int_{0}^{T} r_t d_t$ in order to obtain discount factor or dividend factor. 

In [None]:
#Compute the integral and return the linear interpolation function 
def interpIntegral(curve):
    """Integrate a piecewise-constant curve and interpolate the result linearly.

    curve: Series of rates indexed by time. Each value is weighted by the
    distance to the previous index entry; the first entry gets a weight of 0.

    Returns a pair made of the integral sampled on 100 points evenly spaced
    between 0 and 0.99 (as a Series) and the linear interpolator itself, which
    extrapolates outside the known times.
    """
    #Width of the interval ending at each time (0 for the first time)
    stepWidths = curve.index.to_series().diff().fillna(0)
    cumulativeIntegral = (curve * stepWidths).cumsum()
    #The integral is zero at the origin
    cumulativeIntegral.loc[0] = 0.0
    cumulativeIntegral.sort_index(inplace=True)
    integralSpline = interpolate.interp1d(cumulativeIntegral.index,
                                          cumulativeIntegral,
                                          fill_value= 'extrapolate',
                                          kind ='linear')
    evaluationGrid = np.linspace(0,0.99,100)
    sampledIntegral = pd.Series(integralSpline(evaluationGrid),index=evaluationGrid)
    return sampledIntegral, integralSpline

def bootstrapZeroCoupon(curvePrice, name):
    """Bootstrap the short rate and yield curves from zero-coupon prices.

    curvePrice: DataFrame indexed by maturity with a "Price" column holding
                the zero-coupon bond prices.
    name: unused, kept for backward compatibility.

    Returns the tuple (riskFreeCurve, riskCurvespline, yieldCurve,
    yieldCurvespline, interpolatedIntegral, riskFreeIntegral):
      - riskFreeCurve: float Series of piecewise-constant short rates,
      - riskCurvespline: its 'next' interpolator,
      - yieldCurve: DataFrame of zero yields (column "Price"),
      - yieldCurvespline: its 'next' interpolator,
      - interpolatedIntegral, riskFreeIntegral: output of interpIntegral
        applied to the short rate curve.
    Side effect: three matplotlib figures are shown.
    """
    #Bootstrap short rate curve.
    #The prices are handled as a float Series: iterating over the rows of the
    #DataFrame produced an object Series of one-element Series, which only
    #worked through the deprecated implicit float(Series) conversion.
    bondPrice = curvePrice["Price"]

    def computeShortRate(curve):
        #Prepend B(0,0) = 1 so that the first rate is -log(B(0,t_0))/t_0
        logPrice = np.concatenate(([0.0], np.log(curve.values.astype(float))))
        maturity = np.concatenate(([0.0], curve.index.values.astype(float)))
        return pd.Series(-np.diff(logPrice) / np.diff(maturity), index = curve.index)

    #For t=0 we take the first available point to ensure right continuity
    riskFreeCurve = computeShortRate(bondPrice)
    riskFreeCurve.loc[0.00] = riskFreeCurve.iloc[0]
    riskFreeCurve = riskFreeCurve.sort_index()

    #Bootstrap yield curve: simple compounding below one year, annual
    #compounding from one year onwards
    def zeroYield(x):
        if(float(x.name) < 1):
            return (1/x - 1)/float(x.name)
        else:
            return (x**(-1/float(x.name)) - 1)
    yieldCurve = curvePrice.apply(zeroYield, axis = 1)
    yieldCurve.loc[0.00] = yieldCurve.iloc[0]
    yieldCurve = yieldCurve.sort_index()

    plt.plot(riskFreeCurve, label = "Short rate")

    #Interpolate short rate curve and yield curve
    timeStep = np.linspace(0,0.99,100)
    riskCurvespline = interpolate.interp1d(riskFreeCurve.index,
                                           riskFreeCurve,
                                           fill_value= 'extrapolate',
                                           kind ='next')
    interpolatedCurve = pd.Series(riskCurvespline(timeStep),index=timeStep)
    plt.plot(interpolatedCurve, label="Interpolated short rate")
    plt.legend()
    plt.show()

    plt.plot(yieldCurve, label = "Yield curve")
    yieldCurvespline = interpolate.interp1d(yieldCurve.index,
                                            yieldCurve['Price'],
                                            fill_value= 'extrapolate',
                                            kind ='next')
    interpolatedCurve = pd.Series(yieldCurvespline(timeStep),index=timeStep)
    plt.plot(interpolatedCurve, label = "Interpolated Yield curve")
    plt.legend()
    plt.show()

    #Integrate short rate
    interpolatedIntegral, riskFreeIntegral = interpIntegral(riskFreeCurve)
    plt.plot(interpolatedIntegral)
    plt.show()

    return riskFreeCurve, riskCurvespline, yieldCurve, yieldCurvespline, interpolatedIntegral, riskFreeIntegral


In [None]:
#Short rate curve, yield curve, their interpolators and the integrated short rate
riskFreeCurve, riskCurvespline, yieldCurve, yieldCurvespline, interpolatedIntegral, riskFreeIntegral = bootstrapZeroCoupon(rateCurveDf, "Short rate")

In [None]:
riskFreeCurve

In [None]:
interpolatedIntegral

### Bootstrapping dividend curve

In [None]:
def bootstrapDividend(curvePrice, underlying, name):
    """Bootstrap the dividend short rate and yield curves from dividend amounts.

    curvePrice: DataFrame indexed by maturity with an 'Amount' column.
    underlying: DataFrame whose first 'S' entry is used as the spot price.
    name: unused.

    Returns the tuple (short rate Series, its 'next' interpolator, dividend
    yield Series, its 'next' interpolator, integrated short rate sampled by
    interpIntegral, linear interpolator of that integral).
    Side effect: three matplotlib figures are shown.
    """
    spot = underlying['S'].iloc[0]
    #Spot price net of the cumulated dividend amounts
    priceEvolution = spot - curvePrice['Amount'].cumsum()
    priceEvolution.loc[0] = spot
    priceEvolution.sort_index(inplace=True)

    #Bootstrap short rate for dividend
    def computeShortRate(curve):
        def rateBetween(start, end):
            logRatio = np.log(curve.iloc[end])-np.log(curve.iloc[start])
            return -(logRatio)/(curve.index[end]-curve.index[start])
        #The first point reuses the rate of the first interval
        rates = [rateBetween(0, 1) if i == 0 else rateBetween(i-1, i)
                 for i in range(curve.size)]
        return pd.Series(rates,index = curve.index).dropna()
    logReturnDividendDf = computeShortRate(priceEvolution)

    #Dividend yield curve (annually compounded)
    def divYield(x):
        priceRatio = priceEvolution[x]/priceEvolution.iloc[0]
        return (priceRatio**(1/float(x)) - 1)
    positiveMaturities = logReturnDividendDf.index.to_series().tail(-1)
    dividendYield = positiveMaturities.apply(divYield)
    dividendYield.loc[0.00] = dividendYield.iloc[0]
    dividendYield = dividendYield.sort_index()

    plt.plot(logReturnDividendDf, label = "Short rate")

    #Interpolate short rate curve and yield curve
    evaluationGrid = np.linspace(0,0.99,100)
    logReturnDividendSpline = interpolate.interp1d(logReturnDividendDf.index,
                                                   logReturnDividendDf,
                                                   fill_value= 'extrapolate',
                                                   kind ='next')
    plt.plot(pd.Series(logReturnDividendSpline(evaluationGrid),index=evaluationGrid),
             label="Interpolated short rate")
    plt.legend()
    plt.show()

    plt.plot(dividendYield, label = "Yield curve")
    yieldCurvespline = interpolate.interp1d(dividendYield.index,
                                            dividendYield.values,
                                            fill_value= 'extrapolate',
                                            kind ='next')
    plt.plot(pd.Series(yieldCurvespline(evaluationGrid),index=evaluationGrid),
             label = "Interpolated Yield curve")
    plt.legend()
    plt.show()

    #Integrate short rate
    interpolatedIntegral, logReturnDividendIntegral = interpIntegral(logReturnDividendDf)
    plt.plot(interpolatedIntegral)
    plt.show()

    return (logReturnDividendDf, logReturnDividendSpline, dividendYield,
            yieldCurvespline, interpolatedIntegral, logReturnDividendIntegral)

In [None]:
#Dividend short rate, dividend yield, their interpolators and the integrated dividend rate.
#NOTE: interpolatedIntegral is overwritten here; it previously held the integrated risk-free rate.
spreadDividend, divSpline, yieldDividend, divYieldSpline, interpolatedIntegral, divSpreadIntegral  = bootstrapDividend(dividendDf, underlyingNative, "Spread")

In [None]:
spreadDividend

In [None]:
interpolatedIntegral

### Pricing black-scholes price

#### Change of variable

- In the presence of a dividend rate $q$ and a risk-free rate $r$, the Dupire formula is: $$\sigma^2(T,K) = 2 \frac{ \partial_T P(T,K) + (r-q) K \partial_K P(T,K) + qP(T,K)}{K^2 \partial_{K}^2 P(T,K)}$$ 
with strike $K$, maturity $T$ and $P$ our pricing function. 
- We apply the following change of variable : $$ w(T,k) = \exp{(\int_{0}^{T} q_t dt)} P(T,K)$$ with $K = k \exp{(\int_{0}^{T} (r_t - q_t) dt)} $.

- Then the Dupire equation becomes: $\sigma^2(T,K) = 2 \frac{ \partial_T w(T,k)}{k^2 \partial_{k}^2 w(T,k)}$. 
- If we learn the mapping $w$ with a neural network, then $\partial_T w$ and $\partial_{k}^2 w$, and therefore $\sigma$, can be obtained quickly by adjoint differentiation.


In [None]:
#Linear interpolation combined with Nearest neighbor extrapolation
def customInterpolator(interpolatedData, newStrike, newMaturity):
  """Interpolate scattered (Strike, Maturity) data onto new coordinates.

  interpolatedData: pandas object whose index has "Strike" and "Maturity" levels.
  newStrike, newMaturity: coordinates of the points to evaluate.

  Returns an array holding the linear interpolation where it is defined and
  the value of the nearest known point where the linear interpolation is NaN.
  """
  gridIndex = interpolatedData.index
  knownPoints = np.column_stack(
      (np.ravel(gridIndex.get_level_values("Strike").values),
       np.ravel(gridIndex.get_level_values("Maturity").values)))
  knownValues = interpolatedData.values.flatten()
  queryPoints = (newStrike, newMaturity)

  def evaluate(method):
    #Coordinates are rescaled to the unit cube before interpolating
    return interpolate.griddata(knownPoints,
                                knownValues,
                                queryPoints,
                                method = method,
                                rescale=True)

  linearValues = evaluate('linear')
  nearestValues = evaluate('nearest')
  return np.where(np.isnan(linearValues), nearestValues, linearValues)

In [None]:
import scipy.stats as st
#Density derivative
def dpdf(x):
    """Return the derivative of the standard normal density evaluated at x."""
    v = 1  #standard deviation of the density
    gaussianKernel = np.exp(-x**2/(2.0*v**2))
    normalisation = v**3*np.sqrt(2.0*np.pi)
    return -x*gaussianKernel/normalisation
    

def generalizedGreeks(cp, s, k, rf, t, v, div, rfInt, divInt):
    """Price an option with the Black-Scholes model and return its sensitivities.

    cp: +1/-1 for call/put
    s: initial stock price
    k: strike price
    t: expiration time (must be strictly positive)
    v: volatility
    rf: risk-free rate at time t
    div: dividend rate at time t
    rfInt: deterministic risk-free rate integrated between 0 and t
    divInt: deterministic dividend rate integrated between 0 and t

    Returns the tuple (optprice, delta, vega, delta_k, gamma_k, delta_T):
      optprice: option price
      delta: derivative of the price with respect to s
      vega: derivative of the price with respect to v
      delta_k: derivative of the price with respect to k
      gamma_k: second derivative of the price with respect to k
      delta_T: derivative of the price with respect to t at constant v, with
               rf and div taken as the derivatives of rfInt and divInt at t
    """
    sqrtT = np.sqrt(t)
    volSqrtT = v*sqrtT
    d1 = (np.log(s/k)+(rfInt-divInt+0.5*v*v*t))/volSqrtT
    d2 = d1 - volSqrtT

    Nd1 = st.norm.cdf(cp*d1)
    Nd2 = st.norm.cdf(cp*d2)
    #The sensitivities need the density, not the cumulative distribution
    pdfD1 = st.norm.pdf(d1)
    pdfD2 = st.norm.pdf(d2)

    discountFactor = np.exp(-rfInt)
    forwardFactor = np.exp(-divInt)

    optprice = (cp*s*forwardFactor*Nd1) - (cp*k*discountFactor*Nd2)

    #Spot sensitivities carry the dividend factor
    delta = cp*forwardFactor*Nd1
    vega = s*forwardFactor*sqrtT*pdfD1

    #Strike derivatives: the density terms cancel because
    #s*forwardFactor*pdf(d1) == k*discountFactor*pdf(d2)
    delta_k = -cp*discountFactor*Nd2
    gamma_k = discountFactor*pdfD2/(k*volSqrtT)

    #Maturity derivative: the discount and dividend factors are differentiated
    #with the instantaneous rates; the remaining density terms reduce to
    #s*forwardFactor*pdf(d1)*v/(2*sqrt(t))
    delta_T = (cp*(rf*k*discountFactor*Nd2 - div*s*forwardFactor*Nd1)
               + 0.5*v*s*forwardFactor*pdfD1/sqrtT)

    return optprice, delta, vega, delta_k, gamma_k, delta_T

In [None]:
S0 = underlyingNative["S"].values
#Change of variable for deterministic discount curve and dividend curve
def changeOfVariable(s,t):
  """Apply the change of variable for deterministic rate and dividend curves.

  s: strike(s); t: maturity(ies).
  Uses the notebook-level interpolators divSpreadIntegral and riskFreeIntegral,
  both evaluated at t and cast to float32.

  Returns (adjustedStrike, factorPrice) where adjustedStrike is s multiplied
  by exp(dividend integral - rate integral) and factorPrice is
  exp(-dividend integral).
  """
  dividendIntegral = divSpreadIntegral(t).astype(np.float32)
  rateIntegral = riskFreeIntegral(t).astype(np.float32)

  factorPrice = np.exp( - dividendIntegral )
  factorStrike = np.exp( dividendIntegral - rateIntegral )
  return np.multiply(s, factorStrike), factorPrice

#Change of variable for constant discount and dividend short rate 
def changeOfVariable_BS(s,t):
  """Apply the change of variable for constant dividend and risk-free rates.

  s: strike(s); t: maturity(ies).
  NOTE(review): q and r are read from notebook-level names that are not
  defined in the visible part of the notebook - define them before calling.

  Returns (adjustedStrike, factorPrice) where adjustedStrike is s multiplied
  by exp((q-r)*t) and factorPrice is exp(-q*t).
  """
  factorPrice = np.exp( - q*t )
  factorStrike = np.exp( (q-r)*t )
  return np.multiply(s, factorStrike), factorPrice

In [None]:
#Generate a proper dataset from implied volatility
def generateData(impliedVol,
                 S0,
                 rIntegralSpline,
                 qIntegralSpline,
                 rSpline,
                 qSpline,
                 priceDf = None,
                 spotValue = True):
  """Generate a priced dataset (put prices and sensitivities) from implied volatilities.

  impliedVol: implied volatilities indexed by (Strike, Maturity).
  S0: array whose first entry is the spot price.
  rIntegralSpline, qIntegralSpline: interpolators of the integrated risk-free
                                    and dividend rates.
  rSpline, qSpline: interpolators of the risk-free and dividend short rates.
  priceDf: optional DataFrame with "Strike", "Maturity" and "Price" columns.
           When given, its grid is used, the implied volatility is
           interpolated on it with customInterpolator and its "Price" column
           replaces the model price.
  spotValue: when True, one row per available strike is added at maturity 0
             with the put payoff as price.

  Returns a DataFrame indexed by (Strike, Maturity), sorted by index, with
  columns "Price", "Delta", "Vega", "Delta Strike", "Gamma Strike", "Theta",
  "ChangedStrike", "DividendFactor", "Strike", "Maturity", "ImpliedVol",
  "VegaRef", "logMoneyness" and "impliedTotalVariance".
  """
  #Get grid coordinates
  if priceDf is None :
    x_train = impliedVol.index.to_frame()
    #Implied vol is already given on the grid
    x_train["ImpliedVol"] = impliedVol
  else :
    x_train = pd.MultiIndex.from_arrays([priceDf["Strike"], priceDf["Maturity"]], 
                                        names=('Strike', 'Maturity')).to_frame()
    #Get implied vol by interpolating another grid
    x_train["ImpliedVol"] = customInterpolator(impliedVol, 
                                               x_train["Strike"], 
                                               x_train["Maturity"])
  #Get sensitivities and prices
  isPut = True
  cp = -1 if isPut else 1

  def putGreeks(volatility = None):
    #Price every grid point; volatility=None means "use the row's ImpliedVol"
    def rowGreeks(x):
      return generalizedGreeks(cp, 
                               S0, 
                               x["Strike"] , 
                               rSpline(x["Maturity"]), 
                               x["Maturity"], 
                               x["ImpliedVol"] if volatility is None else volatility, 
                               qSpline(x["Maturity"]), 
                               rIntegralSpline(x["Maturity"]), 
                               qIntegralSpline(x["Maturity"]))
    return np.reshape(np.array(list(zip(x_train.apply(rowGreeks,axis=1).values))),
                      (x_train.shape[0], 6))  # put greeks

  res = putGreeks()
  prices = res[:,0] if priceDf is None else priceDf["Price"].values
  deltas = res[:,1]
  vegas = res[:,2]
  delta_ks = res[:,3]
  gamma_ks = res[:,4]
  delta_Ts = res[:,5]
  
  #Vega for optional loss weighting, computed with a flat reference volatility
  sigmaRef = 0.25
  res1 = putGreeks(sigmaRef)
  
  #Get adjusted strike for the change of variables
  changedVar = changeOfVariable(x_train["Strike"],x_train["Maturity"])
  
  multiIndex = x_train["ImpliedVol"].index

  #Gather all data as a Dataframe 
  cols = ["Price", "Delta", "Vega", "Delta Strike", "Gamma Strike", 
          "Theta", "ChangedStrike", "DividendFactor", "Strike", "Maturity", "ImpliedVol", "VegaRef"]

  dfData = np.vstack((prices, deltas, vegas, delta_ks, gamma_ks, delta_Ts) + 
                     changedVar + (x_train["Strike"], x_train["Maturity"], x_train["ImpliedVol"], res1[:,2]))
  
  df = pd.DataFrame(dfData.T , columns=cols, index = multiIndex)

  #Add pricing with spot delivery
  if spotValue : 
    KAvailable = multiIndex.get_level_values("Strike").unique()
    TSpot = np.zeros_like(KAvailable)
    priceSpot = np.maximum(KAvailable- S0[0],0)
    deltaSpot = -np.sign(np.maximum(KAvailable- S0[0],0))
    gammaSpot = np.zeros_like(deltaSpot)
    vegasSpot = gammaSpot
    deltaKSpot = np.sign(np.maximum(KAvailable- S0[0],0))
    #Large placeholder value for the maturity derivative at T=0
    thetaSpot = 1000000 * deltaKSpot

    #Ignore implied vol for T=0
    impliedSpot = np.zeros_like(thetaSpot)

    changedVarSpot = changeOfVariable(KAvailable,TSpot)
    
    dfDataSpot = np.vstack((priceSpot, deltaSpot, vegasSpot, deltaKSpot, gammaSpot, thetaSpot) +
                          changedVarSpot + (KAvailable, TSpot, impliedSpot, vegasSpot))
    indexSpot = pd.MultiIndex.from_arrays([np.array(KAvailable), TSpot], names=('Strike', 'Maturity'))
    dfSpot = pd.DataFrame(dfDataSpot.T , columns=cols, index = indexSpot)
    #DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    df = pd.concat([df, dfSpot]).sort_index()

  #Add forward logmoneyness if we want to calibrate local volatility from implied volatilities
  df["logMoneyness"] = np.log(df["ChangedStrike"] / S0[0]) 
  df["impliedTotalVariance"] = np.square(df["ImpliedVol"]) #*  df["Maturity"]

  return df.sort_index()

In [None]:
S0[0]

In [None]:
formattedTrainingData.sort_index()

In [None]:
testingData.tail()

In [None]:
#Testing set: model prices and sensitivities computed on the implied-vol grid itself
testingDataSet = generateData(formattedTestingData,
                              S0,
                              riskFreeIntegral,
                              divSpreadIntegral,
                              riskCurvespline,
                              divSpline)
testingDataSet.tail()

In [None]:
#Checking call put parity
#Discounted forward value S0*exp(-dividend integral) - K*exp(-rate integral)
#for the fourth-to-last testing quote
maturity = testingData.iloc[-4]["Maturity"]
strike = testingData.iloc[-4]["Strike"]
(S0 * np.exp(-divSpreadIntegral(maturity))  - np.exp(-riskFreeIntegral(maturity)) * strike) 

In [None]:
#Put call parity
#Difference between the option price quoted in testingData and the model price
#stored in testingDataSet for the same strike and (rounded) maturity
testingData.iloc[-4]["Option price"] - testingDataSet.loc[(testingData.iloc[-4]["Strike"],round(testingData.iloc[-4]["Maturity"],3))]["Price"]

In [None]:
#Use all prices in dat files
#NOTE: the next cell reassigns trainingDataSet, so run only one of the two variants
trainingDataSet = generateData(formattedTrainingData["ImpliedVol"],
                               S0,
                               riskFreeIntegral,
                               divSpreadIntegral,
                               riskCurvespline,
                               divSpline,
                               priceDf = filteredDat)
trainingDataSet.tail()

In [None]:
#Use same prices as those for tikhonov calibration
#The index of formattedTrainingData is reset so that "Strike" and "Maturity" are plain columns
trainingDataSet = generateData(formattedTrainingData["ImpliedVol"], 
                               S0, 
                               riskFreeIntegral, 
                               divSpreadIntegral, 
                               riskCurvespline, 
                               divSpline,
                               priceDf = formattedTrainingData.reset_index())
trainingDataSet.tail()

In [None]:
filteredTrainingData.tail()

In [None]:
filteredTrainingData[filteredTrainingData["Strike"]==5900]

In [None]:
trainingDataSet[trainingDataSet["Strike"]==5900]

In [None]:
filteredTrainingData[filteredTrainingData["Strike"]==4000]

In [None]:
trainingDataSet[trainingDataSet["Strike"]==4000]

In [None]:
localVolatility.head()

In [None]:
testingData.tail()

In [None]:
#Get local volatility from Crépey (2002) on another grid
#(linear interpolation, nearest neighbour outside the known points)
def interpolatedLocalVolatility(localVol, priceGrid):
    """Evaluate the local volatility surface on the grid of priceGrid.

    localVol: DataFrame indexed by (Strike, Maturity) with a "LocalVolatility" column.
    priceGrid: pandas object indexed by (Strike, Maturity).

    Returns a Series of local volatilities sharing the index of priceGrid.
    """
    targetIndex = priceGrid.index
    targetStrikes = targetIndex.get_level_values("Strike").values.flatten()
    targetMaturities = targetIndex.get_level_values("Maturity").values.flatten()
    interpolatedValues = customInterpolator(localVol["LocalVolatility"],
                                            targetStrikes,
                                            targetMaturities)
    return pd.Series(interpolatedValues, index = targetIndex)

#Attach the interpolated local volatility to both datasets (the "Price" column only provides the grid)
trainingDataSet["locvol"] = interpolatedLocalVolatility(localVolatility, trainingDataSet["Price"])
testingDataSet["locvol"] = interpolatedLocalVolatility(localVolatility, testingDataSet["Price"])

In [None]:
localVolatility[localVolatility.index.get_level_values("Maturity") <= 0.01]

In [None]:
dataSet = trainingDataSet #Training set
dataSetTest = testingDataSet #Testing set

In [None]:
#Data for gaussian processes
#Sample the four curves on 101 points between 0 and 1.
#No trailing comma after the calls: it silently wrapped two of the curves in
#1-tuples.
tGrid = np.linspace(0, 1, 101)
exportedRiskFreeIntegral = riskFreeIntegral(tGrid)
exportedDivSpreadIntegral = divSpreadIntegral(tGrid)
exportedRRiskCurvespline = riskCurvespline(tGrid)
exportedDivSpline = divSpline(tGrid)
dfCurve = pd.DataFrame(np.vstack([exportedRiskFreeIntegral, exportedDivSpreadIntegral, exportedRRiskCurvespline, exportedDivSpline]).T,
                       columns=["riskFreeIntegral","divSpreadIntegral","riskCurvespline","divSpline"], 
                       index = tGrid)
#Discount and dividend curve
dfCurve.to_csv("dfCurve.csv")
#Training dataset
dataSet.to_csv("trainingDataSet.csv")
#Testing dataset
dataSetTest.to_csv("testingDataSet.csv")

In [None]:
dataSetTest.head()

# Neural network 

## Scaling methods

Use min-max scaling to rescale the strike between 0 and 1 in order to improve the stability of neural network training. 

In [None]:
def transformCustomMinMax(df, scaler):
  """Scale df with scaler.transform, keeping the index and the columns of df."""
  scaledValues = scaler.transform(df)
  return pd.DataFrame(scaledValues, index = df.index, columns = df.columns)
#Reverse operation min-max scaling
def inverseTransformMinMax(df, scaler):
  """Undo the scaling of df with scaler.inverse_transform, keeping its index and columns."""
  restoredValues = scaler.inverse_transform(df)
  return pd.DataFrame(restoredValues, index = df.index, columns = df.columns)
#Same thing but for a particular column
def inverseTransformColumnMinMax(originalDf, scaler, column):
  """Undo the min-max scaling of a single column.

  originalDf: DataFrame whose column order matches the data used to fit scaler.
  scaler: object exposing data_min_ and data_max_ arrays.
  column: scaled Series whose name is a column of originalDf.

  Returns the Series min + (max - min) * column, with the name of column.
  """
  position = originalDf.columns.get_loc(column.name)
  upperBound = scaler.data_max_[position]
  lowerBound = scaler.data_min_[position]
  restored = pd.Series(lowerBound + (upperBound - lowerBound) * column, index = column.index)
  return restored.rename(column.name)
#Reverse transform of min-max scaling but for greeks   
def inverseTransformColumnGreeksMinMax(originalDf, 
                                       scaler,
                                       columnDerivative,
                                       columnFunctionName,
                                       columnVariableName,
                                       order = 1):
  """Undo the min-max scaling of a derivative computed on scaled data.

  originalDf: DataFrame whose column order matches the data used to fit scaler.
  scaler: object exposing data_min_ and data_max_ arrays.
  columnDerivative: Series holding the derivative in scaled units.
  columnFunctionName: name of the differentiated column.
  columnVariableName: name of the column the derivative is taken with respect to.
  order: order of the derivative.

  Returns columnDerivative multiplied by the range of the function column and
  divided by the range of the variable column raised to the power order.
  """
  def columnRange(columnName):
    #Width (max - min) seen by the scaler for that column
    position = originalDf.columns.get_loc(columnName)
    return scaler.data_max_[position] - scaler.data_min_[position]

  scaleFunction = columnRange(columnFunctionName)
  scaleVariable = columnRange(columnVariableName) ** order
  rescaled = pd.Series(scaleFunction * columnDerivative / scaleVariable , 
                       index = columnDerivative.index)
  return rescaled.rename(columnDerivative.name)

In [None]:
#Identity (no-op) counterparts of the min-max scaling helpers, used when scaling is disabled
def transformCustomId(df, scaler):
  return pd.DataFrame(df,
                      index = df.index, 
                      columns = df.columns)
def inverseTransformId(df, scaler):
  """Identity inverse transform: ``df`` re-wrapped with the same labels; ``scaler`` is ignored."""
  rowLabels, columnLabels = df.index, df.columns
  return pd.DataFrame(df, index = rowLabels, columns = columnLabels)
def inverseTransformColumnId(originalDf, scaler, column):
  """Identity inverse transform of one column; ``originalDf`` and ``scaler`` are ignored."""
  rebuilt = pd.Series(column, index = column.index)
  return rebuilt.rename(column.name)

def inverseTransformColumnGreeksId(originalDf, scaler, 
                                 columnDerivative, 
                                 columnFunctionName, 
                                 columnVariableName,
                                 order = 1):
  """Identity rescaling of a derivative: only ``columnDerivative`` is used.

  The other arguments exist so that the signature matches the min-max variant.
  """
  rebuilt = pd.Series(columnDerivative , index = columnDerivative.index)
  return rebuilt.rename(columnDerivative.name)


In [None]:
#Scaling switch: min-max helpers when True, identity (no-op) helpers when False
activateScaling = False
if activateScaling :
  transformCustom = transformCustomMinMax
  inverseTransform = inverseTransformMinMax
  inverseTransformColumn = inverseTransformColumnMinMax
  inverseTransformColumnGreeks = inverseTransformColumnGreeksMinMax
else :
  transformCustom = transformCustomId
  inverseTransform = inverseTransformId
  inverseTransformColumn = inverseTransformColumnId
  inverseTransformColumnGreeks = inverseTransformColumnGreeksId

In [None]:
#Fit a min-max scaler with target range (0, 1) on dataSet
scaler = skl.preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataSet)
#Both datasets go through transformCustom; the testing set reuses the scaler fitted on dataSet
scaledDataSet = transformCustom(dataSet, scaler)
scaledDataSetTest = transformCustom(dataSetTest, scaler)

In [None]:
#Preview the first rows of the dataset returned by transformCustom
scaledDataSet.head()

In [None]:
#Search strike for ATM option: smallest strike of dataSet that is greater than or equal to the spot S0[0]
midS0 = dataSet[dataSet.index.get_level_values("Strike") >= S0[0]].index.get_level_values("Strike").min()

## Plot functions

In [None]:
#Plot loss for each epoch
def plotEpochLoss(lossSerie):
  """Show ``lossSerie`` as a black line on a 20x10 figure.

  lossSerie : loss values, one per epoch (anything ``Axes.plot`` accepts)
  """
  axes = plt.figure(figsize=(20,10)).gca()
  axes.set_facecolor('white')

  axes.plot(lossSerie , "-", color="black")

  axes.set_title("Training Loss evolution", fontsize=24)
  axes.set_xlabel("Epoch number", fontsize=18, labelpad=20)
  axes.set_ylabel("Logarithmic Loss", fontsize=18, labelpad=20)
  axes.tick_params(labelsize=16)
  plt.show()

In [None]:
#Default strike window used by the plotting functions: 70% to 130% of the spot S0[0].
#The functions below use KMin/KMax as default argument values, which Python binds
#when each "def" is executed, so this cell has to run before they are defined.
KMin = 0.7 * S0[0]
KMax = 1.3 * S0[0]


In [None]:
#Plot a surface as a superposition of curves, one curve per selected maturity
#data : series indexed by a MultiIndex with "Strike" and "Maturity" levels
#Title : plot title
#yMin : lowest strike kept (inclusive)
#yMax : highest strike kept (inclusive)
#zAsPercent : boolean, if true values are multiplied by 100 and the axis is formatted as percentage
def plotMultipleCurve(data,
                      Title = 'True Price Surface',
                      yMin = KMin,
                      yMax = KMax,
                      zAsPercent = False):
  strikes = data.index.get_level_values("Strike")
  dataCurve = data[(strikes >= yMin) & (strikes <= yMax)]
  maturities = dataCurve.index.get_level_values("Maturity")

  #Snap each target maturity to the first available maturity at or after it.
  #Targets beyond the last maturity are skipped and a maturity hit by several
  #targets is kept once, so no curve is drawn twice and no "nan" entry appears.
  selectedMaturities = []
  for t in np.linspace(0,0.8,9) :
    candidates = maturities[maturities >= t]
    if len(candidates) == 0 :
      continue
    k = candidates.min()
    if k not in selectedMaturities :
      selectedMaturities.append(k)

  fig = plt.figure(figsize=(20,10))
  ax = fig.gca()

  scale = 100 if zAsPercent else 1
  for k in selectedMaturities :
    curveK = dataCurve[maturities == k]
    dataSerie = pd.Series(curveK.values * scale,
                          index = curveK.index.get_level_values("Strike"))
    ax.plot(dataSerie , "--+", label=str(k))
  if selectedMaturities :
    ax.legend()
  ax.set_xlabel(data.index.names[0], fontsize=18, labelpad=20)
  ax.set_ylabel(data.name, fontsize=18, labelpad=20)
  if zAsPercent :
    ax.yaxis.set_major_formatter(mtick.PercentFormatter())
  ax.set_title(Title, fontsize=24)
  ax.tick_params(labelsize=16)
  ax.set_facecolor('white')
  plt.show()
  return

In [None]:
#Local volatility curves for maturities above 0.01 and strikes between 70% and 140% of the spot, shown as percentages
plotMultipleCurve(localVolatility["LocalVolatility"][localVolatility.index.get_level_values("Maturity")>0.01],
                  Title = 'Local Volatility Surface',
                  yMin=0.7*S0[0],
                  yMax=1.4*S0[0], 
                  zAsPercent=True)

In [None]:
#Plotting function for surface
#coordinates : 2D array, column 0 is plotted on the y axis and column 1 on the x axis
#zValue : array of values, one per row of coordinates
#xTitle : title for x axis
#yTitle : title for y axis
#zTitle : title for z axis
#Title : plot title
#az : azimuth i.e. angle of view for surface
#yMin : minimum value for y axis (points with y <= yMin are dropped)
#yMax : maximum value for y axis (points with y >= yMax are dropped)
#zAsPercent : boolean, if true z is multiplied by 100 and the z axis is formatted as percentage
def plotGridCustom(coordinates, zValue,
                   xTitle = "Maturity",
                   yTitle = "Strike",
                   zTitle = "Price",
                   Title = 'True Price Surface', 
                   az=320, 
                   yMin = KMin,
                   yMax = KMax,
                   zAsPercent = False):
  y = coordinates[:,0]
  filteredValue = (y > yMin) & (y < yMax)
  x = coordinates[:,1][filteredValue]
  y = y[filteredValue]
  z = zValue[filteredValue].flatten()
  zPlotted = z * 100 if zAsPercent else z
  
  fig = plt.figure(figsize=(20,10))
  #Figure.gca() does not accept keyword arguments on current Matplotlib releases,
  #add_subplot creates the 3D axes on every version
  ax = fig.add_subplot(111, projection='3d')
  
  ax.set_xlabel(xTitle, fontsize=18, labelpad=20)
  ax.set_ylabel(yTitle, fontsize=18, labelpad=20)
  ax.set_zlabel(zTitle, fontsize=18, labelpad=10)
  
  cmap=plt.get_cmap("inferno")
  surf = ax.plot_trisurf(x, y,
                         zPlotted,
                         linewidth=1.0,
                         antialiased=True, 
                         cmap = cmap,
                         color=(0,0,0,0))
  #Colour the mesh edges with the colormap values and make the faces transparent
  scaleEdgeValue = surf.to_rgba(surf.get_array())
  surf.set_edgecolors(scaleEdgeValue) 
  surf.set_alpha(0)

  if zAsPercent :
    ax.zaxis.set_major_formatter(mtick.PercentFormatter())
  ax.view_init(elev=10., azim=az)
  ax.set_title(Title, fontsize=24)
  ax.set_facecolor('white')

  plt.tick_params(labelsize=16)

  plt.show()

  return

In [None]:
#Plot one column of a dataframe as a surface
def plotSurface(data, 
                zName, 
                Title = 'True Price Surface', 
                az=320,
                yMin = KMin,
                yMax = KMax,
                zAsPercent = False):
  """Forward column ``zName`` of ``data`` to plotGridCustom.

  data : dataframe with a two-level index; level 0 labels the y axis and
         level 1 labels the x axis
  zName : column plotted on the z axis, also used as z axis title
  Title, az, yMin, yMax, zAsPercent : passed unchanged to plotGridCustom
  """
  gridPoints = data.index.to_frame().values
  levelNames = data.index.names
  plotGridCustom(gridPoints,
                 data[zName].values,
                 xTitle = levelNames[1],
                 yTitle = levelNames[0],
                 zTitle = zName,
                 Title = Title,
                 az = az,
                 yMin = yMin,
                 yMax = yMax,
                 zAsPercent = zAsPercent)

#Plot a pandas series as a surface
def plotSerie(data,
              Title = 'True Price Surface',
              az=320,
              yMin = KMin,
              yMax = KMax, 
              zAsPercent = False):
  """Forward a series to plotGridCustom.

  data : series with a two-level index; level 0 labels the y axis and
         level 1 labels the x axis, the series name is the z axis title
  Title, az, yMin, yMax, zAsPercent : passed unchanged to plotGridCustom
  """
  gridPoints = data.index.to_frame().values
  levelNames = data.index.names
  plotGridCustom(gridPoints,
                 data.values,
                 xTitle = levelNames[1],
                 yTitle = levelNames[0],
                 zTitle = data.name,
                 Title = Title,
                 az = az,
                 yMin = yMin,
                 yMax = yMax,
                 zAsPercent = zAsPercent)

In [None]:
#Scatter plot of two point sets in the same 3D axes
#coordinates, zValue : first point set, drawn in blue
#coordinates2, zValue2 : second point set, drawn in red
#  (for both sets, column 0 of the coordinates goes on the y axis and column 1 on the x axis)
#xTitle : title for x axis
#yTitle : title for y axis
#zTitle : title for z axis
#Title : accepted for signature compatibility, the title is currently not drawn
#az : azimuth i.e. angle of view
#yMin : minimum value for y axis (points with y <= yMin are dropped)
#yMax : maximum value for y axis (points with y >= yMax are dropped)
#zAsPercent : boolean, if true z is multiplied by 100 and the z axis is formatted as percentage
def plot2GridCustom(coordinates, zValue,
                    coordinates2, zValue2,
                    xTitle = "Maturity",
                    yTitle = "Strike",
                    zTitle = "Price",
                    Title = 'True Price Surface', 
                    az=320, 
                    yMin = KMin,
                    yMax = KMax,
                    zAsPercent = False):
  def filterPoints(coords, values):
    #Keep the points whose y coordinate lies strictly inside (yMin, yMax)
    yAll = coords[:,0]
    kept = (yAll > yMin) & (yAll < yMax)
    return coords[:,1][kept], yAll[kept], values[kept].flatten()

  x, y, z = filterPoints(coordinates, zValue)
  x2, y2, z2 = filterPoints(coordinates2, zValue2)

  if zAsPercent :
    #The percent formatter prints raw axis values followed by "%",
    #so the data is scaled the same way as in plotGridCustom
    z, z2 = z * 100, z2 * 100
  
  fig = plt.figure(figsize=(20,10))
  #Figure.gca() does not accept keyword arguments on current Matplotlib releases,
  #add_subplot creates the 3D axes on every version
  ax = fig.add_subplot(111, projection='3d')
  
  ax.set_xlabel(xTitle, fontsize=18, labelpad=20)
  ax.set_ylabel(yTitle, fontsize=18, labelpad=20)
  ax.set_zlabel(zTitle, fontsize=18, labelpad=10)
  
  ax.scatter(x2, y2, z2, marker='o', color="r", alpha=1, s=40)
  ax.scatter(x, y, z, marker='o', color="b", alpha=1, s=40)

  if zAsPercent :
    ax.zaxis.set_major_formatter(mtick.PercentFormatter())
  ax.view_init(elev=10., azim=az)
  ax.set_facecolor('white')

  plt.tick_params(labelsize=16)

  plt.show()

  return

#Scatter two pandas series in the same 3D axes
def plot2Series(data, 
                data2,
                Title = 'True Price Surface',
                az=320,
                yMin = KMin,
                yMax = KMax, 
                zAsPercent = False):
  """Forward two series to plot2GridCustom.

  data : first series; its two-level index provides the axis titles
         (level 0 for y, level 1 for x) and its name the z axis title
  data2 : second series, plotted from its own index and values
  Title, az, yMin, yMax, zAsPercent : passed unchanged to plot2GridCustom
  """
  firstPoints = data.index.to_frame().values
  secondPoints = data2.index.to_frame().values
  levelNames = data.index.names
  plot2GridCustom(firstPoints,
                  data.values,
                  secondPoints,
                  data2.values,
                  xTitle = levelNames[1],
                  yTitle = levelNames[0],
                  zTitle = data.name,
                  Title = Title,
                  az = az,
                  yMin = yMin,
                  yMax = yMax,
                  zAsPercent = zAsPercent)

In [None]:
#Display the colour that the "plasma" colormap returns for the input 0
plt.get_cmap("plasma")(0)

In [None]:
#3D view of the "Price" column of dataSet
plotSurface(dataSet, "Price", Title = 'True Price Surface')

In [None]:
#Sanity check: send the scaled dataset back through inverseTransform and preview the first rows
inverseTransform(scaledDataSet, scaler).head()

In [None]:
def convertToLogMoneyness(formerSerie):
  """Re-index a series or dataframe by (LogMoneyness, Maturity).

  formerSerie : pandas Series or DataFrame whose index has "Strike" and
                "Maturity" levels

  The log-moneyness is log(S0[0] / Strike), with S0 read from the notebook
  globals. Values are copied unchanged; only the index is replaced. A Series
  input gives a Series without a name (as before), any other input is
  rebuilt as a DataFrame with the same columns.
  """
  maturity = formerSerie.index.get_level_values("Maturity")
  strikes = formerSerie.index.get_level_values("Strike")
  logMoneyness = np.log(S0[0] / np.asarray(strikes))
  newIndex = pd.MultiIndex.from_arrays([logMoneyness, np.array(maturity.values)], names=('LogMoneyness', 'Maturity'))
  #isinstance also accepts Series subclasses and avoids building an empty Series just to get its type
  if isinstance(formerSerie, pd.Series) :
    return pd.Series(formerSerie.values , index=newIndex)
  return pd.DataFrame(formerSerie.values, index = newIndex, columns= formerSerie.columns)

In [None]:
#Plot predicted value, benchmark value, absolute error and relative error
#It also prints the RMSE between predValue and refValue
#predValue : approximated value 
#refValue : benchmark value
#quantityName : name for approximated quantity
#az : azimuth i.e. angle of view for surface
#yMin : minimum value for y axis
#yMax : maximum value for y axis
def predictionDiagnosis(predValue, 
                        refValue, 
                        quantityName, 
                        az=320,
                        yMin = KMin,
                        yMax = KMax):
  #Only maturities above this threshold are diagnosed
  minMaturity = 0.001
  predKept = predValue[predValue.index.get_level_values("Maturity") > minMaturity]
  refKept = refValue[refValue.index.get_level_values("Maturity") > minMaturity]
  #View settings shared by the four plots
  view = dict(az = az, yMin = yMin, yMax = yMax)

  plotSerie(predKept.rename(quantityName),
            Title = "Predicted " + quantityName + " surface",
            **view)

  plotSerie(refKept.rename(quantityName),
            Title = "True " + quantityName + " surface",
            **view)

  #Both error plots share the same title
  errorTitle = quantityName + " surface error"
  absoluteError = np.abs(predKept - refKept)
  plotSerie(absoluteError.rename(quantityName + " Absolute Error"),
            Title = errorTitle,
            **view)

  #The relative error is divided by the signed benchmark value
  relativeError = absoluteError / refKept
  plotSerie(relativeError.rename(quantityName + " Relative Error (%)"),
            Title = errorTitle,
            zAsPercent = True,
            **view)

  print("RMSE : ", np.sqrt(np.mean(np.square(absoluteError))) )

#Diagnose Price, local volatility, theta and gamma against a benchmark dataset
#price, volLocale, delta_T, gamma_K : predicted series
#benchDataset : dataframe with "Price", "locvol", "Theta" and "Gamma Strike" columns
#sigma : not used by this function
#az : azimuth used for the local volatility plots
#yMin, yMax : strike window for the plots
#logMoneynessScale : boolean, if true everything is re-indexed by log-moneyness before plotting
def modelSummary(price, 
                 volLocale, 
                 delta_T, 
                 gamma_K, 
                 benchDataset,
                 sigma=0.3, 
                 az=40,
                 yMin = KMin,
                 yMax = KMax,
                 logMoneynessScale = False):
  #Count the points where theta or gamma is negative
  nbArbitrageViolations = ((delta_T<0) + (gamma_K<0)).sum()
  print("Number of static arbitrage violations : ", nbArbitrageViolations)

  predictions = [price, volLocale, delta_T, gamma_K]
  if logMoneynessScale : 
    predictions = [convertToLogMoneyness(serie) for serie in predictions]
    benchDatasetScaled = convertToLogMoneyness(benchDataset)
    #log(S0 / K) decreases with K, so the bounds swap and the view is rotated
    yMinScaled, yMaxScaled = np.log(S0[0]/yMax), np.log(S0[0]/yMin)
    azimutIncrement = 180
  else : 
    benchDatasetScaled = benchDataset
    yMinScaled, yMaxScaled = yMin, yMax
    azimutIncrement = 0

  #One diagnosis per quantity: benchmark column, displayed name, base azimuth
  benchColumns = ["Price", "locvol", "Theta", "Gamma Strike"]
  quantityNames = ["Price", "Local volatility", "Theta", "Gamma Strike"]
  baseAzimuths = [320, az, 340, 340]
  for predicted, benchColumn, quantityName, baseAzimuth in zip(predictions, benchColumns, quantityNames, baseAzimuths):
    reference = benchDatasetScaled[benchColumn]
    predictionDiagnosis(predicted, 
                        reference, 
                        quantityName,
                        az = baseAzimuth + azimutIncrement,
                        yMin = yMinScaled,
                        yMax = yMaxScaled)
  

### Implied volatility function calibration by bisection

In [None]:
def bs_price(cp, s, k, rf, t, v, div):
    """ Price an option using the Black-Scholes model.
    cp: +1/-1 for call/put
    s: initial stock price
    k: strike price
    rf: risk-free rate
    t: expiration time
    v: volatility
    div: dividend
    Returns the option price. t and v must be non-zero (they are divisors).
    """
    d1 = (np.log(s/k)+(rf-div+0.5*v*v)*t)/(v*np.sqrt(t))
    d2 = d1 - v*np.sqrt(t)

    optprice = (cp*s*np.exp(-div*t)*st.norm.cdf(cp*d1)) - (cp*k*np.exp(-rf*t)*st.norm.cdf(cp*d2))

    return optprice

def bissectionMethod(S_0, r, q, implied_vol0, maturity, Strike, refPrice, epsilon):
    """ Calibrate the Black-Scholes volatility of a put by bisection.
    S_0: initial stock price
    r: risk-free rate
    q: dividend
    implied_vol0: starting volatility
    maturity: expiration time
    Strike: strike price
    refPrice: put price to match
    epsilon: price tolerance, also used as lower bound of the volatility bracket

    The volatility is searched in [epsilon, 2]. When refPrice is above the
    price at the upper bound (or below the price at the lower bound), that
    bound and its price are returned without iterating.

    Returns (price, volatility, pandas Series of the absolute price errors
    recorded at each iteration).
    """
    #Halving the bracket reaches floating point resolution long before this
    #many steps; the cap only prevents an endless loop when the tolerance
    #cannot be met
    maxIterations = 200

    def putPrice(sigma):
        #Uses the S_0 argument (not a notebook global) so the function is self-contained
        return bs_price(-1, S_0, Strike, r, maturity, sigma, q)

    calibratedSigma = implied_vol0
    #Price for the initial volatility
    priceBS = putPrice(calibratedSigma)
    sigmaUp = 2.0
    sigmaInf = epsilon
    lossSerie = []

    priceMax = putPrice(sigmaUp)
    if priceMax < refPrice:
        return priceMax, sigmaUp, pd.Series(lossSerie)

    priceMin = putPrice(sigmaInf)
    if priceMin > refPrice:
        return priceMin, sigmaInf, pd.Series(lossSerie)

    #Stop the optimization when the error is less than epsilon
    while abs(priceBS - refPrice) > epsilon and len(lossSerie) < maxIterations:
        #Update the upper bound or the lower bound
        #by comparing calibrated price and the target price
        if priceBS < refPrice :
            sigmaInf = calibratedSigma
        else :
            sigmaUp = calibratedSigma
        #Update calibratedSigma
        calibratedSigma = (sigmaUp + sigmaInf) / 2
        #Update calibrated price
        priceBS = putPrice(calibratedSigma)
        #Record the calibration error for this step
        lossSerie.append(abs(priceBS - refPrice))

    return priceBS, calibratedSigma, pd.Series(lossSerie)

In [None]:
#Execute calibration of implied volatility from estimated price and benchmark implied volatility
#Then plot estimated implied vol, absolute and relative error
def plotImpliedVol(priceSurface, 
                   refImpliedVol, 
                   rIntegralSpline = None, 
                   qIntegralSpline = None, 
                   az=40,
                   yMin = KMin,
                   yMax = KMax,
                   relativeErrorVolMax = 1000,
                   logMoneynessScale = False):
    """Drop maturities up to 0.001, then delegate to plotImpliedVolConcrete.

    priceSurface, refImpliedVol : series with a "Maturity" index level
    Every other argument is forwarded unchanged. Returns the value returned
    by plotImpliedVolConcrete.
    """
    minMaturity = 0.001
    keptPrices = priceSurface[priceSurface.index.get_level_values("Maturity") > minMaturity]
    keptRefVols = refImpliedVol[refImpliedVol.index.get_level_values("Maturity") > minMaturity]
    return plotImpliedVolConcrete(keptPrices,
                                  keptRefVols,
                                  rIntegralSpline = rIntegralSpline,
                                  qIntegralSpline = qIntegralSpline,
                                  az = az,
                                  yMin = yMin,
                                  yMax = yMax,
                                  relativeErrorVolMax = relativeErrorVolMax,
                                  logMoneynessScale = logMoneynessScale)

def plotImpliedVolConcrete(priceSurface,
                           refImpliedVol,
                           rIntegralSpline = None,
                           qIntegralSpline = None,
                           az=40,
                           yMin = KMin,
                           yMax = KMax,
                           relativeErrorVolMax = 10,
                           logMoneynessScale = False):
    """Calibrate implied volatilities from prices, plot them and their errors.

    priceSurface : price series indexed by "Strike" and "Maturity"
    refImpliedVol : benchmark implied volatilities on the same index
    rIntegralSpline, qIntegralSpline : optional callables of the maturity; when
        given, the rate (resp. dividend) is spline(T) / T, otherwise the
        notebook globals r (resp. q) are used
    az, yMin, yMax : view settings of the plots
    relativeErrorVolMax : cap, in percent, applied to the plotted relative error
    logMoneynessScale : boolean, if true plots are indexed by log-moneyness

    Prints the RMSE of the implied volatility and returns the calibrated series.
    """
    if logMoneynessScale :
        priceSurfaceScaled = convertToLogMoneyness(priceSurface)
        refImpliedVolScaled = convertToLogMoneyness(refImpliedVol)
    else :
        priceSurfaceScaled = priceSurface
        refImpliedVolScaled = refImpliedVol

    #One row per point; "Strike" always holds the actual strike used for pricing
    df = priceSurfaceScaled.index.to_frame()
    df["Price"] = priceSurfaceScaled
    strikes = priceSurface.index.to_frame()["Strike"]
    df["Strike"] = convertToLogMoneyness(strikes) if logMoneynessScale else strikes

    if logMoneynessScale :
        scaledYMin, scaledYMax, azimutIncrement = np.log(S0[0]/yMax), np.log(S0[0]/yMin), 180
    else :
        scaledYMin, scaledYMax, azimutIncrement = yMin, yMax, 0
    #View settings shared by the three plots
    view = dict(az = az + azimutIncrement, yMin = scaledYMin, yMax = scaledYMax)

    epsilon = 1e-9
    def calibrateRow(row):
        #Implied volatility of one point, bisection started at 0.2
        maturity = row["Maturity"]
        rate = rIntegralSpline(maturity)/maturity if (rIntegralSpline is not None) else r
        dividend = qIntegralSpline(maturity)/maturity if (qIntegralSpline is not None) else q
        return bissectionMethod(S0,
                                rate,
                                dividend,
                                0.2,
                                maturity,
                                row["Strike"],
                                row["Price"],
                                epsilon)[1]

    impliedVol = df.apply(calibrateRow, axis = 1).rename("Implied Volatility")
    impliedVolError = np.abs(impliedVol-refImpliedVolScaled).rename('Absolute Error')
    relativeImpliedVolError = (impliedVolError / refImpliedVolScaled).rename("Relative error (%)")

    plotSerie(impliedVol,
              Title = 'Implied volatility surface',
              **view)

    plotSerie(impliedVolError,
              Title = 'Implied volatility error',
              **view)

    plotSerie(relativeImpliedVolError.clip(0,relativeErrorVolMax / 100.0),
              Title = 'Implied volatility relative error',
              zAsPercent = True,
              **view)

    print("Implied volalitity RMSE : ", np.sqrt(np.mean(np.square(impliedVolError))) )

    return impliedVol

In [None]:
%matplotlib inline

In [None]:
#Local volatility surface for strikes between 70% and 140% of the spot.
#The bounds use the scalar S0[0], like the other cells of this notebook.
plotSerie(localVolatility["LocalVolatility"],
          Title = 'Local Volatility Surface',
          az=30,
          yMin=0.7*S0[0],
          yMax=1.4*S0[0], zAsPercent=True)

In [None]:
#Local volatility column of dataSet for strikes between 70% and 140% of the spot.
#The bounds use the scalar S0[0], like the other cells of this notebook.
plotSerie(dataSet["locvol"],
          Title = 'Local Volatility Surface',
          az=30,
          yMin=0.7*S0[0],
          yMax=1.4*S0[0], zAsPercent=True)

In [None]:
#Preview the first local volatility values of dataSet
dataSet["locvol"].head()

## Learning Price

In [None]:
#Import tensorflow for 1.x version 
from keras.layers import Dense, Input
from keras import Model
import keras.backend as K
import keras.activations as Act
from functools import partial
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [None]:
#Deactivate warning messages: TensorFlow logging verbosity is set to the ERROR level
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
hyperparameters = {
    #Penalization coefficients
    "lambdaLocVol" : 100,
    "lambdaSoft" : 100,
    "lambdaGamma" : 10000,

    #Derivative soft constraints parameters
    "lowerBoundTheta" : 0.01,
    "lowerBoundGamma" : 0.00001,

    #Local variance parameters
    "DupireVarCap" : 10,
    "DupireVolLowerBound" : 0.05,
    "DupireVolUpperBound" : 0.40,

    #Learning scheduler coefficients
    "LearningRateStart" : 0.1,
    "Patience" : 100,
    "batchSize" : 50,
    "FinalLearningRate" : 1e-6,
    "FixedLearningRate" : False,

    #Training parameters
    "nbUnits" : 200, #number of units for hidden layers
    "maxEpoch" : 10000, #maximum number of epochs
}

### Learning scheduler

In [None]:
#Format result from training step
def evalAndFormatResult(price, loss, dataSet):
    """Turn raw training outputs into pandas objects.

    price : array of predicted prices, flattened and indexed like ``dataSet``
    loss : sequence of loss values
    dataSet : dataframe providing the index of the predictions

    Returns (predicted "Price" series passed through inverseTransformColumn
    with the notebook-level ``scaler``, loss values as a pandas Series).
    """
    flatPrice = price.flatten()
    scaledPredPrice = pd.Series(flatPrice, index = dataSet.index).rename("Price")
    predPrice = inverseTransformColumn(dataSet, scaler, scaledPredPrice)
    lossPerEpoch = pd.Series(loss)
    return predPrice, lossPerEpoch

#Format result from training step when local volatility is computed
def evalAndFormatDupireResult(price, volDupire, theta, gamma, dupireVar, loss, dataSet):
    """Turn raw training outputs, greeks included, into pandas series.

    price, volDupire, theta, gamma : arrays, flattened and indexed like ``dataSet``
    dupireVar : not used by this function
    loss : sequence of loss values
    dataSet : dataframe providing the index of the predictions

    Returns (price, "Dupire" series, theta, gamma with respect to strike, loss
    series). Theta and gamma go through inverseTransformColumnGreeks with the
    notebook-level ``scaler`` (gamma with order 2 on "ChangedStrike").
    """
    def toSerie(values, name):
        #Flatten an array and label it with the index of dataSet
        return pd.Series(values.flatten(), index = dataSet.index).rename(name)

    predPrice, lossEpoch = evalAndFormatResult(price, loss, dataSet)

    predDupire = toSerie(volDupire, "Dupire")

    predTheta = inverseTransformColumnGreeks(dataSet, scaler, toSerie(theta, "Theta"),
                                             "Price", "Maturity")

    predGammaK = inverseTransformColumnGreeks(dataSet, scaler, toSerie(gamma, "GammaK"),
                                              "Price", "ChangedStrike", order = 2)

    return predPrice, predDupire, predTheta, predGammaK, lossEpoch



In [None]:
#Penalization for pseudo local volatility
def intervalRegularization(localVariance, vegaRef, hyperParameters):
  """Penalize local variances outside the squared volatility bounds.

  localVariance : tensor of local variances
  vegaRef : tensor whose mean divides the penalization weight
  hyperParameters : dictionary providing "DupireVolLowerBound",
                    "DupireVolUpperBound", "DupireVarCap" and "lambdaLocVol"

  The variance is clipped to [0, DupireVarCap]; the penalty is the distance
  below the squared lower bound plus the distance above the squared upper
  bound, averaged over its finite entries and weighted by
  lambdaLocVol / mean(vegaRef).
  """
  lowerVarianceBound = tf.square(hyperParameters["DupireVolLowerBound"])
  upperVarianceBound = tf.square(hyperParameters["DupireVolUpperBound"])
  clippedVariance = tf.clip_by_value(localVariance, 0, hyperParameters["DupireVarCap"])

  belowPenalty = tf.nn.relu(lowerVarianceBound - clippedVariance)
  abovePenalty = tf.nn.relu(clippedVariance - upperVarianceBound)
  reg = belowPenalty + abovePenalty

  weight = hyperParameters["lambdaLocVol"] / tf.reduce_mean(vegaRef)
  finitePenalties = tf.boolean_mask(reg, tf.is_finite(reg))
  return weight * tf.reduce_mean(finitePenalties)

#Add the interval regularization to the list of penalizations
def addDupireRegularisation(priceTensor, tensorList, penalizationList, formattingResultFunction, vegaRef, hyperParameters):
    """Append the interval penalty computed on the last tensor of ``tensorList``.

    Returns the four leading arguments unchanged, except that the penalization
    list is a new list ending with the extra penalty.
    """
    dupirePenalty = intervalRegularization(tensorList[-1], vegaRef, hyperParameters)
    extendedPenalizations = penalizationList + [dupirePenalty]
    return priceTensor, tensorList, extendedPenalizations, formattingResultFunction

In [None]:
#Mini-batch sampling methods for large datasets
def selectMiniBatchWithoutReplacement(dataSet, batch_size):
    """Split a shuffled copy of the row order into consecutive mini-batches.

    dataSet : dataframe to split
    batch_size : number of rows per batch, the last batch may be smaller

    Returns a list of dataframes; every row of ``dataSet`` appears in exactly
    one batch and no batch is empty (an empty ``dataSet`` gives an empty list).
    """
    nbObs = dataSet.shape[0]
    idx = np.arange(nbObs)
    np.random.shuffle(idx)
    #range() already yields ceil(nbObs / batch_size) starting points, so no
    #extra slice is taken after the last one (it would always be empty)
    return [dataSet.iloc[idx[firstBatchIndex:firstBatchIndex + batch_size],:]
            for firstBatchIndex in range(0, nbObs, batch_size)]

def selectMiniBatchWithReplacement(dataSet, batch_size):
    """Draw mini-batches of rows uniformly at random, with replacement.

    dataSet : dataframe to sample from
    batch_size : number of rows drawn for each batch

    Returns ceil(nbObs / batch_size) + 1 dataframes of ``batch_size`` rows each.
    """
    nbObs = dataSet.shape[0]
    nbBatches = int(np.ceil(nbObs/batch_size)) + 1
    return [dataSet.iloc[np.random.randint(nbObs, size = batch_size),:]
            for _ in range(nbBatches)]


In [None]:
#Train neural network with a decreasing rule for learning rate
#NNFactory :  function creating the architecture; called with (number of hidden units,
#             strike placeholder, maturity placeholder, strike scale tensor, strike minimum tensor,
#             vegaRef placeholder, hyperparameters) and returning
#             (price tensor, list of tensors to evaluate, list of penalization tensors, formatting function)
#dataSet : training data, with "ChangedStrike", "Maturity", "Price", "DividendFactor" and "VegaRef" columns
#activateRegularization : boolean, if true add bound penalization to dupire variance
#hyperparameters : dictionary containing various hyperparameters
#modelName : name under which tensorflow model is saved
#Returns the value of the formatting function applied to the evaluated tensors, the loss history and dataSet
def create_train_model(NNFactory, 
                       dataSet, 
                       activateRegularization, 
                       hyperparameters,
                       modelName = "bestModel"):
    hidden_nodes = hyperparameters["nbUnits"] 
    nbEpoch = hyperparameters["maxEpoch"] 
    patience = hyperparameters["Patience"]
    
    #"not" instead of "~": bitwise inversion of a Python bool gives -1 or -2,
    #which are both truthy, so the fixed learning rate option was never honoured
    activateLearningDecrease = not hyperparameters["FixedLearningRate"]
    learningRate = hyperparameters["LearningRateStart"]
    learningRateEpoch = 0
    finalLearningRate = hyperparameters["FinalLearningRate"]

    #Only needed by the mini-batch alternative mentioned in the training loop
    batch_size = hyperparameters["batchSize"]

    start = time.time()
    # Reset the graph
    tf.reset_default_graph()
    
    # Placeholders for input and output data   
    Strike = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    factorPrice = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])
    
    #Get scaling for strike
    colStrikeIndex = dataSet.columns.get_loc("ChangedStrike")
    maxColFunction = scaler.data_max_[colStrikeIndex]
    minColFunction = scaler.data_min_[colStrikeIndex]
    scF = (maxColFunction - minColFunction) 
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    strikeMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    #Build the architecture, then optionally add pseudo local volatility regularisation
    factoryOutput = NNFactory(hidden_nodes,
                              Strike,
                              Maturity, 
                              scaleTensor, 
                              strikeMinTensor, 
                              vegaRef, 
                              hyperparameters)
    if activateRegularization :
        factoryOutput = addDupireRegularisation(*factoryOutput,
                                                vegaRef, 
                                                hyperparameters)
    price_pred_tensor, TensorList, penalizationList, formattingFunction = factoryOutput

    #The network output is multiplied by the dividend factor fed with each batch
    price_pred_tensor_sc= tf.multiply( factorPrice, price_pred_tensor)
    TensorList[0] = price_pred_tensor_sc
    
    # Define a loss function: log of (mean absolute error / vegaRef + penalizations)
    pointwiseError = tf.reduce_mean(tf.abs(price_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList) 
    loss = tf.log(tf.reduce_mean(errors))

    # Define a train operation to minimize the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    train = optimizer.minimize(loss)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    #Strikes are always fed min-max scaled, whatever the activateScaling switch
    scaledInput = transformCustomMinMax(dataSet, scaler)
    loss_serie = []

    def createFeedDict(batch):
        batchSize = batch.shape[0]
        feedDict = {Strike : scaledInput["ChangedStrike"].loc[batch.index].values.reshape(batchSize,1),
                    Maturity : batch["Maturity"].values.reshape(batchSize,1), 
                    y : batch["Price"].values.reshape(batchSize,1),
                    factorPrice : batch["DividendFactor"].values.reshape(batchSize,1), 
                    learningRateTensor : learningRate,
                    #Ones are fed instead of the "VegaRef" values: only the shape of the column is used
                    vegaRef : np.ones_like(batch["VegaRef"].values.reshape(batchSize,1))}
        return feedDict

    #Learning rate is divided by 10 if no improvement is observed for training loss after "patience" epochs
    #Returns (continue training ?, learning rate, epoch of the latest learning rate change)
    def updateLearningRate(iterNumber, lr, lrEpoch):
        if not activateLearningDecrease :
            print("Constant learning rate, stop training")
            return False, lr, lrEpoch
        if lr > finalLearningRate :
            lr *= 0.1
            lrEpoch = iterNumber
            #Restart from the best parameters saved so far
            saver.restore(sess, modelName)
            print("Iteration : ", lrEpoch, "new learning rate : ", lr)
        else :
          print("Last Iteration : ", lrEpoch, "final learning rate : ", lr)
          return False, lr, lrEpoch
        return True, lr, lrEpoch

    #Print loss and penalizations of the parameters currently loaded in the session
    def evalBestModel():
        if not activateLearningDecrease :
            print("Learning rate : ", learningRate, " final loss : ", min(loss_serie))
        currentBestLoss = sess.run(loss, feed_dict=epochFeedDict)
        currentBestPenalizations = sess.run([pointwiseError, penalizationList], feed_dict=epochFeedDict)
        print("Best loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, len(loss_serie), currentBestLoss))
        print("Best Penalization : ", currentBestPenalizations)
        return

    # Initialize variables and run session
    sess = tf.Session()
    try :
        #The session is closed even when training fails or is interrupted
        sess.run(init)

        #Feed dictionary on the whole dataset, used for every evaluation
        epochFeedDict = createFeedDict(dataSet)

        for i in range(nbEpoch):
            #Full-batch training; for mini-batches use
            #miniBatchList = selectMiniBatchWithoutReplacement(dataSet, batch_size)
            miniBatchList = [dataSet]
            for miniBatch in miniBatchList :
                sess.run(train, feed_dict=createFeedDict(miniBatch))

            loss_serie.append(sess.run(loss, feed_dict=epochFeedDict))

            if (len(loss_serie) < 2) or (loss_serie[-1] <= min(loss_serie)):
              #Save model as model is improved
              saver.save(sess, modelName)
            if (np.isnan(loss_serie[-1]) or  #Unstable model
                ( (i-learningRateEpoch >= patience) and (min(loss_serie[-patience:]) > min(loss_serie)) ) ) : #No improvement for training loss during the latest "patience" epochs
              continueTraining, learningRate, learningRateEpoch = updateLearningRate(i, learningRate, learningRateEpoch)
              if continueTraining :
                evalBestModel()
              else :
                break
        #Reload the best saved parameters before the final evaluation
        saver.restore(sess, modelName)  

        evalBestModel()

        evalList  = sess.run(TensorList, feed_dict=epochFeedDict)
    finally :
        sess.close()
    end = time.time()
    print("Training Time : ", end - start)
    
    return formattingFunction(*evalList, loss_serie, dataSet) 

In [None]:
#Evaluate neural network without training, it restores parameters obtained from a pretrained model 
def create_eval_model(NNFactory, 
                      dataSet, 
                      activateRegularization, 
                      hyperparameters,
                      modelName = "bestModel"):
    """Rebuild the graph, restore a saved model and evaluate it on dataSet.

    No training step is executed: network parameters come from the checkpoint
    named `modelName`.

    Args:
        NNFactory: function creating the neural architecture. It is called as
            NNFactory(hidden_nodes, Strike, Maturity, scaleTensor,
            strikeMinTensor, vegaRef, hyperparameters, IsTraining=False) and
            its result is unpacked as (price tensor, tensor list,
            penalization list, formatting function).
        dataSet: DataFrame on which the network is evaluated. The columns
            "ChangedStrike", "Maturity", "Price", "DividendFactor" and
            "VegaRef" are read.
        activateRegularization: boolean, if true the factory output is passed
            through addDupireRegularisation (bound penalization for Dupire
            variance).
        hyperparameters: dictionary containing various hyperparameters;
            "nbUnits", "FixedLearningRate" and "LearningRateStart" are read
            here, the dictionary is also forwarded to the factory.
        modelName: name of the tensorflow model to restore.

    Returns:
        The result of the factory's formatting function called with the
        evaluated tensors, the placeholder loss series [0] and dataSet.

    Note:
        Relies on the notebook-level names `scaler`, `transformCustomMinMax`
        and (when activateRegularization is true) `addDupireRegularisation`.
    """
    hidden_nodes = hyperparameters["nbUnits"]

    # Logical negation is required here: bitwise `~` maps True/False to
    # -2/-1, both truthy, so the flag could never evaluate to False.
    activateLearningDecrease = not hyperparameters["FixedLearningRate"]
    learningRate = hyperparameters["LearningRateStart"]

    # Reset the graph
    tf.reset_default_graph()

    # Placeholders for input and output data
    Strike = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    factorPrice = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])

    # Get scaling for strike from the fitted min-max scaler
    colStrikeIndex = dataSet.columns.get_loc("ChangedStrike")
    maxColFunction = scaler.data_max_[colStrikeIndex]
    minColFunction = scaler.data_min_[colStrikeIndex]
    scF = (maxColFunction - minColFunction)
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    strikeMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    # Build the network in evaluation mode
    factoryOutput = NNFactory(hidden_nodes,
                              Strike,
                              Maturity,
                              scaleTensor,
                              strikeMinTensor,
                              vegaRef,
                              hyperparameters,
                              IsTraining=False)
    if activateRegularization :
        factoryOutput = addDupireRegularisation(*factoryOutput,
                                                vegaRef,
                                                hyperparameters)
    price_pred_tensor, TensorList, penalizationList, formattingFunction = factoryOutput

    # The first evaluated tensor is the price rescaled by the dividend factor
    price_pred_tensor_sc = tf.multiply(factorPrice, price_pred_tensor)
    TensorList[0] = price_pred_tensor_sc

    # Define a loss function
    pointwiseError = tf.reduce_mean(tf.abs(price_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList)
    loss = tf.log(tf.reduce_mean(errors))

    # NOTE(review): the train operation is never run in this function.
    # Presumably it is built so that the graph holds the same optimizer
    # variables as the training graph whose checkpoint is restored below;
    # confirm before removing.
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    optimizer.minimize(loss)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    scaledInput = transformCustomMinMax(dataSet, scaler)
    loss_serie = []

    def createFeedDict(batch):
        # Build the placeholder feed for the rows of `batch`
        batchSize = batch.shape[0]
        feedDict = {Strike : scaledInput["ChangedStrike"].loc[batch.index].values.reshape(batchSize,1),
                    Maturity : batch["Maturity"].values.reshape(batchSize,1), 
                    y : batch["Price"].values.reshape(batchSize,1),
                    factorPrice : batch["DividendFactor"].values.reshape(batchSize,1), 
                    learningRateTensor : learningRate,
                    # vegaRef is fed with ones (same shape as the "VegaRef" column)
                    vegaRef : np.ones_like(batch["VegaRef"].values.reshape(batchSize,1))}
        return feedDict

    epochFeedDict = createFeedDict(dataSet)

    def evalBestModel():
        # loss_serie stays empty when no training is done, hence the guard
        # before calling min() on it.
        if (not activateLearningDecrease) and loss_serie :
            print("Learning rate : ", learningRate, " final loss : ", min(loss_serie))
        currentBestLoss = sess.run(loss, feed_dict=epochFeedDict)
        currentBestPenalizations = sess.run([pointwiseError, penalizationList], feed_dict=epochFeedDict)
        print("Best loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, len(loss_serie), currentBestLoss))
        print("Best Penalization : ", currentBestPenalizations)
        return

    sess = tf.Session()
    try:
        sess.run(init)
        saver.restore(sess, modelName)

        evalBestModel()

        evalList = sess.run(TensorList, feed_dict=epochFeedDict)
    finally:
        # Release the session even if restoring or evaluating fails
        sess.close()

    return formattingFunction(*evalList, [0], dataSet)

### Convex architecture (Price only)

In [None]:

#Soft constraints for strike convexity and strike/maturity monotonicity  
def arbitragePenalties(priceTensor, strikeTensor, maturityTensor, scaleTensor, vegaRef, hyperparameters):
    """Build the two soft no-arbitrage penalties of the price network.

    Args:
        priceTensor: network output that is differentiated.
        strikeTensor: strike input; the second derivative with respect to it
            is divided by scaleTensor squared.
        maturityTensor: maturity input.
        scaleTensor: scaling applied to the strike.
        vegaRef: tensor whose mean divides the penalty weight.
        hyperparameters: dictionary; "lambdaSoft", "lambdaGamma",
            "lowerBoundTheta" and "lowerBoundGamma" are read.

    Returns:
        [grad_penalty, hessian_penalty]: mean relu of the amount by which the
        maturity derivative, resp. the rescaled strike second derivative,
        falls below its lower bound, each multiplied by its weight.
    """
    dK = tf.gradients(priceTensor, strikeTensor, name="dK")[0]
    hK = tf.gradients(dK, strikeTensor, name="hK")[0] / tf.square(scaleTensor)
    theta = tf.gradients(priceTensor, maturityTensor, name="dT")[0]

    lambdas = hyperparameters["lambdaSoft"] / tf.reduce_mean(vegaRef)
    lowerBoundTheta = tf.constant(hyperparameters["lowerBoundTheta"])
    lowerBoundGamma = tf.constant(hyperparameters["lowerBoundGamma"])
    grad_penalty = lambdas * tf.reduce_mean(tf.nn.relu(-theta + lowerBoundTheta))
    # The convexity penalty is weighted by "lambdaGamma", like the other
    # architectures of this file; weighting it by the bound "lowerBoundGamma"
    # would cancel the penalty whenever that bound is zero.
    hessian_penalty = lambdas * hyperparameters["lambdaGamma"] * tf.reduce_mean(tf.nn.relu(-hK + lowerBoundGamma))

    return [grad_penalty, hessian_penalty]

In [None]:
#Tool functions for the neural network architecture

#Initialize weights as positive
def positiveKernelInitializer(shape, dtype=None, partition_info=None):
  """Return the absolute value of weights drawn by the Keras `normal` initializer."""
  gaussianInitializer = tf.keras.initializers.normal()
  rawWeights = gaussianInitializer(shape, dtype=dtype, partition_info=partition_info)
  return tf.abs(rawWeights)

#Soft convex layer
def convexLayer(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Dense layer with glorot-normal kernel followed by a softplus activation.

  The input is negated before the dense layer when isNonDecreasing is false.
  isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = (- tensor)
    preActivation = tf.layers.dense(signedInput,
                                    units=n_units,
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.softplus(preActivation)
    return activated

#Soft monotonic layer
def monotonicLayer(n_units,  tensor, isTraining, name):
  """Dense layer with glorot-normal kernel followed by a sigmoid activation.

  isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    preActivation = tf.layers.dense(tensor,
                                    units=n_units,
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.sigmoid(preActivation)
    return activated

#Soft convex layer followed by output layer for regression 
def convexOutputLayer(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Stack a softplus hidden layer of 2*n_units and a single-unit softplus output.

  The input is negated first when isNonDecreasing is false. The output layer
  kernel is initialized with positiveKernelInitializer. isTraining is
  accepted but not used.
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = (- tensor)

    #Hidden layer, twice as wide as n_units
    hiddenLayer = tf.layers.dense(signedInput,
                                  units=2*n_units,
                                  kernel_initializer=tf.keras.initializers.glorot_normal(),
                                  activation = 'softplus')

    #Single-unit output layer
    outputLayer = tf.layers.dense(hiddenLayer,
                                  units=1,
                                  kernel_initializer=positiveKernelInitializer,
                                  activation = 'softplus')
    return outputLayer




#Neural network factory for the hybrid approach : split network with soft constraints
def NNArchitectureConstrained(n_units, 
                              strikeTensor,
                              maturityTensor, 
                              scaleTensor, 
                              strikeMinTensor, 
                              vegaRef, 
                              hyperparameters,
                              IsTraining=True):
  """Build the split strike/maturity price network with soft penalties.

  Returns the price tensor, the list of tensors to evaluate ([price]), the
  penalties from arbitragePenalties and the formatting function
  evalAndFormatResult. strikeMinTensor is accepted but not used here.
  """
  #First split layer : strike branch, then maturity branch
  strikeBranch = convexLayer(n_units, strikeTensor, IsTraining, "Hidden1S")
  maturityBranch = monotonicLayer(n_units, maturityTensor, IsTraining, "Hidden1M")
  mergedBranches = tf.concat([strikeBranch, maturityBranch], axis=-1)

  #Second and output layer
  out = convexOutputLayer(n_units, mergedBranches, IsTraining, "Output")

  #Soft constraints
  penaltyList = arbitragePenalties(out,
                                   strikeTensor,
                                   maturityTensor,
                                   scaleTensor,
                                   vegaRef,
                                   hyperparameters)

  evaluatedTensors = [out]
  return out, evaluatedTensors, penaltyList, evalAndFormatResult

In [None]:
# Plot the "Price" column of dataSet against the "Strike" level of its index
plt.plot(dataSet.index.get_level_values("Strike"), dataSet["Price"])

In [None]:
# Train the soft-constrained split network on scaledDataSet; the model is saved under "softConvexHybridModel"
y_pred0, lossSerie0 = create_train_model(NNArchitectureConstrained, scaledDataSet, False, hyperparameters, modelName = "softConvexHybridModel")

In [None]:
# Report the smallest recorded training loss and plot the loss history
print("Minimum error : ",lossSerie0.min())
plotEpochLoss(lossSerie0)

In [None]:
# Last recorded loss value
lossSerie0.iloc[-1]

In [None]:
# Restore "softConvexHybridModel" and evaluate it on the training set.
# NOTE: lossSerie0 is overwritten here; create_eval_model hands the placeholder
# loss series [0] to the formatting function.
y_pred0, lossSerie0 = create_eval_model(NNArchitectureConstrained, 
                                        scaledDataSet, 
                                        False, 
                                        hyperparameters, 
                                        modelName = "softConvexHybridModel")
# Compare predicted prices with the reference "Price" column
predictionDiagnosis(y_pred0, dataSet["Price"], " Price ")
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV0 = plotImpliedVol(y_pred0, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
# Preview the first predicted values
y_pred0.head()

In [None]:
# Predictions whose first index level equals midS0 (all values of the second level)
y_pred0.loc[(midS0,slice(None))].head()

In [None]:
# Preview the reference prices for comparison
dataSet["Price"].head()

In [None]:
# Restore "softConvexHybridModel" and evaluate it on the testing set
y_pred0Test, lossSerie0Test = create_eval_model(NNArchitectureConstrained, 
                                                scaledDataSetTest, 
                                                False, 
                                                hyperparameters, 
                                                modelName = "softConvexHybridModel")
# Compare predicted prices with the reference "Price" column of the testing set
predictionDiagnosis(y_pred0Test, dataSetTest["Price"], " Price ")
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV0Test = plotImpliedVol(y_pred0Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

### Unconstrained neural network (Price only)

In [None]:
#Unconstrained dense layer
def unconstrainedLayer(n_units,  tensor, isTraining, name, activation = K.softplus):
  """Dense layer with he-normal kernel and the given activation.

  isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    kernelInitializer = tf.keras.initializers.he_normal()
    return tf.layers.dense(tensor,
                           units=n_units,
                           activation=activation,
                           kernel_initializer=kernelInitializer)

#Factory for unconstrained network
def NNArchitectureUnconstrained(n_units, 
                                strikeTensor,
                                maturityTensor, 
                                scaleTensor, 
                                strikeMinTensor, 
                                vegaRef,
                                hyperparameters,
                                IsTraining=True):
  """Build a plain feed-forward price network on (strike, maturity).

  Two hidden layers of n_units are followed by a single-unit output without
  activation. Returns the price tensor, [price], an empty penalization list
  and the formatting function evalAndFormatResult. scaleTensor,
  strikeMinTensor, vegaRef and hyperparameters are accepted but not used.
  """
  #Strike and maturity are fed jointly
  hidden = tf.concat([strikeTensor,maturityTensor], axis=-1)

  #First and second hidden layers
  for layerName in ("Hidden1", "Hidden2"):
    hidden = unconstrainedLayer(n_units, hidden, IsTraining, layerName)

  #Output layer
  out = unconstrainedLayer(1, hidden, IsTraining, "Output", activation = None)

  noPenalization = []
  return out, [out], noPenalization, evalAndFormatResult


In [None]:
# Train the unconstrained network on scaledDataSet; the model is saved under "unconstrainedModel"
y_pred1, lossSerie1 = create_train_model(NNArchitectureUnconstrained, 
                                         scaledDataSet, 
                                         False, 
                                         hyperparameters,
                                         modelName = "unconstrainedModel")

In [None]:
# Report the smallest recorded training loss and plot the loss history
print("Minimum error : ",lossSerie1.min())
plotEpochLoss(lossSerie1)

In [None]:
# Last recorded loss value
lossSerie1.iloc[-1]

In [None]:
# Restore "unconstrainedModel" and evaluate it on the training set.
# NOTE: lossSerie1 is overwritten here; create_eval_model hands the placeholder
# loss series [0] to the formatting function.
y_pred1, lossSerie1 = create_eval_model(NNArchitectureUnconstrained, 
                                        scaledDataSet, 
                                        False, 
                                        hyperparameters,
                                        modelName = "unconstrainedModel")
# Compare predicted prices with the reference "Price" column
predictionDiagnosis(y_pred1, dataSet["Price"], " Price ")
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV1 = plotImpliedVol(y_pred1, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
# Preview the first predicted values
y_pred1.head()

In [None]:
# Predictions whose first index level equals midS0 (all values of the second level)
y_pred1.loc[(midS0,slice(None))].head()

In [None]:
# Preview the reference prices for comparison
dataSet["Price"].head()

In [None]:
# Restore "unconstrainedModel" and evaluate it on the testing set
y_pred1Test, lossSerie1Test = create_eval_model(NNArchitectureUnconstrained, 
                                                scaledDataSetTest, 
                                                False, 
                                                hyperparameters,
                                                modelName = "unconstrainedModel")
# Compare predicted prices with the reference "Price" column of the testing set
predictionDiagnosis(y_pred1Test, dataSetTest["Price"], " Price ")
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV1Test = plotImpliedVol(y_pred1Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

## Dupire formula implementation

In [None]:
#Dupire formula from exact derivative computation
def dupireFormula(HessianStrike, 
                  GradMaturity, 
                  Strike,
                  scaleTensor,
                  strikeMinTensor,
                  IsTraining=True):
  """Return (volatility, variance) from precomputed price derivatives.

  variance = (2 * GradMaturity / HessianStrike)
             / (Strike + strikeMinTensor / scaleTensor)^2
  and the volatility is its square root. IsTraining is accepted but not used.
  """
  doubledGradMaturity = tf.math.scalar_mul(tf.constant(2.0), GradMaturity)
  shiftedStrikeSquared = tf.square(Strike + strikeMinTensor / scaleTensor)
  dupireVar = tf.math.divide(tf.math.divide(doubledGradMaturity, HessianStrike),
                             shiftedStrikeSquared)
  #Initial weights of the neural network can be random, which can lead to a negative dupireVar;
  #no clipping is applied before the square root
  return tf.sqrt(dupireVar), dupireVar

In [None]:
#Dupire formula with derivatives obtained from native tensorflow algorithmic differentiation
def rawDupireFormula(priceTensor, 
                     adjustedStrikeTensor, 
                     maturityTensor,
                     scaleTensor,
                     strikeMinTensor,
                     IsTraining=True):
  """Differentiate priceTensor with tf.gradients and apply the Dupire formula.

  Returns (volatility, maturity derivative, strike second derivative divided
  by scaleTensor squared, variance). IsTraining is accepted but not used.
  """
  batchSize = tf.shape(adjustedStrikeTensor)[0]

  def asColumns(gradientList):
    #tf.gradients returns a list; keep its first entry with one row per sample
    return tf.reshape(gradientList[0], shape=[batchSize,-1])

  #First and second derivatives with respect to the strike input
  dK = asColumns(tf.gradients(priceTensor, adjustedStrikeTensor, name="dK"))
  hK = asColumns(tf.gradients(dK, adjustedStrikeTensor, name="hK"))
  shiftedStrike = adjustedStrikeTensor + strikeMinTensor / scaleTensor
  dupireDenominator = tf.square(shiftedStrike) * hK

  #Derivative with respect to maturity
  dT = asColumns(tf.gradients(priceTensor,maturityTensor,name="dT"))

  #Initial weights of the neural network can be random, which can lead to a negative dupireVar
  dupireVar = 2 * dT / dupireDenominator
  dupireVol = tf.sqrt(dupireVar)
  rescaledHessian = hK / tf.square(scaleTensor)
  return  dupireVol, dT, rescaledHessian, dupireVar

### Hybrid architecture (Exact derivatives)

In [None]:
def exact_derivatives(Strike, Maturity):
    """Closed-form derivatives of the split softplus/sigmoid network output.

    Kernels and biases are read from the default graph under the names
    'dense', 'dense_1', 'dense_2' and 'dense_3', so the network must already
    exist in that graph under these names. The forward pass is recomputed
    from them: softplus strike branch, sigmoid maturity branch, softplus
    hidden layer, softplus output.

    Returns:
        (dF_dT, dF_dK, d2F_dK2): derivative with respect to Maturity, first
        and second derivatives with respect to Strike.
    """
    graph = tf.get_default_graph()

    def fetch(tensorName):
        return graph.get_tensor_by_name(tensorName)

    w1K = fetch('dense/kernel:0')
    w1T = fetch('dense_1/kernel:0')
    w2 = fetch('dense_2/kernel:0')
    w3 = fetch('dense_3/kernel:0')

    b1K = fetch('dense/bias:0')
    b1T = fetch('dense_1/bias:0')
    b2 = fetch('dense_2/bias:0')
    b3 = fetch('dense_3/bias:0')

    # Forward pass up to the pre-activations of the last two layers
    strikePre = tf.matmul(Strike, w1K) + b1K
    maturityPre = tf.matmul(Maturity, w1T) + b1T
    branches = tf.concat([tf.nn.softplus(strikePre), tf.nn.sigmoid(maturityPre)], axis=-1)
    I2 = tf.matmul(branches, w2) + b2
    I3 = tf.matmul(tf.nn.softplus(I2), w3) + b3

    # The derivative of softplus is the sigmoid
    slope2 = tf.nn.sigmoid(I2)
    slope3 = tf.nn.sigmoid(I3)

    def zerosLike(tensor):
        return tf.scalar_mul(tf.constant(0.0), tensor)

    def throughUpperLayers(dBranches):
        # Chain rule through the second dense layer and the output dense layer
        dI2 = tf.matmul(dBranches, w2)
        dI3 = tf.matmul(tf.multiply(slope2, dI2), w3)
        return dI2, dI3

    # First derivative with respect to the strike: the maturity branch does not contribute
    strikeSlope = tf.multiply(tf.nn.sigmoid(strikePre), w1K)
    dI2dK, dI3dK = throughUpperLayers(tf.concat([strikeSlope, zerosLike(strikeSlope)], axis=-1))
    dF_dK = tf.multiply(slope3, dI3dK)

    # First derivative with respect to the maturity: the strike branch does not contribute
    maturitySlope = tf.multiply(sigmoidGradient(maturityPre), w1T)
    _, dI3dT = throughUpperLayers(tf.concat([zerosLike(maturitySlope), maturitySlope], axis=-1))
    dF_dT = tf.multiply(slope3, dI3dT)

    # Second derivative with respect to the strike
    strikeCurvature = tf.multiply(sigmoidGradient(strikePre), tf.multiply(w1K, w1K))
    d2I2_dK2 = tf.matmul(tf.concat([strikeCurvature, zerosLike(strikeCurvature)], axis=-1), w2)
    hiddenCurvature = (tf.multiply(sigmoidGradient(I2), tf.square(dI2dK))
                       + tf.multiply(slope2, d2I2_dK2))
    d2I3dK2 = tf.matmul(hiddenCurvature, w3)
    d2F_dK2 = (tf.multiply(sigmoidGradient(I3), tf.square(dI3dK))
               + tf.multiply(slope3, d2I3dK2))

    return dF_dT, dF_dK, d2F_dK2

In [None]:
#Tools functions for neural architecture
def positiveKernelInitializer(shape, dtype=None, partition_info=None):
  """Return the absolute value of weights drawn by the Keras `normal` initializer."""
  drawNormal = tf.keras.initializers.normal()
  signedWeights = drawNormal(shape, dtype=dtype, partition_info=partition_info)
  return tf.abs(signedWeights)


#Neural network architecture
def convexLayer1(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Softplus dense layer returning (activated output, pre-activation).

  The input is negated before the dense layer when isNonDecreasing is false.
  isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = (- tensor)
    preActivation = tf.layers.dense(signedInput,
                                    units=n_units,
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.softplus(preActivation)
    return activated, preActivation

def monotonicLayer1(n_units,  tensor, isTraining, name):
  """Sigmoid dense layer returning (activated output, pre-activation).

  isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    preActivation = tf.layers.dense(tensor,
                                    units=n_units,
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.sigmoid(preActivation)
    return activated, preActivation

def convexOutputLayer1(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Softplus hidden layer of 2*n_units followed by a single-unit softplus output.

  The same output tensor is returned twice, as a pair. The input is negated
  first when isNonDecreasing is false. isTraining is accepted but not used.
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = (- tensor)

    #Hidden layer, twice as wide as n_units
    hiddenLayer = tf.layers.dense(signedInput,
                                  units=2*n_units,
                                  kernel_initializer=tf.keras.initializers.glorot_normal(),
                                  activation = 'softplus')

    #Single-unit output layer with positively initialized kernel
    outputLayer = tf.layers.dense(hiddenLayer,
                                  units=1,
                                  kernel_initializer=positiveKernelInitializer,
                                  activation = 'softplus')

    return outputLayer, outputLayer
  

def convexLayerHybrid1(n_units, 
                      tensor, 
                      isTraining, 
                      name, 
                      activationFunction2 = Act.softplus,
                      activationFunction1 = Act.exponential,
                      isNonDecreasing = True):
  """Dense layer whose two halves go through different activations.

  The dense output is split in two along axis 1: activationFunction1 is
  applied to the first half, activationFunction2 to the second, and the
  halves are concatenated back. Returns (activated output, pre-activation).
  The input is negated first when isNonDecreasing is false. isTraining is
  accepted but not used.
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = (- tensor)
    layer = tf.layers.dense(signedInput,
                            units=n_units,
                            kernel_initializer=positiveKernelInitializer)
    firstHalf, secondHalf = tf.split(layer,2,1)
    activatedHalves = [activationFunction1(firstHalf), activationFunction2(secondHalf)]
    return tf.concat(activatedHalves, axis=-1), layer

def sigmoidGradient(inputTensor):
  """Return sigmoid(x) * (1 - sigmoid(x)), the first derivative of the sigmoid."""
  sig = tf.nn.sigmoid(inputTensor)
  return sig * ( 1 - sig )

def sigmoidHessian(inputTensor) :
  """Return the second derivative of the sigmoid: s * (1 - s) * (1 - 2 * s).

  With s = sigmoid(x) and s' = s * (1 - s), differentiating s' gives
  s' * (1 - s) - s * s' = s * (1 - s) * (1 - 2 * s). The factor s must be
  kept: without it the expression is not the derivative of sigmoidGradient.
  """
  sig = tf.nn.sigmoid(inputTensor)
  return sig * (1 - sig) * (1 - 2 * sig)

  
def NNArchitectureConstrainedDupire(n_units, 
                                    strikeTensor,
                                    maturityTensor, 
                                    scaleTensor, 
                                    strikeMinTensor,
                                    vegaRef, 
                                    hyperparameters,
                                    IsTraining=True):
  """Split price network with Dupire local volatility from exact derivatives.

  Returns the price tensor, the tensors to evaluate
  [price, dupireVol, dT, rescaled strike hessian, dupireVar], the two soft
  penalties [grad_penalty, hessian_penalty] and the formatting function
  evalAndFormatDupireResult.
  """
  #First split layer : the strike branch is created before the maturity branch
  strikeBranch, _ = convexLayer1(n_units, strikeTensor, IsTraining, "Hidden1S")
  maturityBranch, _ = monotonicLayer1(n_units, maturityTensor, IsTraining, "Hidden1M")
  mergedBranches = tf.concat([strikeBranch, maturityBranch], axis=-1)

  #Second layer and output layer
  out, _ = convexOutputLayer1(n_units, mergedBranches, IsTraining, "Output")

  #Closed-form derivatives (the strike first derivative is not used below)
  dT, _, HS = exact_derivatives(strikeTensor, maturityTensor)

  #Local volatility
  dupireVol, dupireVar = dupireFormula(HS,
                                       dT,
                                       strikeTensor,
                                       scaleTensor,
                                       strikeMinTensor,
                                       IsTraining=IsTraining)

  #Soft constraints on price
  lambdas = hyperparameters["lambdaSoft"]
  lowerBoundTheta = tf.constant(hyperparameters["lowerBoundTheta"])
  lowerBoundGamma = tf.constant(hyperparameters["lowerBoundGamma"])

  thetaViolation = tf.nn.relu(-dT + lowerBoundTheta) / vegaRef
  grad_penalty = lambdas * tf.reduce_mean(thetaViolation)

  HSScaled = HS / tf.square(scaleTensor)
  gammaViolation = tf.nn.relu(- HSScaled + lowerBoundGamma) / vegaRef
  hessian_penalty = lambdas * hyperparameters["lambdaGamma"] * tf.reduce_mean(gammaViolation)

  evaluatedTensors = [out, dupireVol, dT, HSScaled, dupireVar]
  penalties = [grad_penalty, hessian_penalty]
  return out, evaluatedTensors, penalties, evalAndFormatDupireResult


In [None]:
# List the trainable variables registered in the current default graph
tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

In [None]:
# Train the exact-derivative Dupire network on scaledDataSet; the model is saved under
# "convexHybridMatthewDupireVolModel". The five results are unpacked in the order given by
# the formatting function (presumably price, local volatility, maturity derivative,
# strike second derivative and loss history - confirm against evalAndFormatDupireResult).
y_pred2, volLocale2, dNN_T2, gNN_K2, lossSerie2 = create_train_model(NNArchitectureConstrainedDupire,
                                                                     scaledDataSet,
                                                                     False, 
                                                                     hyperparameters,
                                                                     modelName = "convexHybridMatthewDupireVolModel")

In [None]:
# Plot the training loss history
plotEpochLoss(lossSerie2)

In [None]:
# Last recorded loss value
lossSerie2.iloc[-1]

In [None]:
# Restore "convexHybridMatthewDupireVolModel" and evaluate it on the training set.
# NOTE: lossSerie2 is overwritten here; create_eval_model hands the placeholder
# loss series [0] to the formatting function.
y_pred2, volLocale2, dNN_T2, gNN_K2, lossSerie2 = create_eval_model(NNArchitectureConstrainedDupire, 
                                                                    scaledDataSet, 
                                                                    False, 
                                                                    hyperparameters,
                                                                    modelName = "convexHybridMatthewDupireVolModel")
# Summarize the evaluated results against dataSet
modelSummary(y_pred2, volLocale2, dNN_T2, gNN_K2, dataSet)
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV2 = plotImpliedVol(y_pred2, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
# Entries of volLocale2 whose first index level equals midS0 (all values of the second level)
volLocale2.loc[(midS0,slice(None))]

In [None]:
# Restore "convexHybridMatthewDupireVolModel" and evaluate it on the testing set
y_pred2Test, volLocale2Test, dNN_T2Test, gNN_K2Test, lossSerie2Test = create_eval_model(NNArchitectureConstrainedDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        False, 
                                                                                        hyperparameters,
                                                                                        modelName = "convexHybridMatthewDupireVolModel")
# Summarize the evaluated results against dataSetTest
modelSummary(y_pred2Test, volLocale2Test, dNN_T2Test, gNN_K2Test, dataSetTest)
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV2Test = plotImpliedVol(y_pred2Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

### Hybrid Network (Derivatives from algorithmic differentiation) 

In [None]:

def NNArchitectureConstrainedRawDupire(n_units, 
                                       strikeTensor,
                                       maturityTensor,
                                       scaleTensor,
                                       strikeMinTensor, 
                                       vegaRef, 
                                       hyperparameters,
                                       IsTraining=True):
  """Split price network with Dupire local volatility from tf.gradients.

  Returns the price tensor, the tensors to evaluate
  [price, dupireVol, theta, hK, dupireVar], the two soft penalties
  [grad_penalty, hessian_penalty] and the formatting function
  evalAndFormatDupireResult.
  """
  #First split layer : strike branch, then maturity branch
  strikeBranch = convexLayer(n_units, strikeTensor, IsTraining, "Hidden1S")
  maturityBranch = monotonicLayer(n_units, maturityTensor, IsTraining, "Hidden1M")
  mergedBranches = tf.concat([strikeBranch, maturityBranch], axis=-1)

  #Second hidden layer and output layer
  out = convexOutputLayer(n_units, mergedBranches, IsTraining, "Output")

  #Compute local volatility
  dupireVol, theta, hK, dupireVar = rawDupireFormula(out,
                                                     strikeTensor,
                                                     maturityTensor,
                                                     scaleTensor,
                                                     strikeMinTensor,
                                                     IsTraining=IsTraining)

  #Soft constraints for no-arbitrage
  lambdas = hyperparameters["lambdaSoft"]
  lowerBoundTheta = tf.constant(hyperparameters["lowerBoundTheta"])
  lowerBoundGamma = tf.constant(hyperparameters["lowerBoundGamma"])

  thetaViolation = tf.nn.relu(-theta + lowerBoundTheta) / vegaRef
  grad_penalty = lambdas * tf.reduce_mean(thetaViolation)

  gammaViolation = tf.nn.relu(-hK + lowerBoundGamma) / vegaRef
  hessian_penalty = lambdas * hyperparameters["lambdaGamma"] * tf.reduce_mean(gammaViolation)

  evaluatedTensors = [out, dupireVol, theta, hK, dupireVar]
  penalties = [grad_penalty, hessian_penalty]
  return out, evaluatedTensors, penalties, evalAndFormatDupireResult

In [None]:
# Train the tf.gradients-based Dupire network on scaledDataSet; the model is saved under
# "convexHybridDupireVolModel". The five results are unpacked in the order given by the
# formatting function (presumably price, local volatility, maturity derivative,
# strike second derivative and loss history - confirm against evalAndFormatDupireResult).
y_pred3, volLocale3, dNN_T3, gNN_K3, lossSerie3 = create_train_model(NNArchitectureConstrainedRawDupire,
                                                                     scaledDataSet,
                                                                     False, 
                                                                     hyperparameters,
                                                                     modelName = "convexHybridDupireVolModel")

In [None]:
# Plot the training loss history
plotEpochLoss(lossSerie3)

In [None]:
# Last recorded loss value
lossSerie3.iloc[-1]

In [None]:
# Restore "convexHybridDupireVolModel" and evaluate it on the training set.
# NOTE: lossSerie3 is overwritten here; create_eval_model hands the placeholder
# loss series [0] to the formatting function.
y_pred3, volLocale3, dNN_T3, gNN_K3, lossSerie3 = create_eval_model(NNArchitectureConstrainedRawDupire, 
                                                                    scaledDataSet, 
                                                                    False,
                                                                    hyperparameters,
                                                                    modelName = "convexHybridDupireVolModel")
# Summarize the evaluated results against dataSet
modelSummary(y_pred3, volLocale3, dNN_T3, gNN_K3, dataSet)
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV3 = plotImpliedVol(y_pred3, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
# Entries of volLocale3 whose first index level equals midS0 (all values of the second level)
volLocale3.loc[(midS0,slice(None))]

In [None]:
# Restore "convexHybridDupireVolModel" and evaluate it on the testing set
y_pred3Test, volLocale3Test, dNN_T3Test, gNN_K3Test, lossSerie3Test = create_eval_model(NNArchitectureConstrainedRawDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        False, 
                                                                                        hyperparameters,
                                                                                        modelName = "convexHybridDupireVolModel")
# Summarize the evaluated results against dataSetTest
modelSummary(y_pred3Test, volLocale3Test, dNN_T3Test, gNN_K3Test, dataSetTest)
# Plot implied volatilities from predicted prices against the reference "ImpliedVol" column
impV3Test = plotImpliedVol(y_pred3Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:

# Testing-set entries where dNN_T3Test is non-positive
dNN_T3Test[dNN_T3Test<=0]

In [None]:
# Same training-set summary and implied volatility plot as before, with logMoneynessScale enabled
modelSummary(y_pred3, 
             volLocale3, 
             dNN_T3, 
             gNN_K3, 
             dataSet,
             logMoneynessScale = True)
impV3 = plotImpliedVol(y_pred3, 
                       dataSet["ImpliedVol"], 
                       rIntegralSpline=riskFreeIntegral, 
                       qIntegralSpline=divSpreadIntegral,
                       logMoneynessScale = True)

In [None]:
# Same testing-set summary and implied volatility plot as before, with logMoneynessScale enabled
modelSummary(y_pred3Test, 
             volLocale3Test, 
             dNN_T3Test, 
             gNN_K3Test, 
             dataSetTest,
             logMoneynessScale = True)
impV3Test = plotImpliedVol(y_pred3Test, 
                           dataSetTest["ImpliedVol"], 
                           rIntegralSpline=riskFreeIntegral, 
                           qIntegralSpline=divSpreadIntegral,
                           logMoneynessScale = True)

### Standard network with soft constraints

In [None]:
def NNArchitectureVanillaSoftDupire(n_units, strikeTensor,
                                    maturityTensor,
                                    scaleTensor,
                                    strikeMinTensor,
                                    vegaRef,
                                    hyperparameters,
                                    IsTraining=True):
  """Unconstrained two-hidden-layer network with soft penalty terms.

  Strike and maturity are concatenated on the last axis and fed through two
  `unconstrainedLayer` hidden layers of `n_units` units, then through a
  single-unit output layer created with `activation = None`.
  `rawDupireFormula` is applied to that output and two penalties are built
  from its `theta` and `hK` results:
  mean(relu(lowerBound - value) / vegaRef), scaled by
  hyperparameters["lambdaSoft"] (and additionally by
  hyperparameters["lambdaGamma"] for the `hK` penalty).

  Returns the tuple
  (out, [out, dupireVol, theta, hK, dupireVar],
   [grad_penalty, hessian_penalty], evalAndFormatDupireResult).
  """
  # NOTE(review): theta / hK are presumably the maturity derivative and the
  # strike second derivative of the price - confirm against rawDupireFormula.

  def softPenalty(value, lowerBound):
    #Average shortfall of value below lowerBound, normalized by vegaRef
    return tf.reduce_mean(tf.nn.relu(-value + lowerBound) / vegaRef)

  #Network input : (strike, maturity)
  features = tf.concat([strikeTensor, maturityTensor], axis=-1)

  #Two hidden layers followed by the linear output layer
  firstHidden = unconstrainedLayer(n_units = n_units,
                                   tensor = features,
                                   isTraining=IsTraining,
                                   name = "Hidden1")
  secondHidden = unconstrainedLayer(n_units = n_units,
                                    tensor = firstHidden,
                                    isTraining=IsTraining,
                                    name = "Hidden2")
  out = unconstrainedLayer(n_units = 1,
                           tensor = secondHidden,
                           isTraining=IsTraining,
                           name = "Output",
                           activation = None)

  #Local volatility
  dupireOutputs = rawDupireFormula(out, strikeTensor,
                                   maturityTensor,
                                   scaleTensor,
                                   strikeMinTensor,
                                   IsTraining=IsTraining)
  dupireVol, theta, hK, dupireVar = dupireOutputs

  #Soft constraints for no arbitrage
  softWeight = hyperparameters["lambdaSoft"]
  thetaFloor = tf.constant(hyperparameters["lowerBoundTheta"])
  gammaFloor = tf.constant(hyperparameters["lowerBoundGamma"])
  grad_penalty = softWeight * softPenalty(theta, thetaFloor)
  hessian_penalty = softWeight * hyperparameters["lambdaGamma"] * softPenalty(hK, gammaFloor)

  tensorsToEvaluate = [out, dupireVol, theta, hK, dupireVar]
  penalties = [grad_penalty, hessian_penalty]
  return out, tensorsToEvaluate, penalties, evalAndFormatDupireResult

In [None]:
y_pred4, volLocale4, dNN_T4, gNN_K4, lossSerie4 = create_train_model(NNArchitectureVanillaSoftDupire,
                                                                     scaledDataSet,
                                                                     False, 
                                                                     hyperparameters,
                                                                     modelName = "convexSoftDupireVolModel")

In [None]:
plotEpochLoss(lossSerie4)

In [None]:
lossSerie4.iloc[-1]

In [None]:
y_pred4, volLocale4, dNN_T4, gNN_K4, lossSerie4 = create_eval_model(NNArchitectureVanillaSoftDupire,
                                                                    scaledDataSet, 
                                                                    False, 
                                                                    hyperparameters,
                                                                    modelName = "convexSoftDupireVolModel")
modelSummary(y_pred4, volLocale4, dNN_T4, gNN_K4, dataSet)
impV4 = plotImpliedVol(y_pred4, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
volLocale4.loc[(midS0,slice(None))]

In [None]:
y_pred4Test, volLocale4Test, dNN_T4Test, gNN_K4Test, lossSerie4Test = create_eval_model(NNArchitectureVanillaSoftDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        False, 
                                                                                        hyperparameters,
                                                                                        modelName = "convexSoftDupireVolModel")
modelSummary(y_pred4Test, volLocale4Test, dNN_T4Test, gNN_K4Test, dataSetTest)
impV4Test = plotImpliedVol(y_pred4Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred4, 
             volLocale4, 
             dNN_T4, 
             gNN_K4, 
             dataSet,
             logMoneynessScale = True)
impV4 = plotImpliedVol(y_pred4, 
                       dataSet["ImpliedVol"], 
                       rIntegralSpline=riskFreeIntegral, 
                       qIntegralSpline=divSpreadIntegral,
                       logMoneynessScale = True)

In [None]:
modelSummary(y_pred4Test, 
             volLocale4Test, 
             dNN_T4Test, 
             gNN_K4Test, 
             dataSetTest,
             logMoneynessScale = True)
impV4Test = plotImpliedVol(y_pred4Test, 
                           dataSetTest["ImpliedVol"], 
                           rIntegralSpline=riskFreeIntegral, 
                           qIntegralSpline=divSpreadIntegral,
                           logMoneynessScale = True)

### Unconstrained standard network

In [None]:
def NNArchitectureUnconstrainedDupire(n_units, strikeTensor,
                                      maturityTensor,
                                      scaleTensor,
                                      strikeMinTensor, 
                                      vegaRef,
                                      hyperparameters,
                                      IsTraining=True):
  """Unconstrained two-hidden-layer network without any penalty term.

  Strike and maturity are concatenated on the last axis and fed through two
  `unconstrainedLayer` hidden layers of `n_units` units, then through a
  single-unit output layer created with `activation = None`.
  `rawDupireFormula` is applied to that output.
  `vegaRef` and `hyperparameters` are accepted but not used here.

  Returns the tuple
  (out, [out, dupireVol, theta, hK, dupireVar], [], evalAndFormatDupireResult)
  where the empty list means that no penalty is added.
  """
  #Network input : (strike, maturity)
  features = tf.concat([strikeTensor, maturityTensor], axis=-1)

  #Hidden layers are stacked in order : Hidden1 then Hidden2
  activations = features
  for layerName in ("Hidden1", "Hidden2"):
    activations = unconstrainedLayer(n_units = n_units,
                                     tensor = activations,
                                     isTraining=IsTraining,
                                     name = layerName)

  #Output layer
  out = unconstrainedLayer(n_units = 1,
                           tensor = activations,
                           isTraining=IsTraining,
                           name = "Output",
                           activation = None)

  #Local volatility
  dupireOutputs = rawDupireFormula(out, strikeTensor,
                                   maturityTensor,
                                   scaleTensor,
                                   strikeMinTensor,
                                   IsTraining=IsTraining)
  dupireVol, theta, hK, dupireVar = dupireOutputs

  tensorsToEvaluate = [out, dupireVol, theta, hK, dupireVar]
  return out, tensorsToEvaluate, [], evalAndFormatDupireResult

In [None]:
y_pred5, volLocale5, dNN_T5, gNN_K5, lossSerie5 = create_train_model(NNArchitectureUnconstrainedDupire,
                                                                     scaledDataSet,
                                                                     False, 
                                                                     hyperparameters,
                                                                     modelName = "unconstrainedDupireVolModel")

In [None]:
plotEpochLoss(lossSerie5)

In [None]:
lossSerie5.iloc[-1]

In [None]:
y_pred5, volLocale5, dNN_T5, gNN_K5, lossSerie5 = create_eval_model(NNArchitectureUnconstrainedDupire,
                                                                    scaledDataSet,
                                                                    False,
                                                                    hyperparameters,
                                                                    modelName = "unconstrainedDupireVolModel")
modelSummary(y_pred5, volLocale5, dNN_T5, gNN_K5, dataSet)
impV5 = plotImpliedVol(y_pred5, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
volLocale5.loc[(midS0,slice(None))]

In [None]:
y_pred5Test, volLocale5Test, dNN_T5Test, gNN_K5Test, lossSerie5Test = create_eval_model(NNArchitectureUnconstrainedDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        False, 
                                                                                        hyperparameters,
                                                                                        modelName = "unconstrainedDupireVolModel")
modelSummary(y_pred5Test, volLocale5Test, dNN_T5Test, gNN_K5Test, dataSetTest)
impV5Test = plotImpliedVol(y_pred5Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred5, 
             volLocale5, 
             dNN_T5, 
             gNN_K5, 
             dataSet,
             logMoneynessScale = True)
impV5 = plotImpliedVol(y_pred5, 
                       dataSet["ImpliedVol"], 
                       rIntegralSpline=riskFreeIntegral, 
                       qIntegralSpline=divSpreadIntegral,
                       logMoneynessScale = True)

In [None]:
modelSummary(y_pred5Test, 
             volLocale5Test, 
             dNN_T5Test, 
             gNN_K5Test, 
             dataSetTest,
             logMoneynessScale = True)
impV5Test = plotImpliedVol(y_pred5Test, 
                           dataSetTest["ImpliedVol"], 
                           rIntegralSpline=riskFreeIntegral, 
                           qIntegralSpline=divSpreadIntegral,
                           logMoneynessScale = True)

### Hard constrained architecture

In [None]:
#Utility functions for the hard-constrained neural architecture

def convexLayerHard(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Dense layer with a non-negative kernel constraint and softplus activation.

  The input is negated before the dense layer when `isNonDecreasing` is False.
  `isTraining` is accepted but not used.

  Returns (softplus(preActivation), preActivation).
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = - tensor
    preActivation = tf.layers.dense(signedInput,
                                    units=n_units,
                                    kernel_constraint = tf.keras.constraints.NonNeg(),
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.softplus(preActivation)
    return activated, preActivation

def monotonicLayerHard(n_units,  tensor, isTraining, name):
  """Dense layer with a non-negative kernel constraint and sigmoid activation.

  `isTraining` is accepted but not used.

  Returns (sigmoid(preActivation), preActivation).
  """
  with tf.name_scope(name):
    preActivation = tf.layers.dense(tensor,
                                    units=n_units,
                                    kernel_constraint = tf.keras.constraints.NonNeg(),
                                    kernel_initializer=tf.keras.initializers.glorot_normal())
    activated = tf.nn.sigmoid(preActivation)
    return activated, preActivation

def convexOutputLayerHard(n_units, tensor, isTraining, name, isNonDecreasing = True):
  """Two stacked softplus dense layers with non-negative kernel constraints.

  The first dense layer has 2 * n_units units (glorot normal initializer),
  the second one has a single unit (initialized with
  `positiveKernelInitializer`). The input is negated first when
  `isNonDecreasing` is False. `isTraining` is accepted but not used.

  Returns the single-unit output twice, as (output, output).
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = - tensor

    #Hidden part : twice as many units as requested
    widened = tf.layers.dense(signedInput,
                              units=2*n_units,
                              kernel_constraint = tf.keras.constraints.NonNeg(),
                              kernel_initializer=tf.keras.initializers.glorot_normal(),
                              activation = 'softplus')

    #Scalar output
    scalarOutput = tf.layers.dense(widened,
                                   units=1,
                                   kernel_constraint = tf.keras.constraints.NonNeg(),
                                   kernel_initializer=positiveKernelInitializer,
                                   activation = 'softplus')

    return scalarOutput, scalarOutput
  

def convexLayerHybridHard(n_units,
                          tensor,
                          isTraining,
                          name,
                          activationFunction2 = Act.softplus,
                          activationFunction1 = Act.exponential,
                          isNonDecreasing = True):
  """Dense layer with a non-negative kernel constraint and two activations.

  The dense output is split in two equal parts along axis 1;
  `activationFunction1` is applied to the first part, `activationFunction2`
  to the second one, and both results are concatenated on the last axis.
  The input is negated first when `isNonDecreasing` is False.
  `isTraining` is accepted but not used.

  Returns (concatenatedActivations, denseOutputBeforeActivation).
  """
  with tf.name_scope(name):
    if isNonDecreasing:
      signedInput = tensor
    else:
      signedInput = - tensor
    preActivation = tf.layers.dense(signedInput,
                                    units=n_units,
                                    kernel_constraint = tf.keras.constraints.NonNeg(),
                                    kernel_initializer=positiveKernelInitializer)
    firstHalf, secondHalf = tf.split(preActivation, 2, 1)
    activatedHalves = [activationFunction1(firstHalf), activationFunction2(secondHalf)]
    return tf.concat(activatedHalves, axis=-1), preActivation

def sigmoidGradientHard(inputTensor):
  """First derivative of the sigmoid, sigma(x) * (1 - sigma(x))."""
  sigma = tf.nn.sigmoid(inputTensor)
  return sigma * ( 1 - sigma )

def sigmoidHessianHard(inputTensor) :
  """Second derivative of the sigmoid.

  With s = sigmoid(x), s' = s * (1 - s) and therefore
  s'' = s' * (1 - 2 * s) = s * (1 - s) * (1 - 2 * s).
  The previous expression, (1 - s)^2 - s * (1 - s) = (1 - s) * (1 - 2 * s),
  was missing the leading factor s.
  """
  sigma = tf.nn.sigmoid(inputTensor)
  return sigma * (1 - sigma) * (1 - 2 * sigma)
  


  
def NNArchitectureHardConstrainedDupire(n_units, strikeTensor, 
                                        maturityTensor,
                                        scaleTensor,
                                        strikeMinTensor, 
                                        vegaRef,
                                        hyperparameters,
                                        IsTraining=True):
  """Network whose layers all carry non-negative kernel constraints.

  The strike goes through `convexLayerHard` and the maturity through
  `monotonicLayerHard`; both activations are concatenated on the last axis
  and fed to `convexOutputLayerHard`. `rawDupireFormula` is applied to the
  resulting output. `vegaRef` and `hyperparameters` are accepted but not
  used here.

  Returns the tuple
  (out, [out, dupireVol, theta, hK, dupireVar], [], evalAndFormatDupireResult)
  where the empty list means that no penalty is added.
  """
  #First layer : one branch per input, pre-activations are not needed here
  strikeBranch, _ = convexLayerHard(n_units = n_units,
                                    tensor = strikeTensor,
                                    isTraining=IsTraining,
                                    name = "Hidden1S")
  maturityBranch, _ = monotonicLayerHard(n_units = n_units,
                                         tensor = maturityTensor,
                                         isTraining = IsTraining,
                                         name = "Hidden1M")
  mergedBranches = tf.concat([strikeBranch, maturityBranch], axis=-1)

  #Second layer and output layer
  out, _ = convexOutputLayerHard(n_units = n_units,
                                 tensor = mergedBranches,
                                 isTraining = IsTraining,
                                 name = "Output")

  #Local volatility
  dupireOutputs = rawDupireFormula(out, strikeTensor,
                                   maturityTensor,
                                   scaleTensor,
                                   strikeMinTensor,
                                   IsTraining=IsTraining)
  dupireVol, theta, hK, dupireVar = dupireOutputs

  tensorsToEvaluate = [out, dupireVol, theta, hK, dupireVar]
  return out, tensorsToEvaluate, [], evalAndFormatDupireResult

In [None]:
y_pred6, volLocale6, dNN_T6, gNN_K6, lossSerie6 = create_train_model(NNArchitectureHardConstrainedDupire,
                                                                     scaledDataSet,
                                                                     False, 
                                                                     hyperparameters,
                                                                     modelName = "convexHardDupireVolModel")

In [None]:
plotEpochLoss(lossSerie6)

In [None]:
lossSerie6.iloc[-1]

In [None]:
y_pred6, volLocale6, dNN_T6, gNN_K6, lossSerie6 = create_eval_model(NNArchitectureHardConstrainedDupire, 
                                                                    scaledDataSet, 
                                                                    False, 
                                                                    hyperparameters,
                                                                    modelName = "convexHardDupireVolModel")
modelSummary(y_pred6, volLocale6, dNN_T6, gNN_K6, dataSet)
impV6 = plotImpliedVol(y_pred6, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
volLocale6.loc[(midS0,slice(None))]

In [None]:
y_pred6Test, volLocale6Test, dNN_T6Test, gNN_K6Test, lossSerie6Test = create_eval_model(NNArchitectureHardConstrainedDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        False, 
                                                                                        hyperparameters,
                                                                                        modelName = "convexHardDupireVolModel")
modelSummary(y_pred6Test, volLocale6Test, dNN_T6Test, gNN_K6Test, dataSetTest)
impV6Test = plotImpliedVol(y_pred6Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
#Hard constrained model on the training set, displayed in log-moneyness.
#logMoneynessScale is now passed to modelSummary as well, as in the
#equivalent cells of the other architectures.
modelSummary(y_pred6, 
             volLocale6, 
             dNN_T6, 
             gNN_K6, 
             dataSet,
             logMoneynessScale = True)
impV6 = plotImpliedVol(y_pred6, 
                       dataSet["ImpliedVol"], 
                       rIntegralSpline=riskFreeIntegral, 
                       qIntegralSpline=divSpreadIntegral,
                       logMoneynessScale = True)

In [None]:
#Hard constrained model on the testing set, displayed in log-moneyness.
#logMoneynessScale is now passed to modelSummary as well, as in the
#equivalent cells of the other architectures.
modelSummary(y_pred6Test, 
             volLocale6Test, 
             dNN_T6Test, 
             gNN_K6Test, 
             dataSetTest,
             logMoneynessScale = True)
impV6Test = plotImpliedVol(y_pred6Test, 
                           dataSetTest["ImpliedVol"], 
                           rIntegralSpline=riskFreeIntegral, 
                           qIntegralSpline=divSpreadIntegral,
                           logMoneynessScale = True)

## Dupire regularization 

The cells below repeat the experiments above, except that Dupire regularization is now activated.

### Hybrid architecture (Exact derivatives)

In [None]:
y_pred8, volLocale8, dNN_T8, gNN_K8, lossSerie8 = create_train_model(NNArchitectureConstrainedDupire,
                                                                     scaledDataSet,
                                                                     True, 
                                                                     hyperparameters,
                                                                     modelName = "regularizedConvexHybridMatthewDupireVolModel")

In [None]:
plotEpochLoss(lossSerie8)

In [None]:
lossSerie8.iloc[-1]

In [None]:
y_pred8, volLocale8, dNN_T8, gNN_K8, lossSerie8 = create_eval_model(NNArchitectureConstrainedDupire, 
                                                                    scaledDataSet, 
                                                                    True, 
                                                                    hyperparameters,
                                                                    modelName = "regularizedConvexHybridMatthewDupireVolModel")
modelSummary(y_pred8, volLocale8, dNN_T8, gNN_K8, dataSet)
impV8 = plotImpliedVol(y_pred8, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
y_pred8Test, volLocale8Test, dNN_T8Test, gNN_K8Test, lossSerie8Test = create_eval_model(NNArchitectureConstrainedDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        True, 
                                                                                        hyperparameters,
                                                                                        modelName = "regularizedConvexHybridMatthewDupireVolModel")
modelSummary(y_pred8Test, volLocale8Test, dNN_T8Test, gNN_K8Test, dataSetTest)
impV8Test = plotImpliedVol(y_pred8Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

### Unconstrained standard network

In [None]:
y_pred9, volLocale9, dNN_T9, gNN_K9, lossSerie9 = create_train_model(NNArchitectureUnconstrainedDupire,
                                                                     scaledDataSet,
                                                                     True, 
                                                                     hyperparameters,
                                                                     modelName = "regularizedUnconstrainedDupireVolModel")

In [None]:
plotEpochLoss(lossSerie9)

In [None]:
lossSerie9.iloc[-1]

In [None]:
y_pred9, volLocale9, dNN_T9, gNN_K9, lossSerie9 = create_eval_model(NNArchitectureUnconstrainedDupire, 
                                                                    scaledDataSet, 
                                                                    True, 
                                                                    hyperparameters,
                                                                    modelName = "regularizedUnconstrainedDupireVolModel")
modelSummary(y_pred9, volLocale9, dNN_T9, gNN_K9, dataSet)
impV9 = plotImpliedVol(y_pred9, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
y_pred9Test, volLocale9Test, dNN_T9Test, gNN_K9Test, lossSerie9Test = create_eval_model(NNArchitectureUnconstrainedDupire, 
                                                                                        scaledDataSetTest, 
                                                                                        True, 
                                                                                        hyperparameters,
                                                                                        modelName = "regularizedUnconstrainedDupireVolModel")
modelSummary(y_pred9Test, volLocale9Test, dNN_T9Test, gNN_K9Test, dataSetTest)
impV9Test = plotImpliedVol(y_pred9Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred9, 
             volLocale9, 
             dNN_T9, 
             gNN_K9, 
             dataSet,
             logMoneynessScale = True)
impV9 = plotImpliedVol(y_pred9, 
                       dataSet["ImpliedVol"], 
                       rIntegralSpline=riskFreeIntegral, 
                       qIntegralSpline=divSpreadIntegral,
                       logMoneynessScale = True)

In [None]:
modelSummary(y_pred9Test, 
             volLocale9Test, 
             dNN_T9Test, 
             gNN_K9Test, 
             dataSetTest,
             logMoneynessScale = True)
impV9Test = plotImpliedVol(y_pred9Test, 
                           dataSetTest["ImpliedVol"], 
                           rIntegralSpline=riskFreeIntegral, 
                           qIntegralSpline=divSpreadIntegral,
                           logMoneynessScale = True)

### Hybrid Network (Derivatives from algorithmic differentiation) 

In [None]:
y_pred10, volLocale10, dNN_T10, gNN_K10, lossSerie10 = create_train_model(NNArchitectureConstrainedRawDupire,
                                                                          scaledDataSet,
                                                                          True,
                                                                          hyperparameters,
                                                                          modelName = "regularizedConvexHybridDupireVolModel")

In [None]:
plotEpochLoss(lossSerie10)

In [None]:
lossSerie10.iloc[-1]

In [None]:
y_pred10, volLocale10, dNN_T10, gNN_K10, lossSerie10 = create_eval_model(NNArchitectureConstrainedRawDupire,
                                                                         scaledDataSet,
                                                                         True,
                                                                         hyperparameters,
                                                                         modelName = "regularizedConvexHybridDupireVolModel")
modelSummary(y_pred10, volLocale10, dNN_T10, gNN_K10, dataSet)
impV10 = plotImpliedVol(y_pred10, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
y_pred10Test, volLocale10Test, dNN_T10Test, gNN_K10Test, lossSerie10Test = create_eval_model(NNArchitectureConstrainedRawDupire,
                                                                                             scaledDataSetTest,
                                                                                             True,
                                                                                             hyperparameters,
                                                                                             modelName = "regularizedConvexHybridDupireVolModel")
modelSummary(y_pred10Test, volLocale10Test, dNN_T10Test, gNN_K10Test, dataSetTest)
impV10Test = plotImpliedVol(y_pred10Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred10, 
             volLocale10, 
             dNN_T10, 
             gNN_K10, 
             dataSet,
             logMoneynessScale = True)
impV10 = plotImpliedVol(y_pred10, 
                        dataSet["ImpliedVol"], 
                        rIntegralSpline=riskFreeIntegral, 
                        qIntegralSpline=divSpreadIntegral,
                        logMoneynessScale = True)

In [None]:
modelSummary(y_pred10Test, 
             volLocale10Test, 
             dNN_T10Test, 
             gNN_K10Test, 
             dataSetTest,
             logMoneynessScale = True)
impV10Test = plotImpliedVol(y_pred10Test, 
                            dataSetTest["ImpliedVol"], 
                            rIntegralSpline=riskFreeIntegral, 
                            qIntegralSpline=divSpreadIntegral,
                            logMoneynessScale = True)

### Standard network with soft constraints

In [None]:
y_pred11, volLocale11, dNN_T11, gNN_K11, lossSerie11 = create_train_model(NNArchitectureVanillaSoftDupire,
                                                                          scaledDataSet,
                                                                          True,
                                                                          hyperparameters,
                                                                          modelName = "regularizedConvexSoftDupireVolModel")

In [None]:
plotEpochLoss(lossSerie11)

In [None]:
lossSerie11.iloc[-1]

In [None]:
y_pred11, volLocale11, dNN_T11, gNN_K11, lossSerie11 = create_eval_model(NNArchitectureVanillaSoftDupire,
                                                                         scaledDataSet,
                                                                         True,
                                                                         hyperparameters,
                                                                         modelName = "regularizedConvexSoftDupireVolModel")
modelSummary(y_pred11, volLocale11, dNN_T11, gNN_K11, dataSet)
impV11 = plotImpliedVol(y_pred11, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
y_pred11Test, volLocale11Test, dNN_T11Test, gNN_K11Test, lossSerie11Test = create_eval_model(NNArchitectureVanillaSoftDupire,
                                                                                             scaledDataSetTest,
                                                                                             True,
                                                                                             hyperparameters,
                                                                                             modelName = "regularizedConvexSoftDupireVolModel")
modelSummary(y_pred11Test, volLocale11Test, dNN_T11Test, gNN_K11Test, dataSetTest)
impV11Test = plotImpliedVol(y_pred11Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred11, 
             volLocale11, 
             dNN_T11, 
             gNN_K11, 
             dataSet,
             logMoneynessScale = True)
impV11 = plotImpliedVol(y_pred11, 
                        dataSet["ImpliedVol"], 
                        rIntegralSpline=riskFreeIntegral, 
                        qIntegralSpline=divSpreadIntegral,
                        logMoneynessScale = True)

In [None]:
modelSummary(y_pred11Test, 
             volLocale11Test, 
             dNN_T11Test, 
             gNN_K11Test, 
             dataSetTest,
             logMoneynessScale = True)
impV11Test = plotImpliedVol(y_pred11Test, 
                            dataSetTest["ImpliedVol"], 
                            rIntegralSpline=riskFreeIntegral, 
                            qIntegralSpline=divSpreadIntegral,
                            logMoneynessScale = True)

In [None]:
priceTrain = convertToLogMoneyness(dataSet[dataSet.Maturity > 0])["Price"]
plot2Series(convertToLogMoneyness(dataSetTest[dataSetTest.Maturity > 0])["Price"], 
            priceTrain[priceTrain < 1500],
            Title = "Reference Price Surfaces",
            yMin = -1000,
            az=140)

In [None]:
#Training prices with strictly positive maturity, in log-moneyness
priceTrain = convertToLogMoneyness(dataSet[dataSet.Maturity > 0])["Price"]
plotSerie(priceTrain,
          Title = "Reference Price Surfaces",
          yMin = -1000,
          az=140)

In [None]:
priceTrain = convertToLogMoneyness(dataSet[dataSet.Maturity > 0])["Price"]
plot2Series(convertToLogMoneyness(dataSetTest[dataSetTest.Maturity > 0])["Price"], 
            priceTrain,
            Title = "",
            yMin = -1000,
            az=140)

In [None]:
priceTrain = convertToLogMoneyness(y_pred11[y_pred11.index.get_level_values("Maturity") > 0]) 
plot2Series(convertToLogMoneyness(y_pred11Test[y_pred11Test.index.get_level_values("Maturity") > 0]), 
            priceTrain,
            Title = '',
            yMin = -1000,
            az=140)

In [None]:
priceTrain = convertToLogMoneyness(dataSet)["Price"]
plot2Series(convertToLogMoneyness(y_pred11Test), 
            priceTrain,
            Title = '',
            yMin = -1000,
            az=140)

In [None]:
#Keep strictly positive maturities, then filter each surface with a mask
#computed on the already filtered object, so that mask and data share the
#same index instead of relying on implicit index re-alignment.
volTrain = impV11[impV11.index.get_level_values("Maturity") > 0]
volTest = impV11Test[impV11Test.index.get_level_values("Maturity") > 0]
plot2Series(volTest[volTest > 0.05], 
            volTrain[(volTrain < 0.3) & (volTrain > 0.05)],
            Title = "Dense Soft Implied volatility Surfaces",
            yMin = -1000,
            az=230)

In [None]:
#Reference training implied volatilities (log-moneyness) against the
#implied volatilities of model 11 on the testing set.
#The lower bound mask is computed on the already filtered testing surface,
#so that mask and data share the same index.
refImpliedVol = dataSet["ImpliedVol"]
volTrain = convertToLogMoneyness(refImpliedVol[refImpliedVol.index.get_level_values("Maturity") > 0])
volTest = impV11Test[impV11Test.index.get_level_values("Maturity") > 0]
plot2Series(volTest[volTest > 0.05],
            volTrain[volTrain < 0.3],
            Title = "Dense Soft Implied volatility Surfaces",
            yMin = -1000,
            az=230)

### Hard constrained architecture

In [None]:
y_pred12, volLocale12, dNN_T12, gNN_K12, lossSerie12 = create_train_model(NNArchitectureHardConstrainedDupire,
                                                                          scaledDataSet,
                                                                          True,
                                                                          hyperparameters,
                                                                          modelName = "regularizedConvexHardDupireVolModel")

In [None]:
plotEpochLoss(lossSerie12)

In [None]:
lossSerie12.iloc[-1]

In [None]:
y_pred12, volLocale12, dNN_T12, gNN_K12, lossSerie12 = create_eval_model(NNArchitectureHardConstrainedDupire,
                                                                         scaledDataSet,
                                                                         True,
                                                                         hyperparameters,
                                                                         modelName = "regularizedConvexHardDupireVolModel")
modelSummary(y_pred12, volLocale12, dNN_T12, gNN_K12, dataSet)
impV12 = plotImpliedVol(y_pred12, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
y_pred12Test, volLocale12Test, dNN_T12Test, gNN_K12Test, lossSerie12Test = create_eval_model(NNArchitectureHardConstrainedDupire,
                                                                                             scaledDataSetTest,
                                                                                             True,
                                                                                             hyperparameters,
                                                                                             modelName = "regularizedConvexHardDupireVolModel")
modelSummary(y_pred12Test, volLocale12Test, dNN_T12Test, gNN_K12Test, dataSetTest)
impV12Test = plotImpliedVol(y_pred12Test, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
modelSummary(y_pred12, 
             volLocale12, 
             dNN_T12, 
             gNN_K12, 
             dataSet,
             logMoneynessScale = True)
impV12 = plotImpliedVol(y_pred12, 
                        dataSet["ImpliedVol"], 
                        rIntegralSpline=riskFreeIntegral, 
                        qIntegralSpline=divSpreadIntegral,
                        logMoneynessScale = True)

In [None]:
modelSummary(y_pred12Test, 
             volLocale12Test, 
             dNN_T12Test, 
             gNN_K12Test, 
             dataSetTest,
             logMoneynessScale = True)
impV12Test = plotImpliedVol(y_pred12Test, 
                            dataSetTest["ImpliedVol"], 
                            rIntegralSpline=riskFreeIntegral, 
                            qIntegralSpline=divSpreadIntegral,
                            logMoneynessScale = True)

In [None]:
priceTrain = convertToLogMoneyness(y_pred12[y_pred12.index.get_level_values("Maturity") > 0]) 
plot2Series(convertToLogMoneyness(y_pred12Test[y_pred12Test.index.get_level_values("Maturity") > 0]), 
            priceTrain,
            Title = '',
            yMin = -1000,
            az=140)

In [None]:
volTrain = (impV12[impV12.index.get_level_values("Maturity") > 0])
plot2Series((impV12Test[impV12Test.index.get_level_values("Maturity") > 0]), 
            volTrain[volTrain < 0.3],
            Title = "Hard Implied volatility Surfaces",
            yMin = -1000,
            az=140)

## Monte Carlo pricing

### Monte Carlo with implied vol

In [None]:
#Default discretization used by the Monte Carlo cells below
nbTimeStep = 100
nbPaths = 100000
def MonteCarloPricerImplicit(S,
                             Strike,
                             Maturity,
                             rSpline,
                             divSpline,
                             nbPaths,
                             nbTimeStep,
                             impliedVol):
  """Undiscounted Monte Carlo estimate of a put payoff under constant volatility.

  The log-return is simulated with an Euler scheme on a uniform time grid :
  at each step the drift is rSpline(t) - divSpline(t) - impliedVol^2 / 2 and
  the diffusion coefficient is impliedVol.

  Parameters
  ----------
  S : initial value of the underlying (previously ignored in favour of the
      global S0).
  Strike, Maturity : strike and maturity of the option.
  rSpline, divSpline : callables evaluated at each grid time t.
  nbPaths, nbTimeStep : number of simulated paths and of time steps.
  impliedVol : constant volatility used for every step.

  Returns
  -------
  Mean over paths of max(Strike - S_T, 0). No discount factor is applied here.
  """
  time_grid = np.linspace(0, Maturity, int(nbTimeStep + 1))
  timeStep = Maturity / nbTimeStep
  #All Brownian increments are drawn at once, one row per time step
  gaussianNoise = np.random.normal(scale = np.sqrt(timeStep), size=(nbTimeStep, nbPaths))

  #Only the terminal value is needed, so the running log-return is kept
  #instead of the whole (nbTimeStep + 1, nbPaths) path matrix
  logReturn = np.zeros(nbPaths)
  for i in range(nbTimeStep) :
    t = time_grid[i]
    mu = rSpline(t) - divSpline(t)
    drift = mu - np.square(impliedVol) / 2.0
    logReturn = logReturn + drift * timeStep + gaussianNoise[i,:] * impliedVol

  SFinal = S * np.exp(logReturn)
  return np.mean(np.maximum(Strike - SFinal, 0))

def MonteCarloPricerVectorizedImplicit(S,
                                       dataSet,
                                       rSpline,
                                       divSpline,
                                       nbPaths,
                                       nbTimeStep):
  """Price every row of dataSet with MonteCarloPricerImplicit and discount the result.

  S : spot value at time 0
  dataSet : DataFrame with "Strike", "Maturity" and "ImpliedVol" columns and a
            "Maturity" index level
  rSpline, divSpline : rate callables forwarded to MonteCarloPricerImplicit
  nbPaths, nbTimeStep : simulation sizes forwarded to MonteCarloPricerImplicit

  Returns the row-wise Monte Carlo payoffs multiplied by
  exp(-riskFreeIntegral(maturity)); riskFreeIntegral is read from the notebook globals.
  """
  def priceRow(row):
    # rSpline is the argument given by the caller, not the global riskCurvespline
    return MonteCarloPricerImplicit(S, row["Strike"], row["Maturity"], rSpline, divSpline, nbPaths, nbTimeStep, row["ImpliedVol"])
  discountFactor = np.exp(-riskFreeIntegral(dataSet.index.get_level_values("Maturity")))
  return dataSet.apply(priceRow, axis=1) * discountFactor

In [None]:
# Monte Carlo prices for every row of dataSet, each simulated with its own "ImpliedVol"; show the first rows.
mcResRef = MonteCarloPricerVectorizedImplicit(S0[0],
                                              dataSet,
                                              riskCurvespline,
                                              divSpline,
                                              nbPaths,
                                              nbTimeStep)
mcResRef.head()

In [None]:
# Pass the implied-vol Monte Carlo prices and the dataSet["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResRef, dataSet["Price"], " Price ", yMin=4100)

### Monte Carlo local volatility

#### Constant local volatility

In [None]:
def volLocaleTest(S, T):
  """Flat local volatility : 0.23 for every entry of S; T is not used."""
  flatVol = 0.23
  return flatVol * np.ones_like(S)

In [None]:
nbTimeStep = 100
nbPaths = 100000
def MonteCarloPricer(S, 
                     Strike, 
                     Maturity, 
                     rSpline, 
                     divSpline, 
                     nbPaths, 
                     nbTimeStep, 
                     volLocaleFunction):
  """Monte Carlo estimate of the undiscounted put payoff under a local volatility.

  The log-price is simulated on a regular time grid over [0, Maturity] with
  increments (mu - vol^2 / 2) * dt + vol * dW, where mu = rSpline(t) - divSpline(t)
  and vol = volLocaleFunction(S_t, t) are evaluated at the left end of each step
  and dW is N(0, dt).

  S : spot value at time 0
  Strike : strike of the put
  Maturity : horizon of the simulation
  rSpline, divSpline : callables of time returning the two rates whose difference is the drift
  nbPaths : number of simulated paths
  nbTimeStep : number of time steps
  volLocaleFunction : callable (spots, times) -> volatilities; both arguments are
                      arrays of length nbPaths

  Returns the mean over paths of max(Strike - S_T, 0); no discounting is applied here.
  """
  time_grid = np.linspace(0, Maturity, int(nbTimeStep + 1))
  timeStep = Maturity / nbTimeStep
  # Brownian increments : one row per time step, one column per path
  gaussianNoise = np.random.normal(scale = np.sqrt(timeStep), size=(nbTimeStep, nbPaths))

  # Row i holds log(S_t / S) at time_grid[i]; row 0 stays at zero
  logReturn = np.zeros((nbTimeStep + 1, nbPaths))

  for i in range(nbTimeStep) :
      t = time_grid[i]

      # Spot levels are rebuilt from the S argument, not from a notebook-level global
      St = S * np.exp(logReturn[i,:])
      volLocale = volLocaleFunction(St, np.ones(nbPaths) * t)

      mu = rSpline(t) - divSpline(t)
      drift = np.ones(nbPaths) * (mu - np.square(volLocale) / 2.0) 
      logReturn[i + 1, :] = logReturn[i,:] + drift * timeStep + gaussianNoise[i,:] * volLocale
  SFinal = S * np.exp(logReturn[-1, :])
  return np.mean(np.maximum(Strike - SFinal, 0))

def MonteCarloPricerVectorized(S, 
                               dataSet,
                               rSpline, 
                               divSpline, 
                               nbPaths, 
                               nbTimeStep, 
                               volLocaleFunction):
  """Price every row of dataSet with MonteCarloPricer and discount the result.

  S : spot value at time 0
  dataSet : DataFrame with "Strike" and "Maturity" columns and a "Maturity" index level
  rSpline, divSpline : rate callables forwarded to MonteCarloPricer
  nbPaths, nbTimeStep : simulation sizes forwarded to MonteCarloPricer
  volLocaleFunction : callable (spots, times) -> volatilities forwarded to MonteCarloPricer

  Returns the row-wise Monte Carlo payoffs multiplied by
  exp(-riskFreeIntegral(maturity)); riskFreeIntegral is read from the notebook globals.
  """
  def priceRow(row):
    # rSpline is the argument given by the caller, not the global riskCurvespline
    return MonteCarloPricer(S, row["Strike"], row["Maturity"], rSpline, divSpline, nbPaths, nbTimeStep, volLocaleFunction)
  discountFactor = np.exp(-riskFreeIntegral(dataSet.index.get_level_values("Maturity")))
  return dataSet.apply(priceRow, axis=1) * discountFactor

In [None]:
# Monte Carlo prices for dataSetTest with the flat volatility volLocaleTest; show the first rows.
mcResSigmaRef = MonteCarloPricerVectorized(S0[0],
                                           dataSetTest,
                                           riskCurvespline,
                                           divSpline,
                                           nbPaths,
                                           nbTimeStep,
                                           volLocaleTest)
mcResSigmaRef.head()

In [None]:
# Pass the flat-volatility Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResSigmaRef, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the flat-volatility Monte Carlo prices.
mcResSigmaRef.to_csv("mcResSigmaRef.csv")

#### Extracting neural local volatility





In [None]:
def evalVolLocale(NNFactory,
                  strikes,
                  maturities,
                  dataSet,
                  hyperParameters,
                  modelName = "bestModel"):
    """Evaluate a saved network on arbitrary (strike, maturity) points.

    The graph is rebuilt with NNFactory (called with IsTraining=False), the weights
    saved under modelName are restored and every tensor of TensorList is evaluated.

    NNFactory : function creating the architecture
    strikes, maturities : 1-D arrays of the same length giving the evaluation points
    dataSet : DataFrame used only to locate the "ChangedStrike" column in the global scaler
    hyperParameters : dictionary of hyperparameters ("nbUnits" is read here, the whole
                      dictionary is forwarded to NNFactory)
    modelName : name of the tensorflow model to restore

    Returns a pd.Series indexed by (Strike, Maturity) holding the flattened second
    tensor of TensorList (presumably the local volatility -- confirm against NNFactory).
    Relies on the notebook globals scaler and changeOfVariable.
    """
    hidden_nodes = hyperParameters["nbUnits"] 

    # Reset the graph
    tf.reset_default_graph()
    
    # Placeholders for input and output data   
    Strike = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    factorPrice = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])
    
    #Get scaling for strike
    colStrikeIndex = dataSet.columns.get_loc("ChangedStrike")
    maxColFunction = scaler.data_max_[colStrikeIndex]
    minColFunction = scaler.data_min_[colStrikeIndex]
    scF = (maxColFunction - minColFunction) 
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    strikeMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    price_pred_tensor, TensorList, penalizationList, formattingFunction = NNFactory(hidden_nodes,
                                                                                    Strike,
                                                                                    Maturity,
                                                                                    scaleTensor,
                                                                                    strikeMinTensor,
                                                                                    vegaRef,
                                                                                    hyperParameters,
                                                                                    IsTraining=False)# one hidden layer


    price_pred_tensor_sc= tf.multiply(factorPrice,price_pred_tensor)
    TensorList[0] = price_pred_tensor_sc
    
    # Define a loss function
    pointwiseError = tf.reduce_mean(tf.abs(price_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList) 
    loss = tf.log(tf.reduce_mean(errors))

    # The optimizer is never stepped in this function. It is still built before the
    # Saver so that the graph contains the same variables as before this rewrite.
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    train = optimizer.minimize(loss)

    # Initialize variables and run session
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    try:
        sess.run(init)
        changedVar = changeOfVariable(strikes, maturities)
        # Same min-max scaling as the one stored in the scaler for "ChangedStrike"
        scaledStrike = (changedVar[0]-minColFunction)/scF
        dividendFactor = changedVar[1]

        def createFeedDict(s, t, d):
            batchSize = s.shape[0]
            feedDict = {Strike : np.reshape(s, (batchSize,1)), 
                        Maturity : np.reshape(t, (batchSize,1)) ,  
                        factorPrice : np.reshape(d, (batchSize,1)), 
                        vegaRef : np.ones((batchSize,1))}
            return feedDict
    
        epochFeedDict = createFeedDict(scaledStrike, maturities, dividendFactor)
    
        saver.restore(sess, modelName)  

        evalList = sess.run(TensorList, feed_dict=epochFeedDict)
    finally:
        # Release the session even when restoring or evaluating fails
        sess.close()
    
    return pd.Series(evalList[1].flatten(), index = pd.MultiIndex.from_arrays([strikes, maturities], names=('Strike', 'Maturity')))





In [None]:
# Regular 100 x 100 (strike, maturity) grid spanning the ranges observed in dataSet.
# The trailing comments keep the alternative bounds that would also cover dataSetTest.
strikeLow = dataSet["Strike"].min()#min(dataSet["Strike"].min(),dataSetTest["Strike"].min())
strikeUp = dataSet["Strike"].max()#max(dataSet["Strike"].max(),dataSetTest["Strike"].max())
strikeGrid = np.linspace(strikeLow, strikeUp, 100)
matLow = dataSet["Maturity"].min()#min(dataSet["Maturity"].min(),dataSetTest["Maturity"].min())
matUp = dataSet["Maturity"].max()#max(dataSet["Maturity"].max(),dataSetTest["Maturity"].max())
matGrid = np.linspace(matLow, matUp, 100)
# volLocaleGrid[0] holds the strikes and volLocaleGrid[1] the maturities of the mesh
volLocaleGrid = np.meshgrid(strikeGrid, matGrid)

In [None]:
def interpolatedMCLocalVolatility(localVol, strikes, maturities):
    """Evaluate customInterpolator(localVol, strikes, maturities) and return the
    flattened values as a pd.Series indexed by (Strike, Maturity)."""
    interpolatedValues = np.array(customInterpolator(localVol, strikes, maturities)).flatten()
    pointIndex = pd.MultiIndex.from_arrays([strikes, maturities], names=('Strike', 'Maturity'))
    return pd.Series(interpolatedValues, index = pointIndex)


##### Standard network, soft constraints

In [None]:
def neuralVolLocale(s,t):
  """Evaluate the saved "convexSoftDupireVolModel" network at strikes s and
  maturities t through evalVolLocale, and drop the NaN entries of the result."""
  return evalVolLocale(NNArchitectureVanillaSoftDupire,
                       s, t,
                       dataSetTest,
                       hyperparameters,
                       modelName = "convexSoftDupireVolModel").dropna()

In [None]:
# Evaluate the soft-constraint network on every node of the strike/maturity grid; show the first rows.
volLocalInterp = neuralVolLocale(volLocaleGrid[0].flatten(), 
                                 volLocaleGrid[1].flatten())
volLocalInterp.head()

In [None]:
# Save the grid evaluation of the soft-constraint network.
volLocalInterp.to_csv("Dense08082001VolLocalGrid.csv")

In [None]:
# Evaluate the soft-constraint network at the (Strike, Maturity) index points of dataSetTest; show the first rows.
volLocalInterp2 = neuralVolLocale(dataSetTest.index.get_level_values("Strike").values.flatten(), 
                                  dataSetTest.index.get_level_values("Maturity").values.flatten())
volLocalInterp2.head()

In [None]:
# Save the evaluation at the dataSetTest points (volLocalInterp2), not the grid
# evaluation that is written to a separate file.
volLocalInterp2.to_csv("Dense08082001dataSetTest.csv")

In [None]:
def nnVolLocale(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp."""
  return interpolatedMCLocalVolatility(volLocalInterp, x, y)

In [None]:
def nnVolLocale2(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp2."""
  return interpolatedMCLocalVolatility(volLocalInterp2, x, y)

In [None]:
# Plot the soft-constraint network evaluated on the strike/maturity grid.
plotSerie(volLocalInterp,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:

# Plot the values obtained on the grid by interpolating volLocalInterp2 with nnVolLocale2.
plotSerie(nnVolLocale2(volLocaleGrid[0].flatten(), volLocaleGrid[1].flatten()),
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the soft-constraint network evaluated at the dataSetTest points.
plotSerie(volLocalInterp2,
          Title = 'Testing Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the "locvol" column of dataSetTest.
plotSerie(dataSetTest["locvol"],
          Title = 'Testing Tikhonov Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the "locvol" column of dataSet.
plotSerie(dataSet["locvol"],
          Title = 'Tikohnov Train Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Evaluate the soft-constraint network at the (Strike, Maturity) index points of dataSet and plot the result.
plotSerie(neuralVolLocale(dataSet.index.get_level_values("Strike").values.flatten(), dataSet.index.get_level_values("Maturity").values.flatten()),
          Title = 'Training Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the "LocalVolatility" column of the localVolatility frame.
plotSerie(localVolatility["LocalVolatility"],
          Title = 'Complete tikhonov Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Show the first rows of the localVolatility frame.
localVolatility.head()

##### Hard constraint Regularized

In [None]:
def neuralVolLocaleHardReg(s,t):
  """Evaluate the saved "regularizedConvexHardDupireVolModel" network at strikes s
  and maturities t through evalVolLocale."""
  return evalVolLocale(NNArchitectureHardConstrainedDupire,
                       s, t,
                       dataSet,
                       hyperparameters,
                       modelName = "regularizedConvexHardDupireVolModel")

In [None]:
# Evaluate the regularized hard-constraint network on every node of the strike/maturity grid; show the first rows.
volLocalInterp3 = neuralVolLocaleHardReg(volLocaleGrid[0].flatten(),
                                         volLocaleGrid[1].flatten())
volLocalInterp3.head()

In [None]:
# Evaluate the regularized hard-constraint network at the (Strike, Maturity) index points of dataSetTest; show the first rows.
volLocalInterp4 = neuralVolLocaleHardReg(dataSetTest.index.get_level_values("Strike").values.flatten(),
                                         dataSetTest.index.get_level_values("Maturity").values.flatten())
volLocalInterp4.head()

In [None]:
def nnVolLocale3(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp3."""
  return interpolatedMCLocalVolatility(volLocalInterp3, x, y)

In [None]:
def nnVolLocale4(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp4."""
  return interpolatedMCLocalVolatility(volLocalInterp4, x, y)

In [None]:
# Plot the regularized hard-constraint network evaluated on the strike/maturity grid.
plotSerie(volLocalInterp3,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the regularized hard-constraint network evaluated at the dataSetTest points.
plotSerie(volLocalInterp4,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

##### Hard constraint

In [None]:
def neuralVolLocaleHard(s,t):
  """Evaluate the saved "convexHardDupireVolModel" network at strikes s and
  maturities t through evalVolLocale."""
  return evalVolLocale(NNArchitectureHardConstrainedDupire,
                       s, t,
                       dataSet,
                       hyperparameters,
                       modelName = "convexHardDupireVolModel")

In [None]:
# Evaluate the hard-constraint network on every node of the strike/maturity grid; show the first rows.
volLocalInterp5 = neuralVolLocaleHard(volLocaleGrid[0].flatten(),
                                      volLocaleGrid[1].flatten())
volLocalInterp5.head()

In [None]:
# Evaluate the hard-constraint network at the (Strike, Maturity) index points of dataSetTest; show the first rows.
volLocalInterp6 = neuralVolLocaleHard(dataSetTest.index.get_level_values("Strike").values.flatten(),
                                      dataSetTest.index.get_level_values("Maturity").values.flatten())
volLocalInterp6.head()

In [None]:
def nnVolLocale5(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp5."""
  return interpolatedMCLocalVolatility(volLocalInterp5, x, y)

In [None]:
def nnVolLocale6(x, y):
  """Local volatility at the points (x, y), interpolated from volLocalInterp6."""
  return interpolatedMCLocalVolatility(volLocalInterp6, x, y)

In [None]:
# Plot the hard-constraint network evaluated on the strike/maturity grid.
plotSerie(volLocalInterp5,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the hard-constraint network evaluated at the dataSetTest points.
plotSerie(volLocalInterp6,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

#### Tikhonov local volatility

In [None]:
def nnTikhonov(x, y):
  """Local volatility at the points (x, y), interpolated from localVolatility["LocalVolatility"]."""
  return interpolatedMCLocalVolatility(localVolatility["LocalVolatility"], x, y)

In [None]:
# Monte Carlo prices for dataSetTest with nnTikhonov as local volatility function; show the first rows.
mcResTikhonov = MonteCarloPricerVectorized(S0[0],
                                           dataSetTest,
                                           riskCurvespline,
                                           divSpline,
                                           nbPaths,
                                           nbTimeStep,
                                           nnTikhonov)
mcResTikhonov.head()

In [None]:
# Pass the Tikhonov Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResTikhonov, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Tikhonov Monte Carlo prices.
mcResTikhonov.to_csv("mcResTikhonov.csv")

#### Neural local Volatility

##### Standard Network soft constraint

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale (interpolation of volLocalInterp) as local volatility; show the first rows.
mcResVolLocale = MonteCarloPricerVectorized(S0[0],
                                            dataSetTest,
                                            riskCurvespline,
                                            divSpline,
                                            nbPaths,
                                            nbTimeStep,
                                            nnVolLocale)
mcResVolLocale.head()

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale.
mcResVolLocale.to_csv("mcResVolLocale.csv")

In [None]:
# Pass the nnVolLocale Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Show the last rows of dataSetTest.
dataSetTest.tail()

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale2 (interpolation of volLocalInterp2) as local volatility; show the first rows.
mcResVolLocale2 = MonteCarloPricerVectorized(S0[0],
                                            dataSetTest,
                                            riskCurvespline,
                                            divSpline,
                                            nbPaths,
                                            nbTimeStep,
                                            nnVolLocale2)
mcResVolLocale2.head()

In [None]:
# Pass the nnVolLocale2 Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale2, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale2.
mcResVolLocale2.to_csv("mcResVolLocale2.csv")

##### Hard constraint Regularized

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale3 (interpolation of volLocalInterp3) as local volatility; show the first rows.
mcResVolLocale3 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale3)
mcResVolLocale3.head()

In [None]:
# Pass the nnVolLocale3 Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale3, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale3.
mcResVolLocale3.to_csv("mcResVolLocale3.csv")

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale4 (interpolation of volLocalInterp4) as local volatility; show the first rows.
mcResVolLocale4 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale4)
mcResVolLocale4.head()

In [None]:
# Pass the nnVolLocale4 Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale4, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale4.
mcResVolLocale4.to_csv("mcResVolLocale4.csv")

##### Hard constraint

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale5 (interpolation of volLocalInterp5) as local volatility; show the first rows.
mcResVolLocale5 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale5)
mcResVolLocale5.head()

In [None]:
# Pass the nnVolLocale5 Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale5, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale5.
mcResVolLocale5.to_csv("mcResVolLocale5.csv")

In [None]:
# Monte Carlo prices for dataSetTest with nnVolLocale6 (interpolation of volLocalInterp6) as local volatility; show the first rows.
mcResVolLocale6 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale6)
mcResVolLocale6.head()

In [None]:
# Pass the nnVolLocale6 Monte Carlo prices and the dataSetTest["Price"] column to predictionDiagnosis.
predictionDiagnosis(mcResVolLocale6, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Save the Monte Carlo prices obtained with nnVolLocale6.
mcResVolLocale6.to_csv("mcResVolLocale6.csv")

## Gatheral transformation

#### Select Data

In [None]:
# Build trainingDataSet with generateData from the "ImpliedVol" column of formattedTrainingData,
# the spot and the rate/dividend curves; show its last rows.
trainingDataSet = generateData(formattedTrainingData["ImpliedVol"], S0, riskFreeIntegral, divSpreadIntegral, riskCurvespline, divSpline)
trainingDataSet.tail()

In [None]:
# Store in a "locvol" column the result of interpolatedLocalVolatility(localVolatility, trainingDataSet["Price"]).
trainingDataSet["locvol"] = interpolatedLocalVolatility(localVolatility, trainingDataSet["Price"])

In [None]:
# dataSet is rebound to trainingDataSet : it is the same object, not a copy.
dataSet = trainingDataSet #Training set

In [None]:
# Fit a [0, 1] min-max scaler on dataSet only, then apply it to both dataSet and
# dataSetTest through transformCustom; show the first scaled rows.
scaler = skl.preprocessing.MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataSet)
scaledDataSet = transformCustom(dataSet, scaler)
scaledDataSetTest = transformCustom(dataSetTest, scaler)
scaledDataSet.head()

In [None]:
#Search strike for ATM option : smallest value of the "Strike" index level of dataSet that is >= S0[0]
midS0 = dataSet[dataSet.index.get_level_values("Strike") >= S0[0]].index.get_level_values("Strike").min()

#### Plot functions

In [None]:

  
#Diagnose implied total variance, local volatility, implied volatility, theta and gamma
def modelSummaryGatheral(totalVariance,
                         volLocale,
                         delta_T,
                         gamma_K,
                         benchDataset,
                         sigma=0.3,
                         az=40,
                         yMin = KMin,
                         yMax = KMax,
                         logMoneynessScale = False):
  """Run predictionDiagnosis on five predicted series against their benchmark columns.

  totalVariance : predicted series compared with the "impliedTotalVariance" column;
                  its square root is compared with the "ImpliedVol" column
                  (no division by maturity is applied)
  volLocale : predicted series compared with the "locvol" column
  delta_T : predicted series compared with the "Theta" column
  gamma_K : predicted series compared with the "Gamma Strike" column
  benchDataset : benchmark DataFrame, restricted to the index of totalVariance
  sigma : not used
  az : azimuth used for the local and implied volatility plots
  yMin, yMax : axis bounds; the defaults are the values of the notebook globals
               KMin / KMax at the time this cell is executed
  logMoneynessScale : if True every series goes through convertToLogMoneyness, the
                      bounds become log(S0[0]/yMax) and log(S0[0]/yMin), and 180 is
                      added to every azimuth

  Returns None.
  """
  refDataset = benchDataset.loc[totalVariance.index]

  # Identity when the original strike scale is kept
  rescale = convertToLogMoneyness if logMoneynessScale else (lambda serie : serie)
  variancePred, localVolPred, thetaPred, gammaPred, reference = [
      rescale(serie) for serie in (totalVariance, volLocale, delta_T, gamma_K, refDataset)]

  if logMoneynessScale :
    lowerBound, upperBound = np.log(S0[0]/yMax), np.log(S0[0]/yMin)
    azimutShift = 180
  else :
    lowerBound, upperBound = yMin, yMax
    azimutShift = 0

  def diagnose(prediction, refColumn, label, azimut):
    #One diagnosis : prediction against the benchmark column refColumn
    predictionDiagnosis(prediction, 
                        reference[refColumn], 
                        label,
                        az=azimut + azimutShift,
                        yMin = lowerBound,
                        yMax = upperBound)

  diagnose(variancePred, "impliedTotalVariance", "Implied Variance", 320)
  diagnose(localVolPred, "locvol", "Local volatility", az)
  diagnose(np.sqrt(variancePred), "ImpliedVol", "Implied volatility", az)
  diagnose(thetaPred, "Theta", "Theta", 340)
  diagnose(gammaPred, "Gamma Strike", "Gamma Strike", 340)
  return

#### Execution functions

In [None]:

#Train neural network with a decreasing rule for learning rate
#NNFactory :  function creating the architecture
#dataSet : training data
#activateRegularization : boolean, if true add bound penalization to dupire variance
#hyperparameters : dictionary containing various hyperparameters
#modelName : name under which tensorflow model is saved
def create_train_model_gatheral(NNFactory, 
                                dataSet, 
                                activateRegularization, 
                                hyperparameters,
                                modelName = "bestModel"):
    """Train a network on the "impliedTotalVariance" column of dataSet.

    Inputs fed to the network are the min-max scaled "logMoneyness" column and the
    "Maturity" column. Training is full-batch with Adam; the model is saved under
    modelName each time the training loss reaches a new minimum. When the loss is
    NaN, or has not improved during the last "Patience" epochs, the best model is
    restored and the learning rate is multiplied by 0.1, until it is no longer
    above "FinalLearningRate"; training then stops. If "FixedLearningRate" is true,
    training stops at the first such event instead of decreasing the rate.

    Keys read in hyperparameters : "nbUnits", "maxEpoch", "Patience",
    "FixedLearningRate", "LearningRateStart", "FinalLearningRate" (the whole
    dictionary is also forwarded to NNFactory / addDupireRegularisation).

    Relies on the notebook globals scaler, transformCustomMinMax and
    addDupireRegularisation.

    Returns formattingFunction(*evalList, loss_serie, dataSet), where evalList is
    TensorList evaluated on dataSet with the best saved model and loss_serie is the
    list of training losses, one per epoch.
    """
    hidden_nodes = hyperparameters["nbUnits"] 
    nbEpoch = hyperparameters["maxEpoch"] 
    patience = hyperparameters["Patience"]
    
    # `not` rather than `~` : on a Python bool, `~` returns -1 or -2, which are both
    # truthy, so the fixed-learning-rate mode could never be selected.
    activateLearningDecrease = not hyperparameters["FixedLearningRate"]
    learningRate = hyperparameters["LearningRateStart"]
    learningRateEpoch = 0
    finalLearningRate = hyperparameters["FinalLearningRate"]

    start = time.time()
    # Reset the graph
    tf.reset_default_graph()
    
    # Placeholders for input and output data   
    Moneyness = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])
    
    #Get scaling for log-moneyness
    colMoneynessIndex = dataSet.columns.get_loc("logMoneyness")
    maxColFunction = scaler.data_max_[colMoneynessIndex]
    minColFunction = scaler.data_min_[colMoneynessIndex]
    scF = (maxColFunction - minColFunction) 
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    moneynessMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    if activateRegularization : #Add pseudo local volatility regularisation
        vol_pred_tensor, TensorList, penalizationList, formattingFunction = addDupireRegularisation( *NNFactory(hidden_nodes,
                                                                                                                Moneyness,
                                                                                                                Maturity, 
                                                                                                                scaleTensor, 
                                                                                                                moneynessMinTensor, 
                                                                                                                vegaRef, 
                                                                                                                hyperparameters) ,
                                                                                                    vegaRef, 
                                                                                                    hyperparameters)
    else :
        vol_pred_tensor, TensorList, penalizationList, formattingFunction = NNFactory(hidden_nodes,
                                                                                      Moneyness, 
                                                                                      Maturity, 
                                                                                      scaleTensor, 
                                                                                      moneynessMinTensor, 
                                                                                      vegaRef, 
                                                                                      hyperparameters)

    vol_pred_tensor_sc= vol_pred_tensor
    TensorList[0] = vol_pred_tensor_sc
    
    # Define a loss function : log of (mean absolute error weighted by 1 / vegaRef, plus penalizations)
    pointwiseError = tf.reduce_mean(tf.abs(vol_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList) 
    loss = tf.log(tf.reduce_mean(errors))

    # Define a train operation to minimize the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    train = optimizer.minimize(loss)

    # Initialize variables and run session
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    scaledInput = transformCustomMinMax(dataSet, scaler)
    loss_serie = []

    def createFeedDict(batch):
        batchSize = batch.shape[0]
        # vegaRef is fed with ones, so the error is not actually vega-weighted.
        # learningRate is read when the dictionary is built.
        feedDict = {Moneyness : scaledInput["logMoneyness"].loc[batch.index].values.reshape(batchSize,1),
                    Maturity : batch["Maturity"].values.reshape(batchSize,1), 
                    y : batch["impliedTotalVariance"].values.reshape(batchSize,1),
                    learningRateTensor : learningRate,
                    vegaRef : np.ones_like(batch["VegaRef"].values.reshape(batchSize,1))}
        return feedDict

    epochFeedDict = createFeedDict(dataSet)

    sess = tf.Session()
    try:
        sess.run(init)

        #Learning rate is divided by 10 if no improvement is observed for training loss after "patience" epochs
        #Returns (continue training ?, learning rate, epoch of the latest learning rate change)
        def updateLearningRate(iterNumber, lr, lrEpoch):
            if not activateLearningDecrease :
                print("Constant learning rate, stop training")
                return False, lr, lrEpoch
            if lr > finalLearningRate :
                lr *= 0.1
                lrEpoch = iterNumber
                #Restart from the best model saved so far
                saver.restore(sess, modelName)
                print("Iteration : ", lrEpoch, "new learning rate : ", lr)
            else :
                print("Last Iteration : ", lrEpoch, "final learning rate : ", lr)
                return False, lr, lrEpoch
            return True, lr, lrEpoch

        def evalBestModel():
            if not activateLearningDecrease :
                print("Learning rate : ", learningRate, " final loss : ", min(loss_serie))
            currentBestLoss = sess.run(loss, feed_dict=epochFeedDict)
            currentBestPenalizations = sess.run([pointwiseError, penalizationList], feed_dict=epochFeedDict)
            print("Best loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, len(loss_serie), currentBestLoss))
            print("Best Penalization : ", currentBestPenalizations)
            return

        for i in range(nbEpoch):
            #Full-batch training : the only batch of an epoch is the whole dataSet
            miniBatchList = [dataSet]
            for miniBatch in miniBatchList :
                sess.run(train, feed_dict=createFeedDict(miniBatch))

            loss_serie.append(sess.run(loss, feed_dict=epochFeedDict))

            if (len(loss_serie) < 2) or (loss_serie[-1] <= min(loss_serie)):
                #Save model as model is improved
                saver.save(sess, modelName)
            if (np.isnan(loss_serie[-1]) or  #Unstable model
                ( (i-learningRateEpoch >= patience) and (min(loss_serie[-patience:]) > min(loss_serie)) ) ) : #No improvement for training loss during the latest "patience" epochs
                continueTraining, learningRate, learningRateEpoch = updateLearningRate(i, learningRate, learningRateEpoch)
                if continueTraining :
                    evalBestModel()
                else :
                    break
        saver.restore(sess, modelName)  

        evalBestModel()

        evalList  = sess.run(TensorList, feed_dict=epochFeedDict)
    finally:
        #Release the session even when training or restoring fails
        sess.close()
    end = time.time()
    print("Training Time : ", end - start)
    
    return formattingFunction(*evalList, loss_serie, dataSet) 

In [None]:
#Evaluate neural network without training, it restores parameters obtained from a pretrained model 
#NNFactory :  function creating the neural architecture
#dataSet : dataset on which neural network is evaluated 
#activateRegularization : boolean, if true add bound penalization for dupire variance
#hyperparameters : dictionary containing various hyperparameters
#modelName : name of tensorflow model to restore
def create_eval_model_gatheral(NNFactory, 
                               dataSet, 
                               activateRegularization, 
                               hyperparameters,
                               modelName = "bestModel"):
    hidden_nodes = hyperparameters["nbUnits"] 
    
    # Go through num_iters iterations (ignoring mini-batching)
    activateLearningDecrease = (~ hyperparameters["FixedLearningRate"])
    learningRate = hyperparameters["LearningRateStart"]

    # Reset the graph
    tf.reset_default_graph()
    
    # Placeholders for input and output data   
    Moneyness = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])
    
    #Get scaling for strike
    colMoneynessIndex = dataSet.columns.get_loc("logMoneyness")
    maxColFunction = scaler.data_max_[colMoneynessIndex]
    minColFunction = scaler.data_min_[colMoneynessIndex]
    scF = (maxColFunction - minColFunction) 
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    moneynessMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    price_pred_tensor = None
    TensorList = None
    penalizationList = None 
    formattingFunction = None
    if activateRegularization : #Add pseudo local volatility regularisation
        vol_pred_tensor, TensorList, penalizationList, formattingFunction = addDupireRegularisation( *NNFactory(hidden_nodes,
                                                                                                                Moneyness,
                                                                                                                Maturity, 
                                                                                                                scaleTensor, 
                                                                                                                moneynessMinTensor, 
                                                                                                                vegaRef, 
                                                                                                                hyperparameters) ,
                                                                                                    vegaRef, 
                                                                                                    hyperparameters)
    else :
        vol_pred_tensor, TensorList, penalizationList, formattingFunction = NNFactory(hidden_nodes,
                                                                                      Moneyness, 
                                                                                      Maturity, 
                                                                                      scaleTensor, 
                                                                                      moneynessMinTensor, 
                                                                                      vegaRef, 
                                                                                      hyperparameters)

    vol_pred_tensor_sc= vol_pred_tensor
    TensorList[0] = vol_pred_tensor_sc
    
    # Define a loss function
    pointwiseError = tf.reduce_mean(tf.abs(vol_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList)
    loss = tf.log(tf.reduce_mean(errors))


    # Define a train operation to minimize the loss
    lr = learningRate 

    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    train = optimizer.minimize(loss)

    # Initialize variables and run session
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(init)
    n = dataSet.shape[0]
    scaledInput = transformCustomMinMax(dataSet, scaler)

    
    maturity = dataSet["Maturity"].values.reshape(n,1)
    loss_serie = []

    def createFeedDict(batch):
        batchSize = batch.shape[0]
        feedDict = {Moneyness : scaledInput["logMoneyness"].loc[batch.index].values.reshape(batchSize,1),
                    Maturity : batch["Maturity"].values.reshape(batchSize,1), 
                    y : batch["impliedTotalVariance"].values.reshape(batchSize,1),
                    learningRateTensor : learningRate,
                    vegaRef : np.ones_like(batch["VegaRef"].values.reshape(batchSize,1))}
        return feedDict
    
    epochFeedDict = createFeedDict(dataSet)

    def evalBestModel():
        if not activateLearningDecrease :
            print("Learning rate : ", learningRate, " final loss : ", min(loss_serie))
        currentBestLoss = sess.run(loss, feed_dict=epochFeedDict)
        currentBestPenalizations = sess.run([pointwiseError, penalizationList], feed_dict=epochFeedDict)
        print("Best loss (hidden nodes: %d, iterations: %d): %.2f" % (hidden_nodes, len(loss_serie), currentBestLoss))
        print("Best Penalization : ", currentBestPenalizations)
        return
    
    saver.restore(sess, modelName)  
    
    evalBestModel()

    evalList  = sess.run(TensorList, feed_dict=epochFeedDict)
    
    sess.close()
    
    return formattingFunction(*evalList, [0], dataSet)

In [None]:
def evalVolLocaleGatheral(NNFactory,
                          strikes,
                          maturities,
                          dataSet,
                          hyperParameters,
                          modelName = "bestModel"):
    """Evaluate the restored network on arbitrary (strike, maturity) points.

    NNFactory : function creating the neural architecture
    strikes, maturities : one-dimensional arrays of the same length
    dataSet : dataframe, only used to locate the "logMoneyness" column of `scaler`
    hyperParameters : dictionary forwarded to NNFactory
    modelName : name of tensorflow model to restore

    Relies on module-level names defined elsewhere in the notebook: `scaler`,
    `changeOfVariable` and `S0`.

    Returns a pandas Series holding the second tensor of the list returned by
    NNFactory, indexed by (Strike, Maturity).
    """
    hidden_nodes = hyperParameters["nbUnits"] 

    # Reset the graph
    tf.reset_default_graph()
    
    # Placeholders for input and output data   
    Moneyness = tf.placeholder(tf.float32,[None,1])
    Maturity = tf.placeholder(tf.float32,[None,1])
    y = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='y')
    vegaRef = tf.placeholder(shape=(None, 1), dtype=tf.float32, name='vegaRef')
    learningRateTensor = tf.placeholder(tf.float32,[])
    
    #Get scaling for log-moneyness
    colMoneynessIndex = dataSet.columns.get_loc("logMoneyness")
    maxColFunction = scaler.data_max_[colMoneynessIndex]
    minColFunction = scaler.data_min_[colMoneynessIndex]
    scF = (maxColFunction - minColFunction) 
    scaleTensor = tf.constant(scF, dtype=tf.float32)
    moneynessMinTensor = tf.constant(minColFunction, dtype=tf.float32)

    # Use the dictionary received as argument; the previous code silently read
    # the notebook-level `hyperparameters` global instead.
    vol_pred_tensor, TensorList, penalizationList, formattingFunction = NNFactory(hidden_nodes,
                                                                                  Moneyness,
                                                                                  Maturity,
                                                                                  scaleTensor,
                                                                                  moneynessMinTensor,
                                                                                  vegaRef,
                                                                                  hyperParameters)

    vol_pred_tensor_sc= vol_pred_tensor
    TensorList[0] = vol_pred_tensor_sc
    
    # Loss and train operation are never run here. They are still built because
    # the optimizer adds its own variables to the graph, presumably so that the
    # variable set seen by the Saver matches the one used at training time.
    pointwiseError = tf.reduce_mean(tf.abs(vol_pred_tensor_sc - y) / vegaRef)
    errors = tf.add_n([pointwiseError] + penalizationList)
    loss = tf.log(tf.reduce_mean(errors))

    optimizer = tf.train.AdamOptimizer(learning_rate=learningRateTensor)
    optimizer.minimize(loss)

    # Scale log-moneyness the same way as the "logMoneyness" column of `scaler`
    changedVar = changeOfVariable(strikes, maturities)
    moneyness = np.log(changedVar[0] / S0[0]) 
    scaledMoneyness = (moneyness-minColFunction)/scF

    def createFeedDict(m, t):
        batchSize = m.shape[0]
        feedDict = {Moneyness : np.reshape(m, (batchSize,1)), 
                    Maturity : np.reshape(t, (batchSize,1)) ,  
                    vegaRef : np.ones((batchSize,1))}
        return feedDict
    
    epochFeedDict = createFeedDict(scaledMoneyness, maturities)

    # Initialize variables, then overwrite them with the saved parameters
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    try:
        sess.run(init)
        saver.restore(sess, modelName)  
        evalList = sess.run(TensorList, feed_dict=epochFeedDict)
    finally:
        # Release the session even if restoring or evaluating fails
        sess.close()
    
    return pd.Series(evalList[1].flatten(), index = pd.MultiIndex.from_arrays([strikes, maturities], names=('Strike', 'Maturity')))

#### Architecture

In [None]:

#Dupire formula from exact derivative computation
def dupireFormulaGatheral(HessianMoneyness, 
                          GradMoneyness,
                          GradMaturity, 
                          totalVariance,
                          ScaledMoneyness,
                          scaleTensor,
                          MoneynessMinTensor,
                          IsTraining=True):
  """Local volatility from total variance w and its derivatives.

  With k the unscaled log-moneyness, the local variance is computed as
      dw/dT / (1 - (k/w) dw/dk + 1/4 (-1/4 - 1/w + (k/w)^2) (dw/dk)^2 + 1/2 d2w/dk2)

  HessianMoneyness, GradMoneyness : derivatives of w with respect to the
      *scaled* moneyness; they are rescaled here with scaleTensor.
  GradMaturity : derivative of w with respect to maturity.
  IsTraining : unused in this function.

  Returns (local volatility, local variance, denominator of the formula).
  """
  oneConstant = tf.constant(1.0)
  quarterConstant = tf.constant(0.25)
  halfConstant = tf.constant(0.5)

  # Undo the min-max scaling applied to the network input
  moneyness = ScaledMoneyness * scaleTensor + MoneynessMinTensor 
  
  dT = GradMaturity

  # Chain rule : derivatives w.r.t. scaled input -> derivatives w.r.t. k
  dMoneyness = GradMoneyness / scaleTensor
  dMoneynessFactor = (moneyness/totalVariance)
  dMoneynessSquaredFactor = quarterConstant * (-quarterConstant - oneConstant/totalVariance + tf.square(dMoneynessFactor))

  gMoneyness =  HessianMoneyness / tf.square(scaleTensor)
  gMoneynessFactor = halfConstant
  # Previously stored as `denominator` while the return statement referred to
  # the undefined name `gatheralDenominator`, raising a NameError on every call.
  gatheralDenominator = oneConstant - dMoneynessFactor * (dMoneyness) + dMoneynessSquaredFactor * tf.square(dMoneyness) + gMoneynessFactor *  gMoneyness
  
  gatheralVar = dT / gatheralDenominator
  #Initial weights of neural network can be random which lead to negative dupireVar
  #(tf.sqrt then yields NaN for those points)
  gatheralVolTensor = tf.sqrt(gatheralVar) 
  return gatheralVolTensor, gatheralVar, gatheralDenominator

In [None]:
#Dupire formula with derivative obtained from native tensorflow algorithmic differentiation
def rawDupireFormulaGatheral(totalVarianceTensor, 
                             scaledMoneynessTensor, 
                             maturityTensor,
                             scaleTensor,
                             moneynessMinTensor,
                             IsTraining=True):
  """Local volatility from total variance, derivatives taken with tf.gradients.

  With w the total variance and k the unscaled log-moneyness, the local
  variance is dw/dT divided by
      1 - (k/w) dw/dk + 1/4 (-1/4 - 1/w + (k/w)^2) (dw/dk)^2 + 1/2 d2w/dk2

  IsTraining is not used in this function.

  Returns (local volatility, dw/dT, d2w/dk2, local variance, denominator).
  """
  nbRows = tf.shape(scaledMoneynessTensor)[0]
  columnShape = [nbRows, -1]
  two = tf.constant(2.0)  # not used below, kept so the graph content is unchanged
  one = tf.constant(1.0)
  quarter = tf.constant(0.25)
  half = tf.constant(0.5)

  # Undo the min-max scaling applied to the network input
  logMoneyness = scaledMoneynessTensor * scaleTensor + moneynessMinTensor

  # First derivative in k (chain rule through the scaling)
  gradScaledK = tf.gradients(totalVarianceTensor, scaledMoneynessTensor, name="dK")[0]
  dMoneyness = tf.reshape(gradScaledK, shape=columnShape) / scaleTensor
  kOverW = logMoneyness / totalVarianceTensor
  squaredGradWeight = quarter * (-quarter - one / totalVarianceTensor + tf.square(kOverW))

  # Second derivative in k : dMoneyness is already expressed in k units
  hessScaledK = tf.gradients(dMoneyness, scaledMoneynessTensor, name="hK")[0]
  gMoneyness = tf.reshape(hessScaledK, shape=columnShape) / scaleTensor
  hessianWeight = half

  gatheralDenominator = (one
                         - kOverW * dMoneyness
                         + squaredGradWeight * tf.square(dMoneyness)
                         + hessianWeight * gMoneyness)

  gradT = tf.gradients(totalVarianceTensor, maturityTensor, name="dT")[0]
  dT = tf.reshape(gradT, shape=columnShape)

  #Random initial weights can give a negative local variance, tf.sqrt then returns NaN
  gatheralVar = dT / gatheralDenominator
  gatheralVol = tf.sqrt(gatheralVar)
  return gatheralVol, dT, gMoneyness, gatheralVar, gatheralDenominator

In [None]:
#Soft penalties applied when the maturity derivative or the Gatheral denominator fall below their lower bounds
def arbitragePenalties(dT, gatheralDenominator, vegaRef, hyperparameters):
    """Return [calendar_penalty, butterfly_penalty] as scalar tensors.

    Each penalty is the mean positive shortfall of the quantity below its
    lower bound ("lowerBoundTheta" for dT, "lowerBoundGamma" for the
    denominator), weighted by "lambdaSoft" / mean(vegaRef).
    """
    softWeight = hyperparameters["lambdaSoft"] / tf.reduce_mean(vegaRef)
    thetaFloor = tf.constant(hyperparameters["lowerBoundTheta"])
    gammaFloor = tf.constant(hyperparameters["lowerBoundGamma"])

    calendarShortfall = tf.reduce_mean(tf.nn.relu(-dT + thetaFloor))
    calendar_penalty = softWeight * calendarShortfall

    # NOTE(review): the extra factor is "lowerBoundGamma" (a bound), not
    # "lambdaGamma" (a penalization coefficient) - confirm this is intended.
    butterflyShortfall = tf.reduce_mean(tf.nn.relu(-gatheralDenominator + gammaFloor))
    butterfly_penalty = softWeight * hyperparameters["lowerBoundGamma"] * butterflyShortfall

    return [calendar_penalty, butterfly_penalty]

In [None]:

def NNArchitectureVanillaSoftGatheralAckerer(n_units,
                                             scaledMoneynessTensor,
                                             maturityTensor,
                                             scaleTensor,
                                             moneynessMinTensor,
                                             vegaRef,
                                             hyperparameters,
                                             IsTraining=True):
  """Three hidden unconstrained layers followed by a linear output unit.

  Returns (out, [out, local vol, dT, second k-derivative, local variance],
  [calendar penalty, butterfly penalty], evalAndFormatDupireResult).

  NOTE(review): the local volatility is derived from out * maturityTensor
  while `out` itself is returned as the prediction - confirm which quantity
  the training target represents.
  """
  #Network input : scaled moneyness and maturity side by side
  layer = tf.concat([scaledMoneynessTensor, maturityTensor], axis=-1)

  #Hidden layers, stacked in order
  for layerName in ("Hidden1", "Hidden2", "Hidden3"):
    layer = unconstrainedLayer(n_units = n_units,
                               tensor = layer,
                               isTraining = IsTraining,
                               name = layerName)

  #Output layer, no activation
  out = unconstrainedLayer(n_units = 1,
                           tensor = layer,
                           isTraining = IsTraining,
                           name = "Output",
                           activation = None)

  #Local volatility
  localVolOutputs = rawDupireFormulaGatheral(out * maturityTensor,
                                             scaledMoneynessTensor,
                                             maturityTensor,
                                             scaleTensor,
                                             moneynessMinTensor,
                                             IsTraining=IsTraining)
  gatheralVol, theta, hK, gatheralVar, gatheralDenominator = localVolOutputs

  #Soft constraints for no arbitrage
  penalties = arbitragePenalties(theta, gatheralDenominator, vegaRef, hyperparameters)

  evaluatedTensors = [out, gatheralVol, theta, hK, gatheralVar]
  return out, evaluatedTensors, [penalties[0], penalties[1]], evalAndFormatDupireResult

In [None]:

def NNArchitectureVanillaSoftGatheral(n_units, 
                                      scaledMoneynessTensor,
                                      maturityTensor,
                                      scaleTensor,
                                      moneynessMinTensor,
                                      vegaRef,
                                      hyperparameters,
                                      IsTraining=True):
  """Two hidden unconstrained layers followed by a linear output unit.

  Returns (out, [out, local vol, dT, second k-derivative, local variance],
  [calendar penalty, butterfly penalty], evalAndFormatDupireResult).

  NOTE(review): the local volatility is derived from out * maturityTensor
  while `out` itself is returned as the prediction - confirm which quantity
  the training target represents.
  """
  #Network input : scaled moneyness and maturity side by side
  layer = tf.concat([scaledMoneynessTensor, maturityTensor], axis=-1)

  #Hidden layers, stacked in order
  for layerName in ("Hidden1", "Hidden2"):
    layer = unconstrainedLayer(n_units = n_units,
                               tensor = layer,
                               isTraining = IsTraining,
                               name = layerName)

  #Output layer, no activation
  out = unconstrainedLayer(n_units = 1,
                           tensor = layer,
                           isTraining = IsTraining,
                           name = "Output",
                           activation = None)

  #Local volatility
  localVolOutputs = rawDupireFormulaGatheral(out * maturityTensor,
                                             scaledMoneynessTensor,
                                             maturityTensor,
                                             scaleTensor,
                                             moneynessMinTensor,
                                             IsTraining=IsTraining)
  gatheralVol, theta, hK, gatheralVar, gatheralDenominator = localVolOutputs

  #Soft constraints for no arbitrage
  penalties = arbitragePenalties(theta, gatheralDenominator, vegaRef, hyperparameters)

  evaluatedTensors = [out, gatheralVol, theta, hK, gatheralVar]
  return out, evaluatedTensors, [penalties[0], penalties[1]], evalAndFormatDupireResult

#### Execution

In [None]:
#Hyperparameters used by the Gatheral network cells below
hyperparameters = {
    #Penalization coefficients (trailing notes list other values that were tried)
    "lambdaLocVol": 0.01,  # 100
    "lambdaSoft": 10,  # 10, 100
    "lambdaGamma": 10,  # 10, 10000

    #Lower bounds used by the derivative soft constraints
    "lowerBoundTheta": 0.01,
    "lowerBoundGamma": 0.00001,

    #Local variance parameters
    "DupireVarCap": 10,
    "DupireVolLowerBound": 0.05,
    "DupireVolUpperBound": 0.40,

    #Learning rate scheduler
    "LearningRateStart": 0.1,
    "Patience": 100,
    "batchSize": 50,
    "FinalLearningRate": 1e-6,
    "FixedLearningRate": False,

    #Training parameters
    "nbUnits": 200,  # number of units for hidden layers
    "maxEpoch": 10000,  # maximum number of epochs
}

In [None]:
# Train the two-hidden-layer Gatheral architecture on the rows of scaledDataSet
# whose "Maturity" index level is above 0.01, with regularization activated.
# "convexSoftGatheralVolModel" is the model name reused by the evaluation cells.
y_pred4G, volLocale4G, dNN_T4G, gNN_K4G, lossSerie4G = create_train_model_gatheral(NNArchitectureVanillaSoftGatheral,
                                                                                   scaledDataSet[scaledDataSet.index.get_level_values("Maturity") > 0.01],
                                                                                   True,
                                                                                   hyperparameters,
                                                                                   modelName = "convexSoftGatheralVolModel")

In [None]:
# Plot the training loss history, then display its last value as the cell output
plotEpochLoss(lossSerie4G)
lossSerie4G.iloc[-1]

In [None]:
# Re-evaluate the saved model on the training rows (maturity above 0.01) and summarize it.
# NOTE(review): the model was trained with create_train_model_gatheral but is
# evaluated here with create_eval_model, not create_eval_model_gatheral - confirm
# that the non-Gatheral evaluation function is really the intended one.
y_pred4G, volLocale4G, dNN_T4G, gNN_K4G, lossSerie4G = create_eval_model(NNArchitectureVanillaSoftGatheral,
                                                                         scaledDataSet[scaledDataSet.index.get_level_values("Maturity") > 0.01],
                                                                         True,
                                                                         hyperparameters,
                                                                         modelName = "convexSoftGatheralVolModel")
modelSummaryGatheral(y_pred4G, volLocale4G, dNN_T4G, gNN_K4G, dataSet[dataSet.index.get_level_values("Maturity") > 0.01])

In [None]:
# Display the local volatility entries whose first index level equals midS0, for every second-level value
volLocale4G.loc[(midS0,slice(None))]

In [None]:
# Evaluate the saved model on the testing rows (maturity above 0.01) and summarize it.
# NOTE(review): create_eval_model is used although the model was trained with
# create_train_model_gatheral - confirm create_eval_model_gatheral is not the intended call.
y_pred4TestG, volLocale4TestG, dNN_T4TestG, gNN_K4TestG, lossSerie4TestG = create_eval_model(NNArchitectureVanillaSoftGatheral,
                                                                                             scaledDataSetTest[scaledDataSetTest.index.get_level_values("Maturity") > 0.01],
                                                                                             True,
                                                                                             hyperparameters,
                                                                                             modelName = "convexSoftGatheralVolModel")
modelSummaryGatheral(y_pred4TestG, volLocale4TestG, dNN_T4TestG, gNN_K4TestG, dataSetTest[dataSetTest.index.get_level_values("Maturity") > 0.01])

In [None]:
# Preview the first rows of the scaled training set
scaledDataSet.head()

In [None]:
# Summary restricted to the longer maturities, displayed on a log-moneyness scale.
# NOTE(review): model outputs are filtered with ">= 0.19" while dataSet is filtered
# with "> 0.19"; the selections differ if a maturity equals 0.19 exactly - confirm.
modelSummaryGatheral(y_pred4G[y_pred4G.index.get_level_values("Maturity") >= 0.19], 
                     volLocale4G[volLocale4G.index.get_level_values("Maturity") >= 0.19], 
                     dNN_T4G[dNN_T4G.index.get_level_values("Maturity") >= 0.19], 
                     gNN_K4G[gNN_K4G.index.get_level_values("Maturity") >= 0.19], 
                     dataSet[dataSet.index.get_level_values("Maturity") > 0.19],
                     logMoneynessScale = True)

In [None]:
# Summary of the testing set results (maturity above 0.01) on a log-moneyness scale
modelSummaryGatheral(y_pred4TestG, 
                     volLocale4TestG, 
                     dNN_T4TestG, 
                     gNN_K4TestG, 
                     dataSetTest[dataSetTest.index.get_level_values("Maturity") > 0.01],
                     logMoneynessScale = True)

#### Monte Carlo backtest

In [None]:
def neuralVolLocaleDugas(s,t):
  """Evaluate the saved "convexSoftGatheralVolModel" at strikes s and maturities t.

  Despite its name, this wrapper evaluates the Gatheral architecture
  (NNArchitectureVanillaSoftGatheral) through evalVolLocaleGatheral.
  Missing values are dropped from the returned series.
  """
  localVolSerie = evalVolLocaleGatheral(NNArchitectureVanillaSoftGatheral,
                                        s,
                                        t,
                                        dataSetTest,
                                        hyperparameters,
                                        modelName = "convexSoftGatheralVolModel")
  cleanedSerie = localVolSerie.dropna()
  return cleanedSerie

In [None]:
# Evaluate the neural local volatility on the flattened volLocaleGrid
# (first element used as strikes, second element as maturities) and preview it
volLocalInterp7 = neuralVolLocaleDugas(volLocaleGrid[0].flatten(),
                                       volLocaleGrid[1].flatten())
volLocalInterp7.head()

In [None]:
# Evaluate the neural local volatility at the (Strike, Maturity) points of the testing set and preview it
volLocalInterp8 = neuralVolLocaleDugas(dataSetTest.index.get_level_values("Strike").values.flatten(),
                                       dataSetTest.index.get_level_values("Maturity").values.flatten())
volLocalInterp8.head()

In [None]:
# Two-argument callable forwarding x and y to interpolatedMCLocalVolatility with volLocalInterp7 as first argument
nnVolLocale7 = lambda x,y : interpolatedMCLocalVolatility(volLocalInterp7, x, y)

In [None]:
# Two-argument callable forwarding x and y to interpolatedMCLocalVolatility with volLocalInterp8 as first argument
nnVolLocale8 = lambda x,y : interpolatedMCLocalVolatility(volLocalInterp8, x, y)

In [None]:
# Plot the local volatility evaluated on the grid, with y bounds set to 0 and 2 times S0
plotSerie(volLocalInterp7,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the local volatility evaluated at the testing points, with y bounds set to 0 and 2 times S0
plotSerie(volLocalInterp8,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Run MonteCarloPricerVectorized (defined elsewhere) on the testing set with
# nnVolLocale7 as local volatility function, then preview the result
mcResVolLocale7 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale7)
mcResVolLocale7.head()

In [None]:
# Compare the Monte Carlo results with the "Price" column of the testing set
predictionDiagnosis(mcResVolLocale7, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Persist the Monte Carlo results to a csv file in the working directory
mcResVolLocale7.to_csv("mcResVolLocale7.csv")

In [None]:
# Run MonteCarloPricerVectorized (defined elsewhere) on the testing set with
# nnVolLocale8 as local volatility function, then preview the result
mcResVolLocale8 = MonteCarloPricerVectorized(S0[0],
                                             dataSetTest,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale8)
mcResVolLocale8.head()

In [None]:
# Compare the Monte Carlo results with the "Price" column of the testing set
predictionDiagnosis(mcResVolLocale8, dataSetTest["Price"], " Price ", yMin=4100)

In [None]:
# Persist the Monte Carlo results to a csv file in the working directory
mcResVolLocale8.to_csv("mcResVolLocale8.csv")

## Hyperparameter selection

In [None]:
def selectHyperparameters(hyperparameters, parameterOfInterest, modelFactory, modelName, activateDupireReg, logGrid = True):
    """Train the model for five values of one hyperparameter and plot the outcome.

    hyperparameters : dictionary of hyperparameters, temporarily modified and
                      restored before returning (even if training fails)
    parameterOfInterest : key of the hyperparameter to vary
    modelFactory : function creating the neural architecture
    modelName : name under which the tensorflow model is saved
    activateDupireReg : boolean forwarded to create_train_model
    logGrid : if True the current value is multiplied by 0.01, 0.1, 1, 10, 100,
              otherwise by 0.2, 0.5, 1, 2, 5

    Each training uses a tenth of "maxEpoch". The best training loss and the
    number of non-positive entries in the two derivative outputs are plotted
    against the tested value. Relies on the notebook-level `create_train_model`
    and `scaledDataSet`. Returns None.
    """
    oldValue = hyperparameters[parameterOfInterest]
    # Exact powers of ten instead of exp(log(10) * k), which is only approximate
    multipliers = 10.0 ** np.array([-2, -1, 0, 1, 2]) if logGrid else np.array([0.2, 0.5, 1, 2, 5])
    gridValue = oldValue * multipliers
    isIntegerParameter = isinstance(oldValue, (int, np.integer)) and not isinstance(oldValue, bool)

    def castCandidate(v):
        # Integer-valued hyperparameters (e.g. a number of units) stay integers.
        # Fractional values are kept as floats: the former blanket int(v) turned
        # every candidate below 1 into 0 (0.01 on a log grid gave 0, 0, 0, 0, 1).
        if isIntegerParameter and abs(v) >= 1:
            return int(round(v))
        return float(v)
    
    oldNbEpochs = hyperparameters["maxEpoch"]
    trainLoss = {}
    arbitrageViolation = {}
    try:
        hyperparameters["maxEpoch"] = int(oldNbEpochs / 10)
        for v in gridValue :
            candidate = castCandidate(v)
            hyperparameters[parameterOfInterest] = candidate
            pred, volLoc, theta, gammaK, loss = create_train_model(modelFactory,
                                                                   scaledDataSet,
                                                                   activateDupireReg,
                                                                   hyperparameters,
                                                                   modelName = modelName)
            nbArbitrageViolation = np.sum((theta <= 0)) + np.sum((gammaK <= 0))
            # Results are keyed by the value actually used for training
            trainLoss[candidate] = min(loss)
            arbitrageViolation[candidate] = nbArbitrageViolation
            print()
            print()
    finally:
        # Restore the caller's dictionary even when a training run raises or is interrupted
        hyperparameters["maxEpoch"] = oldNbEpochs
        hyperparameters[parameterOfInterest] = oldValue

    # Plot curves
    fig, ax1 = plt.subplots()
    if logGrid :
        plt.xscale('symlog')
    
    color = 'tab:red'
    ax1.set_xlabel('Value')
    ax1.set_ylabel('Loss', color=color)
    ax1.plot(pd.Series(trainLoss), color=color)
    ax1.tick_params(axis='y', labelcolor=color)
    
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    
    color = 'tab:blue'
    ax2.set_ylabel('Arbitrage violation', color=color)  # we already handled the x-label with ax1
    ax2.plot(pd.Series(arbitrageViolation), color=color)
    ax2.tick_params(axis='y', labelcolor=color)
    
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
    
    return

In [None]:
def selectHyperparametersRandom(hyperparameters, 
                                parametersOfInterest, 
                                modelFactory, 
                                modelName, 
                                activateDupireReg, 
                                nbAttempts,
                                logGrid = True):
    """Random search : train nbAttempts models with randomly rescaled hyperparameters.

    hyperparameters : dictionary of hyperparameters, temporarily modified and
                      restored before returning (even if training fails)
    parametersOfInterest : list of keys to vary
    modelFactory : function creating the neural architecture
    modelName : name under which the tensorflow model is saved
    activateDupireReg : boolean forwarded to create_train_model
    nbAttempts : number of random combinations to try
    logGrid : if True the multipliers are 0.01, 0.1, 1, 10, 100,
              otherwise 0.2, 0.5, 1, 2, 5

    For each attempt, every parameter of interest is set to its original value
    times a multiplier drawn uniformly from the grid; training uses a tenth of
    "maxEpoch". Tested values, best loss and number of non-positive derivative
    entries are printed. Relies on the notebook-level `create_train_model` and
    `scaledDataSet`. Returns None.
    """
    oldValue = {k : hyperparameters[k] for k in parametersOfInterest}
    
    # Exact powers of ten instead of exp(log(10) * k), which is only approximate
    gridValue = 10.0 ** np.array([-2, -1, 0, 1, 2]) if logGrid else np.array([0.2, 0.5, 1, 2, 5])
    
    oldNbEpochs = hyperparameters["maxEpoch"]
    try:
        hyperparameters["maxEpoch"] = int(oldNbEpochs / 10)
        for _ in range(nbAttempts) :
            # One grid index per parameter of interest
            combination = np.random.randint(len(gridValue), size = len(parametersOfInterest) )
            for name, gridIndex in zip(parametersOfInterest, combination):
                hyperparameters[name] = oldValue[name] * gridValue[int(gridIndex)]
                print(name , " : ", hyperparameters[name])
            pred, volLoc, theta, gammaK, loss = create_train_model(modelFactory,
                                                                   scaledDataSet,
                                                                   activateDupireReg,
                                                                   hyperparameters,
                                                                   modelName = modelName)
            nbArbitrageViolation = np.sum((theta <= 0)) + np.sum((gammaK <= 0))
            print("loss : ", min(loss))
            print("nbArbitrageViolation : ", nbArbitrageViolation)
            print()
            print()
            print()
    finally:
        # Restore the caller's dictionary even when a training run raises or is interrupted
        hyperparameters["maxEpoch"] = oldNbEpochs
        for k in parametersOfInterest :
            hyperparameters[k] = oldValue[k]
    
    return

In [None]:
# Random search over the three penalization coefficients : 100 attempts on a log grid,
# with regularization activated, models saved under the name "hyperParameters"
selectHyperparametersRandom(hyperparameters,
                            ["lambdaLocVol","lambdaSoft","lambdaGamma"],
                            NNArchitectureConstrainedRawDupire,
                            "hyperParameters",
                            True, 
                            100,
                            logGrid = True)

In [None]:

# Overwrite the three penalization coefficients before the grid searches below
hyperparameters["lambdaLocVol"] = 100
hyperparameters["lambdaSoft"] = 100 
hyperparameters["lambdaGamma"] = 10000

In [None]:
# Log-grid search on "lambdaLocVol" for the NNArchitectureVanillaSoftDupire architecture
selectHyperparameters(hyperparameters, 
                      "lambdaLocVol", 
                      NNArchitectureVanillaSoftDupire, 
                      "hyperParameters", 
                      True, 
                      logGrid = True)

In [None]:
# Log-grid search on "DupireVarCap" for the NNArchitectureConstrainedRawDupire architecture
selectHyperparameters(hyperparameters, 
                      "DupireVarCap", 
                      NNArchitectureConstrainedRawDupire, 
                      "hyperParameters", 
                      True, 
                      logGrid = True)

In [None]:
# Log-grid search on "lambdaLocVol" for the NNArchitectureUnconstrainedDupire architecture
selectHyperparameters(hyperparameters, 
                      "lambdaLocVol", 
                      NNArchitectureUnconstrainedDupire, 
                      "hyperParameters", 
                      True, 
                      logGrid = True)

In [None]:
# Center value of "lambdaLocVol" for the next grid search
hyperparameters["lambdaLocVol"] = 100

In [None]:
# Log-grid search on "lambdaLocVol" for the NNArchitectureConstrainedRawDupire architecture
selectHyperparameters(hyperparameters, 
                      "lambdaLocVol", 
                      NNArchitectureConstrainedRawDupire, 
                      "hyperParameters", 
                      True, 
                      logGrid = True)

In [None]:
# Center value of "nbUnits" for the next grid search
hyperparameters["nbUnits"] = 40

In [None]:
# Linear-grid search (multipliers 0.2, 0.5, 1, 2, 5) on "nbUnits" for the NNArchitectureVanillaSoftDupire architecture
selectHyperparameters(hyperparameters, 
                      "nbUnits", 
                      NNArchitectureVanillaSoftDupire, 
                      "hyperParameters", 
                      True, 
                      logGrid = False)

In [None]:
# Set the number of hidden units back to 200 after the search
hyperparameters["nbUnits"] = 200

## Dugas network

In [None]:
#Hyperparameters used by the Dugas network cells below
hyperparameters = {
    #Penalization coefficients
    "lambdaLocVol": 1000,
    "lambdaSoft": 100,
    "lambdaGamma": 10000,

    #Lower bounds used by the derivative soft constraints
    "lowerBoundTheta": 0.01,
    "lowerBoundGamma": 0.00001,

    #Local variance parameters
    "DupireVarCap": 10,
    "DupireVolLowerBound": 0.05,
    "DupireVolUpperBound": 0.40,

    #Learning rate scheduler
    "LearningRateStart": 0.1,
    "Patience": 100,
    "batchSize": 50,
    "FinalLearningRate": 1e-6,
    "FixedLearningRate": False,

    #Training parameters
    "nbUnits": 200,  # number of units for hidden layers
    "maxEpoch": 10000,  # maximum number of epochs
}

In [None]:
def convexDugasLayer(n_units,  tensor, isTraining, name):
  """Dense layer with strictly positive weights and a softplus activation.

  The trainable weight variable is passed through tf.exp before the matrix
  product, so the effective weights are positive; the bias is unconstrained.
  isTraining is not used in this function.
  """
  with tf.name_scope(name):
    inputWidth = tensor.get_shape().as_list()[1]
    weightShape = [inputWidth, n_units]

    biasStart = tf.zeros_initializer()([n_units], dtype=tf.float32)
    bias = tf.Variable(initial_value = biasStart,
                       trainable = True,
                       shape = [n_units],
                       dtype = tf.float32,
                       name = name + "Bias")

    weightStart = tf.keras.initializers.glorot_normal()(weightShape, dtype=tf.float32)
    logWeights = tf.Variable(initial_value = weightStart,
                             trainable = True,
                             shape = weightShape,
                             dtype = tf.float32,
                             name = name + "Weights")
    weights = tf.exp(logWeights)

    preActivation = tf.matmul(tensor, weights) + bias
    return K.softplus(preActivation)

def monotonicDugasLayer(n_units,  tensor, isTraining, name):
  """Dense layer with strictly positive weights and a sigmoid activation.

  The trainable weight variable is passed through tf.exp before the matrix
  product, so the effective weights are positive; the bias is unconstrained.
  isTraining is not used in this function.
  """
  with tf.name_scope(name):
    inputWidth = tensor.get_shape().as_list()[1]
    weightShape = [inputWidth, n_units]

    biasStart = tf.zeros_initializer()([n_units], dtype=tf.float32)
    bias = tf.Variable(initial_value = biasStart,
                       trainable = True,
                       shape = [n_units],
                       dtype = tf.float32,
                       name = name + "Bias")

    weightStart = tf.keras.initializers.glorot_normal()(weightShape, dtype=tf.float32)
    logWeights = tf.Variable(initial_value = weightStart,
                             trainable = True,
                             shape = weightShape,
                             dtype = tf.float32,
                             name = name + "Weights")
    weights = tf.exp(logWeights)

    preActivation = tf.matmul(tensor, weights) + bias
    return K.sigmoid(preActivation)

def convexDugasOutputLayer(tensor, isTraining, name):
  """Linear output layer whose weights and bias are both strictly positive.

  Both the scalar bias and the single-column kernel are stored as
  unconstrained variables and mapped through ``tf.exp`` before use.

  Args:
    tensor: input tensor; the size of its axis 1 is used as the number of
      input features.
    isTraining: accepted for interface compatibility, not used here.
    name: name scope, also used as prefix for the variable names.

  Returns:
    ``tensor @ exp(W) + exp(b)`` with no activation applied.
  """
  with tf.name_scope(name):
    inputDim = tensor.get_shape().as_list()[1]
    kernelShape = [inputDim, 1]

    # Scalar bias: raw value starts at 0, so the effective bias starts at exp(0).
    rawBiasInit = tf.zeros_initializer()([], dtype=tf.float32)
    rawBias = tf.Variable(initial_value=rawBiasInit,
                          shape=[],
                          trainable=True,
                          dtype=tf.float32,
                          name=name + "Bias")
    bias = tf.exp(rawBias)

    # Single-column kernel (Glorot normal), exponentiated to stay positive.
    rawKernelInit = tf.keras.initializers.glorot_normal()(kernelShape, dtype=tf.float32)
    rawKernel = tf.Variable(initial_value=rawKernelInit,
                            shape=kernelShape,
                            trainable=True,
                            dtype=tf.float32,
                            name=name + "Weights")
    weights = tf.exp(rawKernel)

    return tf.matmul(tensor, weights) + bias



def NNArchitectureHardConstrainedDugas(n_units, strikeTensor, 
                                       maturityTensor,
                                       scaleTensor,
                                       strikeMinTensor, 
                                       vegaRef,
                                       hyperparameters,
                                       IsTraining=True):
  """Build the Dugas-style network and the Dupire quantities derived from it.

  A softplus branch fed with the strike and a sigmoid branch fed with the
  maturity are multiplied element-wise and sent to a positive-weight linear
  output layer. The network output is then passed to ``rawDupireFormula``.

  Args:
    n_units: number of units in each hidden branch.
    strikeTensor: input of the softplus ("Hidden1S") branch.
    maturityTensor: input of the sigmoid ("Hidden1M") branch.
    scaleTensor: forwarded to ``rawDupireFormula``.
    strikeMinTensor: forwarded to ``rawDupireFormula``.
    vegaRef: not used by this architecture.
    hyperparameters: not used by this architecture.
    IsTraining: forwarded to the layers and to ``rawDupireFormula``.

  Returns:
    A 4-tuple: the network output, the list
    ``[output, dupireVol, theta, hK, dupireVar]``, an empty list, and the
    ``evalAndFormatDupireResult`` callable.
  """
  # First layer: one branch per input variable.
  strikeBranch = convexDugasLayer(n_units=n_units,
                                  tensor=strikeTensor,
                                  isTraining=IsTraining,
                                  name="Hidden1S")
  maturityBranch = monotonicDugasLayer(n_units=n_units,
                                       tensor=maturityTensor,
                                       isTraining=IsTraining,
                                       name="Hidden1M")
  combined = strikeBranch * maturityBranch

  # Output layer applied to the product of both branches.
  networkOutput = convexDugasOutputLayer(tensor=combined,
                                         isTraining=IsTraining,
                                         name="Output")

  # Local volatility and related quantities from the Dupire formula helper.
  dupireResults = rawDupireFormula(networkOutput, strikeTensor,
                                   maturityTensor,
                                   scaleTensor,
                                   strikeMinTensor,
                                   IsTraining=IsTraining)
  dupireVol, theta, hK, dupireVar = dupireResults

  evaluatedTensors = [networkOutput, dupireVol, theta, hK, dupireVar]
  return networkOutput, evaluatedTensors, [], evalAndFormatDupireResult

In [None]:
# Train the hard-constrained Dugas architecture on scaledDataSet under the
# model name "convexHardDugasVolModel"; the call returns five objects.
# NOTE(review): the meaning of the positional `True` flag depends on
# create_train_model's signature, which is defined elsewhere -- confirm.
y_predDugas, volLocaleDugas, dNN_TDugas, gNN_KDugas, lossSerieDugas = create_train_model(NNArchitectureHardConstrainedDugas,
                                                                                         scaledDataSet,
                                                                                         True,
                                                                                         hyperparameters,
                                                                                         modelName = "convexHardDugasVolModel")

In [None]:
# Plot the loss series returned by the training call above.
plotEpochLoss(lossSerieDugas)

In [None]:
# Display the last entry of the loss series.
lossSerieDugas.iloc[-1]

In [None]:
# Evaluate the model saved as "convexHardDugasVolModel" on scaledDataSet.
# Note: this rebinds all five names, including lossSerieDugas, which was
# previously assigned by the create_train_model call.
y_predDugas, volLocaleDugas, dNN_TDugas, gNN_KDugas, lossSerieDugas = create_eval_model(NNArchitectureHardConstrainedDugas,
                                                                                        scaledDataSet,
                                                                                        True,
                                                                                        hyperparameters,
                                                                                        modelName = "convexHardDugasVolModel")
# Summarise the evaluation outputs against the unscaled dataSet.
modelSummary(y_predDugas, volLocaleDugas, dNN_TDugas, gNN_KDugas, dataSet)
# Pass the predictions and the reference dataSet["ImpliedVol"] column to
# plotImpliedVol, together with the rate and dividend integral splines.
impVDugas = plotImpliedVol(y_predDugas, dataSet["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

In [None]:
# Display the rows of volLocaleDugas whose first index level equals midS0
# (all values of the second level are kept).
volLocaleDugas.loc[(midS0,slice(None))]

In [None]:
# Evaluate the model saved as "convexHardDugasVolModel" on scaledDataSetTest.
# NOTE(review): the fifth output is bound to `lossSerie6Test`, which does not
# follow the "Dugas" naming of the other four names -- looks like a copy-paste
# leftover that may overwrite another model's variable; confirm before renaming.
y_predDugasTest, volLocaleDugasTest, dNN_TDugasTest, gNN_KDugasTest, lossSerie6Test = create_eval_model(NNArchitectureHardConstrainedDugas, 
                                                                                        scaledDataSetTest, 
                                                                                        True, 
                                                                                        hyperparameters,
                                                                                        modelName = "convexHardDugasVolModel")
# Summarise the evaluation outputs against the unscaled dataSetTest.
modelSummary(y_predDugasTest, volLocaleDugasTest, dNN_TDugasTest, gNN_KDugasTest, dataSetTest)
# Pass the predictions and the reference dataSetTest["ImpliedVol"] column to
# plotImpliedVol, together with the rate and dividend integral splines.
impVDugasTest = plotImpliedVol(y_predDugasTest, dataSetTest["ImpliedVol"], rIntegralSpline=riskFreeIntegral, qIntegralSpline=divSpreadIntegral)

#### Monte Carlo backtest

In [None]:
def neuralVolLocaleDugas(s,t):
  """Evaluate the "convexHardDugasVolModel" local volatility at (s, t).

  Thin wrapper around ``evalVolLocale`` that fixes the architecture, the
  notebook-level ``dataSet`` and ``hyperparameters``, and the model name.

  Args:
    s: first coordinate forwarded to ``evalVolLocale``.
    t: second coordinate forwarded to ``evalVolLocale``.

  Returns:
    Whatever ``evalVolLocale`` returns for these inputs.
  """
  return evalVolLocale(NNArchitectureHardConstrainedDugas,
                       s,
                       t,
                       dataSet,
                       hyperparameters,
                       modelName="convexHardDugasVolModel")

In [None]:
# Evaluate the neural local volatility on the flattened nodes of
# volLocaleGrid (element 0 and element 1 are passed as s and t respectively).
volLocalInterp9 = neuralVolLocaleDugas(volLocaleGrid[0].flatten(),
                                       volLocaleGrid[1].flatten())
# Preview the first rows of the result.
volLocalInterp9.head()

In [None]:
# Evaluate the neural local volatility at the ("Strike", "Maturity") index
# values of the testing set.
volLocalInterp10 = neuralVolLocaleDugas(dataSetTest.index.get_level_values("Strike").values.flatten(),
                                        dataSetTest.index.get_level_values("Maturity").values.flatten())
# Preview the first rows of the result.
volLocalInterp10.head()

In [None]:
def nnVolLocale9(x, y):
  """Local volatility at (x, y) interpolated from ``volLocalInterp9``.

  ``volLocalInterp9`` is looked up at call time, so re-running the cell that
  defines it changes the values used here.
  """
  return interpolatedMCLocalVolatility(volLocalInterp9, x, y)

In [None]:
def nnVolLocale10(x, y):
  """Local volatility at (x, y) interpolated from ``volLocalInterp10``.

  ``volLocalInterp10`` is looked up at call time, so re-running the cell that
  defines it changes the values used here.
  """
  return interpolatedMCLocalVolatility(volLocalInterp10, x, y)

In [None]:
# Plot the local volatility evaluated on the volLocaleGrid nodes, with the
# y-axis bounds expressed as multiples of S0.
plotSerie(volLocalInterp9,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Plot the local volatility evaluated at the testing-set nodes, with the
# y-axis bounds expressed as multiples of S0 (same title as the grid plot).
plotSerie(volLocalInterp10,
          Title = 'Interpolated Local Volatility Surface',
          az=30,
          yMin=0.0*S0,
          yMax=2.0*S0, 
          zAsPercent=True)

In [None]:
# Monte Carlo pricing over dataSet, using nnVolLocale9 (volatility
# interpolated from the volLocaleGrid evaluation) as the volatility callback.
mcResVolLocale9 = MonteCarloPricerVectorized(S0[0],
                                             dataSet,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale9)
# Preview the first rows of the result.
mcResVolLocale9.head()

In [None]:
# Compare the Monte Carlo results with the dataSet["Price"] column.
# NOTE(review): yMin=4100 is a hard-coded bound whose rationale is not
# visible here -- confirm it suits the loaded day of data.
predictionDiagnosis(mcResVolLocale9, dataSet["Price"], " Price ", yMin=4100)

In [None]:
# Monte Carlo pricing over dataSet, using nnVolLocale10 (volatility
# interpolated from the evaluation at the testing-set nodes) as the
# volatility callback.
mcResVolLocale10 = MonteCarloPricerVectorized(S0[0],
                                             dataSet,
                                             riskCurvespline,
                                             divSpline,
                                             nbPaths,
                                             nbTimeStep,
                                             nnVolLocale10)
# Preview the first rows of the result.
mcResVolLocale10.head()

In [None]:
# Compare the Monte Carlo results with the dataSet["Price"] column; the
# pricer above was also run over dataSet.
# NOTE(review): yMin=4100 is a hard-coded bound whose rationale is not
# visible here -- confirm it suits the loaded day of data.
predictionDiagnosis(mcResVolLocale10, dataSet["Price"], " Price ", yMin=4100)