In [1]:
from biogeme.database import Database
import pandas as pd
swissmetro = pd.read_csv("examples/swissmetro/swissmetro.dat", sep="\t")
database = Database('swissmetro', swissmetro)

# Removing some observations
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)
database.data["CHOICE"] -= 1

NameError: name 'PURPOSE' is not defined

In [2]:
database.variables

{'GROUP': GROUP,
 'SURVEY': SURVEY,
 'SP': SP,
 'ID': ID,
 'PURPOSE': PURPOSE,
 'FIRST': FIRST,
 'TICKET': TICKET,
 'WHO': WHO,
 'LUGGAGE': LUGGAGE,
 'AGE': AGE,
 'MALE': MALE,
 'INCOME': INCOME,
 'GA': GA,
 'ORIGIN': ORIGIN,
 'DEST': DEST,
 'TRAIN_AV': TRAIN_AV,
 'CAR_AV': CAR_AV,
 'SM_AV': SM_AV,
 'TRAIN_TT': TRAIN_TT,
 'TRAIN_CO': TRAIN_CO,
 'TRAIN_HE': TRAIN_HE,
 'SM_TT': SM_TT,
 'SM_CO': SM_CO,
 'SM_HE': SM_HE,
 'SM_SEATS': SM_SEATS,
 'CAR_TT': CAR_TT,
 'CAR_CO': CAR_CO,
 'CHOICE': CHOICE}

In [1385]:
import aesara
import aesara.tensor as aet
import numpy as np
from biogeme.expressions import Beta
from biogeme.database import Database
from aesara import shared
from aesara.tensor.var import TensorVariable
import pandas as pd


class DatabaseShared(Database):
	def __init__(self, name, pandasDatabase, choiceVar):
		super().__init__(name, pandasDatabase)

		for v in self.variables:
			if v in self.data.columns:
				if self.variables[v].name == choiceVar:
					self.variables[v].y = aet.ivector(self.variables[v].name)
				else:
					self.variables[v].x = aet.matrix(self.variables[v].name)

	def get_x(self):
		list_of_x = []
		for var in self.variables:
			if hasattr(self.variables[var], 'x'):
				list_of_x.append(self.variables[var].x)
		return list_of_x

	def get_x_data(self):
		x_data = []
		list_of_x = self.get_x()
		for x in list_of_x:
			x_data.append(self.data[[x.name]])
		return x_data

	def get_y(self):
		list_of_y = []
		for var in self.variables:
			if hasattr(self.variables[var], 'y'):
				list_of_y.append(self.variables[var].y)
		return list_of_y

	def get_y_data(self):
		y_data = []
		list_of_y = self.get_y()
		for y in list_of_y:
			y_data.append(self.data[y.name])
		return y_data

class BetaShared(Beta):
	def __init__(self, name, value, lowerbound, upperbound, status):
		super().__init__(name, value, lowerbound, upperbound, status)

		self.sharedVar = aesara.shared(np.array(value).astype(aesara.config.floatX))

	def __add__(self, other):
		if isinstance(other, (TensorVariable, BetaShared)):
			return self.sharedVar + other
		else:
			return super().__add__(other)
	
	def __radd__(self, other):
		if isinstance(other, (TensorVariable, BetaShared)):
			return self.sharedVar + other
		return super().__radd__(other)

	def __sub__(self, other):
		if isinstance(other, (TensorVariable, BetaShared)):
			return self.sharedVar + other
		return super().__sub__(other)
	
	def __mul__(self, other):
		if isinstance(other, (TensorVariable, BetaShared)):
			return self.sharedVar * other
		else:
			return super().__mul__(other)

	def __truediv__(self, other):
		if isinstance(other, (TensorVariable, BetaShared)):
			return self.sharedVar / other
		return super().__truediv__(other)

	def __pow__(self, other):
		return super().__pow__(other)

	def __rsub__(self, other):
		return super().__rsub__(other)

	def __rmul__(self, other):
		return super().__rmul__(other)



swissmetro = pd.read_csv("examples/swissmetro/swissmetro.dat", sep="\t")
db = DatabaseShared('swissmetro', swissmetro, choiceVar="CHOICE")
globals().update(db.variables)


# Removing some observations
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
db.remove(exclude)
db.data["CHOICE"] -= 1

b_cost = BetaShared("b_cost", 0.2, None, None, 0)
b_time = BetaShared("b_time", 0.2, None, None, 0)
asc_train = BetaShared("asc_train", 0.2, None, None, 0)
asc_car = BetaShared("asc_car", 0.2, None, None, 0)
asc_sm = BetaShared("asc_sm", 0.2, None, None, 0)

print(b_cost)
print(b_cost + 1)
print(type(asc_train))

U_1 = b_cost * TRAIN_CO.x + b_time * TRAIN_TT.x + asc_train
U_2 = b_cost * CAR_CO.x + b_time * CAR_TT.x + asc_car
U_3 = b_cost * SM_CO.x + b_time * SM_TT.x + asc_sm

print('U_1', type(U_1))

print(db.data[[TRAIN_CO.name]].shape)
print(b_cost.sharedVar.eval().shape)
Uvec = aet.concatenate([U_1, U_2, U_3], axis=1)
prob = aet.nnet.softmax(Uvec)

loglike = -aet.mean(aet.log(prob)[aet.arange(CHOICE.y.shape[0]), CHOICE.y])
g_b_cost = aet.grad(loglike, b_cost.sharedVar)
g_b_time = aet.grad(loglike, b_time.sharedVar)

print("b_cost_shape", b_cost.sharedVar.shape.eval())

hessians = []
hess, updates = aesara.scan(
	lambda i, y, x: aet.grad(
		y[i],
		x,
		consider_constant=None,
		disconnected_inputs="ignore",
	),
	sequences=aet.arange(2),
	non_sequences=[[g_b_cost, g_b_time], [b_cost.sharedVar, b_time.sharedVar]],
)
hessians.append(hess)

double1 = aet.grad(g_b_cost, b_time.sharedVar)
double11 = aet.grad(g_b_cost, b_cost.sharedVar)
double2 = aet.grad(g_b_time, b_time.sharedVar)
double22 = aet.grad(g_b_time, b_cost.sharedVar)


updates = [
	(b_cost.sharedVar, b_cost.sharedVar - 0.0001 * g_b_cost),
	(b_time.sharedVar, b_time.sharedVar - 0.0001 * g_b_time),
]
f = aesara.function(db.get_x() + db.get_y(), [loglike] + hessians, updates=updates,on_unused_input='ignore')

output = f(*(db.get_x_data() + db.get_y_data()))
output

b_cost(0.2)
(b_cost(0.2) + `1`)
<class '__main__.BetaShared'>
U_1 <class 'aesara.tensor.var.TensorVariable'>
(6768, 1)
()
b_cost_shape []


[array(76.10709, dtype=float32),
 array([[0., 0.],
        [0., 0.]], dtype=float32)]

In [1711]:
output = f(*(db.get_x_data() + db.get_y_data()))
output

[array(5.411472, dtype=float32),
 array([[0., 0.],
        [0., 0.]], dtype=float32)]