In [1]:
from ucb import LinUCB
from scripts import evaluate_policy, model
from oracle import LinearOracle, OnlineRandomForestOracle, NeuralOracle, GPOracle, AdaptiveOracle
from ucb import NeuralUCB, NeuralTS, LinUCB
import torch

In [2]:
import torch
import torch.nn as nn

class FCNN(nn.Module):
    def __init__(self, input_dim, hidden_dim=512, output_dim=1, dropout_prob=0.25):
        super(FCNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, output_dim)
        )
    def forward(self, x):
        return self.net(x)

In [3]:
import pandas as pd
import pickle

offline_model = pickle.load(open('fcnn_model.pkl', 'rb'))

df = pd.read_csv('data/stroke_data_2000.csv')

In [4]:
df.head()

Unnamed: 0,ohs6,treatment,pretrialexp,phase,age,randdelay,sbprand,dbprand,weight,glucose,...,visuospat_rand_Unknown,visuospat_rand_Yes,brainstemsigns_rand_Unknown,brainstemsigns_rand_Yes,otherdeficit_rand_Unknown,otherdeficit_rand_Yes,stroketype_Other,stroketype_PACI,stroketype_POCI,stroketype_TACI
0,1.0,0,0.809606,-3.378978,0.930334,-0.320712,0.637048,1.201647,0.169181,,...,True,False,True,False,True,False,False,True,False,False
1,0.833333,1,-1.234669,0.295828,-2.470784,-0.416129,-0.88796,-0.717437,-0.165178,-0.918008,...,False,False,False,False,False,False,False,True,False,False
2,0.666667,0,-1.234669,0.295828,-2.312592,1.246842,-1.481019,,1.774104,-0.106904,...,False,True,False,False,False,False,False,True,False,False
3,0.833333,0,0.809606,0.295828,-0.256102,-0.184403,-0.54907,2.092651,-0.499536,-0.512456,...,False,False,False,False,False,False,False,True,False,False
4,0.333333,1,-1.234669,0.295828,-1.04706,1.410412,1.272469,0.858954,0.904771,-0.512456,...,False,False,False,False,False,False,False,False,False,False


In [5]:
import pandas as pd
df = pd.read_csv('data/stroke_data_2000.csv')

context = df.drop(columns=['ohs6', 'treatment']).values
action = df['treatment'].values
reward = df['ohs6'].values

context = torch.tensor(context.astype(float))
action = torch.tensor(action.astype(float))
reward = torch.tensor(reward.astype(float))

data_stream = list(zip(context, action, reward))

In [6]:
lin_ucb_policy = LinUCB(context.shape[1], 1.0)

In [7]:
import numpy as np
import torch

offline_model(torch.tensor(np.concatenate([context[0], [1.]]).astype(float), dtype=torch.float32).view(1, -1))

tensor([[nan]], grad_fn=<AddmmBackward0>)

In [9]:
# Main experimental loop
possible_actions = [0, 1]
results = {}
for oracle in [LinearOracle, OnlineRandomForestOracle, NeuralOracle, GPOracle]:
    for policy in [lin_ucb_policy]:
        cumulative_reward = 0
        
        for t, (context, true_action, true_reward) in enumerate(data_stream):
            # Select action
            action = policy.select_action(context, possible_actions)
            
            # Get reward (true or imputed)
            if action == true_action:
                reward = true_reward
            else:
                reward = offline_model(torch.tensor(np.concatenate([context, [action]]).astype(float), dtype=torch.float32).view(1, -1))
            
            # Update policy
            policy.update(context, action, reward)
            # Ensure reward is a scalar before adding to cumulative_reward
            if isinstance(reward, torch.Tensor):
                cumulative_reward += reward.item()
            else:
                cumulative_reward += reward
            
        results[(oracle, policy)] = cumulative_reward

In [10]:
reward

tensor([[0.6813]], grad_fn=<AddmmBackward0>)