In [1]:
import pytorch_lightning as pl
from pytorch_lightning import Trainer

from collections import defaultdict

from sklearn.model_selection import KFold

from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler,SequentialSampler

from transformers import DistilBertTokenizer
from transformers import DistilBertModel, DistilBertPreTrainedModel
from transformers import get_linear_schedule_with_warmup

from torch.nn import CrossEntropyLoss

import torch
import torch.nn as nn
from torch.optim import AdamW

from PIL import Image
import torchvision.transforms as transforms

import numpy as np
from scipy.special import softmax
from scipy.special import logit
from sklearn.linear_model import LogisticRegression 

from tqdm import tqdm
import math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
CUDA = (torch.cuda.device_count() > 0)
MASK_IDX = 103

In [3]:
def platt_scale(outcome,probs):
    logits = logit(probs)
    logits = logits.reshape(-1,1)
    log_reg = LogisticRegression(penalty='none', warm_start = True, solver = 'lbfgs' )
    log_reg.fit(logits, outcome)
    return log_reg.predict_proba(logits)

def gelu(x):
    return 0.5 * x * (1.0 + torch.erf(x/math.sqrt(2.0)))

In [4]:
def make_confound_vector(ids, vocab_size, use_counts = False):
    vec = torch.zeros(ids.shape[0],vocab_size)
    ones = torch.ones_like(ids,dtype = torch.float)
    
    print("vec_ones")
    print(vec)
    print(ones)
    if CUDA:
        vec = vec.cuda()
        ones = ones.cuda()
        ids = ids.cuda()
    print("scatter_add_")
    #vec.scatter_add_(1, ids,ones)
    print(vec)
    vec[:,1] = 0.0
    if not use_counts:
        vec = (vec != 0).float()
    return vec.float()

In [5]:
import timm
import torch
from torch import nn

class ImageCausalModel(nn.Module):
    """The model itself."""
    def __init__(self, num_labels = 2,pretrained_model_names = "resnet50"):
        super().__init__()

        self.num_labels = num_labels

        self.base_model = timm.create_model(pretrained_model_names,pretrained = True)
        self.base_model.fc = nn.Identity()

        # 因果推論用の追加レイヤー
        self.classifier = nn.Linear(self.base_model.num_features, num_labels)
        self.Q_cls = nn.ModuleDict()

        # self.base_model.num_features は、事前学習済みの画像モデルからの特徴量サイズです。
        input_size = self.base_model.num_features + self.num_labels

        for T in range(2):
            # ModuleDict keys have to be strings..
            self.Q_cls['%d' % T] = nn.Sequential(
                nn.Linear(input_size, 200),
                nn.ReLU(),
                nn.Linear(200, self.num_labels))
        

        self.g_cls = nn.Linear(self.base_model.num_features + self.num_labels, 
            self.num_labels)

        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
    
    def forward(self,images, confounds, treatment=None, outcome = None):
        features = self.base_model(images)
        # print("features")
        # print("C", confounds.shape)
        # print(confounds)
        # print(confounds.unsqueeze(1).shape)
        C = make_confound_vector(confounds.unsqueeze(1), self.num_labels)
        # print("C",C.shape)  
        print(features.shape)
        print(confounds.shape)
        print(confounds.unsqueeze(1))
        print(confounds.unsqueeze(1).shape)
        inputs = torch.cat((features, C), dim =  1)

        print(inputs.shape)
        g_logits = self.g_cls(inputs)
        g_prob = torch.sigmoid(g_logits)

        Q_logits_T0 = self.Q_cls['0'](inputs)
        Q_logits_T1 = self.Q_cls['1'](inputs)

        Q_prob_T0 = torch.sigmoid(Q_logits_T0)
        Q_prob_T1 = torch.sigmoid(Q_logits_T1)
        if outcome is not None:
            return g_prob, Q_prob_T0, Q_prob_T1, g_logits, Q_logits_T0, Q_logits_T1
        else:
            return g_prob, Q_prob_T0, Q_prob_T1,
        

In [6]:
class CausalImageModelWrapper:
    def __init__(self, g_weight=1.0, Q_weight=0.1, mlm_weight=1.0, batch_size=32):
        self.model = ImageCausalModel(num_labels=2, pretrained_model_names="resnet50")
        if CUDA:
            self.model = self.model.cuda()

        self.loss_weights = {
            'g': g_weight,
            'Q': Q_weight
        }
        self.batch_size = batch_size

    def train(self,images, confounds, treatments, outcomes , learning_rate = 2e-5, epochs  = 3):
        dataloader = self.build_dataloader(images, confounds, treatments, outcomes, batch_size = self.batch_size)
        self.model.train()
        optimizer = AdamW(self.model.parameters(), lr = learning_rate, eps = 1e-8)
        total_steps = len(dataloader) * epochs
        warmup_steps = total_steps * 0.1
        scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps = warmup_steps,num_training_steps = total_steps)

        for epoch in range(epochs):
            losses = []
            for batch in dataloader:
                if CUDA:
                    batch = tuple(x.cuda() for x in batch)
                images, confounds, treatments, outcomes = batch

                self.model.zero_grad()
                g_prob, Q_prob_T0, Q_prob_T1, g_logits, Q_logits_T0, Q_logits_T1 = self.model(images, confounds, treatments, outcomes)
                g_loss = CrossEntropyLoss()(g_logits, treatments)
                Q_loss_T0 = CrossEntropyLoss()(Q_logits_T0, outcomes)
                Q_loss_T1 = CrossEntropyLoss()(Q_logits_T1, outcomes)

                loss = self.loss_weights['g'] * g_loss + self.loss_weights['Q'] * (Q_loss_T0 + Q_loss_T1)

                loss.backward()
                optimizer.step()
                scheduler.step()
                losses.append(loss.detach().cpu().item())
            
        return self.model
    
    def inference(self, images, confounds, outcome = None):
        self.model.eval()
        dataloader = self.build_dataloader(images, confounds,outcomes = outcome,
                                           sampler = 'sequential')
        Q0s = []
        Q1s = []
        Ys = []
        for i, batch in tqdm(enumerate(dataloader),total = len(dataloader)):
            if CUDA: 
                batch = (x.cuda() for x in batch)
            images, confounds, outcomes = batch
            g_prob, Q0, Q1 = self.model(images, confounds, outcomes = outcomes)
            Q0s += Q0.detach().cpu().numpy().tolist()
            Q1s += Q1.detach().cpu().numpy().tolist()
            Ys += outcomes.detach().cpu().numpy().tolist()

            ## [todo] inferenceメソッドの形式?
        probs = np.array(list(zip(Q0s, Q1s)))
        preds = np.argmax(probs, axis = 1)    
        return probs, preds,Ys
    
    def ATE(self,C,image, Y = None, platt_scaling = False):
        ## [todo] ATEの計算方法
        Q_probs,_,Ys = self.inference(image,C,outcome = Y)
        if platt_scaling and Y is not None:
            Q0 = platt_scale(Ys, Q_probs[:,0])[:,0]
            Q1 = platt_scale(Ys, Q_probs[:,1])[:,1]
        else:
            Q0 = Q_probs[:,0]
            Q1 = Q_probs[:,1]
        return np.mean(Q0 - Q1)

    def build_dataloader(self,image_paths, confounds, treatments = None, outcomes = None,batch_size = 32):
        dataset = CausalImageDataset(image_paths, confounds, treatments, outcomes)
        sampler = RandomSampler(dataset) if treatments is not None else SequentialSampler(dataset)
        dataloader = DataLoader(dataset, batch_size = batch_size,sampler = sampler)
        return dataloader
    


In [7]:
class CausalImageDataset(Dataset):
    def __init__(self,image_paths, confounds, treatments = None, outcomes = None,transform = None):
        self.image_paths = image_paths
        self.confounds = confounds
        self.treatments = treatments
        self.outcomes = outcomes

        if transform is None:
            self.transform = transforms.Compose(
                [
                    transforms.Resize((224,224)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406] , std = [0.229, 0.224, 0.225])
                ]
            )
        else:
            self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self,idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image)
        
        confounds = self.confounds[idx]
        treatment = self.treatments[idx] if self.treatments is not None else -1
        outcome = self.outcomes[idx] if self.outcomes is not None else -1
        return image , confounds, treatment, outcome

    

In [8]:
import sys 
sys.path.append("../../")
import pandas as pd
df = pd.read_csv("../input/outputs.csv")
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price,img_path,actual_price_yen,embedding_path,embedding,price_ave,output
0,0,72,AmazonBasics High Speed 55 Watt Oscillating Pe...,appliances,All Appliances,https://m.media-amazon.com/images/I/71QfUcEOg8...,https://www.amazon.in/AmazonBasics-400mm-Pedes...,4.1,6113,"₹2,099",3300.0,/root/graduation_thetis/causal-bert-pytorch/in...,363000.0,/root/graduation_thetis/causal-bert-pytorch/in...,29.492722,1,1863.892722
1,1,73,Farberware Mini Blender Fruit Mixer Machine Po...,appliances,All Appliances,https://m.media-amazon.com/images/I/716mmFt0PG...,https://www.amazon.in/Farberware-Portable-Elec...,2.9,6071,₹499,1199.0,/root/graduation_thetis/causal-bert-pytorch/in...,131890.0,/root/graduation_thetis/causal-bert-pytorch/in...,68.038506,0,1889.338506
2,2,74,PHILIPS Handheld Garment Steamer STH3000/20 - ...,appliances,All Appliances,https://m.media-amazon.com/images/I/71W2XPQdBq...,https://www.amazon.in/PHILIPS-Handheld-Garment...,4.0,1553,"₹3,995",4095.0,/root/graduation_thetis/causal-bert-pytorch/in...,450450.0,/root/graduation_thetis/causal-bert-pytorch/in...,43.871647,1,510.271647
3,3,75,"Cookwell Bullet Mixer Grinder (5 Jars, 3 Blade...",appliances,All Appliances,https://m.media-amazon.com/images/I/81yobRRV8n...,https://www.amazon.in/Cookwell-Bullet-Mixer-Gr...,4.1,9592,"₹2,479",6000.0,/root/graduation_thetis/causal-bert-pytorch/in...,660000.0,/root/graduation_thetis/causal-bert-pytorch/in...,45.319656,1,2923.419656
4,4,76,"Bajaj ATX 4 750-Watt Pop-up Toaster, 2-Slice A...",appliances,All Appliances,https://m.media-amazon.com/images/I/51D5T7TGVb...,https://www.amazon.in/Bajaj-ATX-750-Watt-Pop-u...,4.3,9520,"₹1,499",2250.0,/root/graduation_thetis/causal-bert-pytorch/in...,247500.0,/root/graduation_thetis/causal-bert-pytorch/in...,51.195602,0,2907.195602


In [9]:
import pandas as pd
df = pd.read_csv("../input/outputs.csv")
print(df["no_of_ratings"].shape)

(5111,)


In [10]:
ci = CausalImageModelWrapper(batch_size = 2, g_weight=0.1, Q_weight=0.1)
ci.train(df["img_path"],df["no_of_ratings"], df["price_ave"], df["output"],epochs = 1)
print(ci.ATE(df["price_ave"], df["img_path"], platt_scaling = False))

vec_ones
tensor([[0., 0.],
        [0., 0.]])
tensor([[1.],
        [1.]], device='cuda:0')
scatter_add_
tensor([[0., 0.],
        [0., 0.]], device='cuda:0')
torch.Size([2, 2048])
torch.Size([2])
tensor([[ 3],
        [20]], device='cuda:0')
torch.Size([2, 1])
torch.Size([2, 2050])


RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Double'