In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

In [None]:
class Visual_SpatialMultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention for inputs of width ``m``.

    One input tensor supplies queries, keys and values. All four projections
    (query, key, value, output) are bias-free ``nn.Linear(m, m)`` layers.

    Args:
        m: Size of the last input dimension.
        H: Number of heads; ``m`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights of the most recent ``forward`` call,
            shaped ``(batch, H, seq_len, seq_len)``; ``None`` before the first
            call.
    """

    def __init__(self, m: int, H: int) -> None:
        super().__init__()
        self.m = m
        self.H = H

        # forward() slices the projected width into H equal heads with
        # .view(), so an uneven split cannot be represented.
        assert m % H == 0, f"m ({m}) must be divisible by the number of heads H ({H})"

        self.dim_head = m // H
        self.w_q = nn.Linear(m, m, bias=False)
        self.w_k = nn.Linear(m, m, bias=False)
        self.w_v = nn.Linear(m, m, bias=False)
        self.w_o = nn.Linear(m, m, bias=False)

        # Declared here so the attribute can be read before the first
        # forward() without raising AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Return ``(softmax(Q K^T / sqrt(d)) @ V, softmax weights)``.

        ``d`` is the size of the last dimension of ``query``; the softmax runs
        over the last dimension of the score matrix.
        """
        dim_head = query.shape[-1]
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(dim_head)
        attention_scores = attention_scores.softmax(dim=-1)
        # The weights are handed back as well so callers can visualise them.
        return attention_scores @ value, attention_scores

    def forward(self, q):
        """Apply self-attention to ``q``.

        Args:
            q: Tensor indexed as ``(batch, seq_len, m)``.

        Returns:
            Tensor of shape ``(batch, seq_len, m)``.
        """
        batch, seq_len = q.shape[0], q.shape[1]

        def split_heads(t):
            # (batch, seq_len, m) --> (batch, seq_len, H, dim_head)
            #                     --> (batch, H, seq_len, dim_head)
            return t.view(batch, seq_len, self.H, self.dim_head).transpose(1, 2)

        query = split_heads(self.w_q(q))
        key = split_heads(self.w_k(q))
        value = split_heads(self.w_v(q))

        x, self.attention_scores = Visual_SpatialMultiHeadAttention.attention(query, key, value)

        # Merge heads: (batch, H, seq_len, dim_head) --> (batch, seq_len, H * dim_head).
        # .contiguous() is needed because transpose() only changes strides.
        x = x.transpose(1, 2).contiguous().view(batch, -1, self.H * self.dim_head)

        return self.w_o(x)

In [None]:
class Visual_ChannelMultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention for inputs of width ``s``.

    One input tensor supplies queries, keys and values. All four projections
    (query, key, value, output) are bias-free ``nn.Linear(s, s)`` layers.

    Args:
        s: Size of the last input dimension.
        H: Number of heads; ``s`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights of the most recent ``forward`` call,
            shaped ``(batch, H, seq_len, seq_len)``; ``None`` before the first
            call.
    """

    def __init__(self, s: int, H: int) -> None:
        super().__init__()
        self.s = s
        self.H = H

        # forward() slices the projected width into H equal heads with
        # .view(), so an uneven split cannot be represented.
        assert s % H == 0, f"s ({s}) must be divisible by the number of heads H ({H})"

        self.dim_head = s // H
        self.w_q = nn.Linear(s, s, bias=False)
        self.w_k = nn.Linear(s, s, bias=False)
        self.w_v = nn.Linear(s, s, bias=False)
        self.w_o = nn.Linear(s, s, bias=False)

        # Declared here so the attribute can be read before the first
        # forward() without raising AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Return ``(softmax(Q K^T / sqrt(d)) @ V, softmax weights)``.

        ``d`` is the size of the last dimension of ``query``; the softmax runs
        over the last dimension of the score matrix.
        """
        dim_head = query.shape[-1]
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(dim_head)
        attention_scores = attention_scores.softmax(dim=-1)
        # The weights are handed back as well so callers can visualise them.
        return attention_scores @ value, attention_scores

    def forward(self, q):
        """Apply self-attention to ``q``.

        Args:
            q: Tensor indexed as ``(batch, seq_len, s)``.

        Returns:
            Tensor of shape ``(batch, seq_len, s)``.
        """
        batch, seq_len = q.shape[0], q.shape[1]

        def split_heads(t):
            # (batch, seq_len, s) --> (batch, seq_len, H, dim_head)
            #                     --> (batch, H, seq_len, dim_head)
            return t.view(batch, seq_len, self.H, self.dim_head).transpose(1, 2)

        query = split_heads(self.w_q(q))
        key = split_heads(self.w_k(q))
        value = split_heads(self.w_v(q))

        x, self.attention_scores = Visual_ChannelMultiHeadAttention.attention(query, key, value)

        # Merge heads: (batch, H, seq_len, dim_head) --> (batch, seq_len, H * dim_head).
        # .contiguous() is needed because transpose() only changes strides.
        x = x.transpose(1, 2).contiguous().view(batch, -1, self.H * self.dim_head)

        return self.w_o(x)
        
        


In [None]:
class Visual_TemporalMultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention with a narrowed output.

    Queries, keys and values come from the same tensor through bias-free
    ``nn.Linear(m, m)`` layers. Unlike the query/key/value projections, the
    output projection maps ``m`` features to ``2 * (m // H)`` features.

    Args:
        m: Size of the last input dimension.
        H: Number of heads; ``m`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights of the most recent ``forward`` call,
            shaped ``(batch, H, seq_len, seq_len)``; ``None`` before the first
            call.
    """

    def __init__(self, m: int, H: int) -> None:
        super().__init__()
        self.H = H
        self.m = m

        # forward() slices the projected width into H equal heads with
        # .view(), so an uneven split cannot be represented.
        assert m % H == 0, f"m ({m}) must be divisible by the number of heads H ({H})"

        self.dim_head = m // H
        self.w_q = nn.Linear(m, m, bias=False)
        self.w_k = nn.Linear(m, m, bias=False)
        self.w_v = nn.Linear(m, m, bias=False)
        # Output width is twice the per-head width, not m.
        self.w_o = nn.Linear(m, self.dim_head * 2, bias=False)

        # Declared here so the attribute can be read before the first
        # forward() without raising AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Return ``(softmax(Q K^T / sqrt(d)) @ V, softmax weights)``.

        ``d`` is the size of the last dimension of ``query``; the softmax runs
        over the last dimension of the score matrix.
        """
        dim_head = query.shape[-1]
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(dim_head)
        attention_scores = attention_scores.softmax(dim=-1)
        # The weights are handed back as well so callers can visualise them.
        return attention_scores @ value, attention_scores

    def forward(self, q):
        """Apply self-attention to ``q``.

        Args:
            q: Tensor indexed as ``(batch, seq_len, m)``.

        Returns:
            Tensor of shape ``(batch, seq_len, 2 * (m // H))``.
        """
        batch, seq_len = q.shape[0], q.shape[1]

        def split_heads(t):
            # (batch, seq_len, m) --> (batch, seq_len, H, dim_head)
            #                     --> (batch, H, seq_len, dim_head)
            return t.view(batch, seq_len, self.H, self.dim_head).transpose(1, 2)

        query = split_heads(self.w_q(q))
        key = split_heads(self.w_k(q))
        value = split_heads(self.w_v(q))

        x, self.attention_scores = Visual_TemporalMultiHeadAttention.attention(query, key, value)

        # Merge heads: (batch, H, seq_len, dim_head) --> (batch, seq_len, m).
        # .contiguous() is needed because transpose() only changes strides.
        x = x.transpose(1, 2).contiguous().view(batch, -1, self.H * self.dim_head)

        # (batch, seq_len, m) --> (batch, seq_len, 2 * dim_head)
        return self.w_o(x)
        
        
        

In [None]:
class SpatialAveragePooling(nn.Module):
    """Parameter-free layer that averages its input over dimension 2."""

    def forward(self, x):
        """Return ``x`` with dimension 2 reduced by its mean.

        The original note describes ``x`` as N x m x s; the code itself only
        requires that dimension 2 exists.
        """
        return x.mean(dim=2)


In [None]:
class Visual_BeforeCross(nn.Module):
    """Chain spatial attention, channel attention, pooling and temporal attention.

    Args:
        spatial: Module applied to the raw input.
        channel: Module applied after dimensions 1 and 2 are swapped.
        sap: Pooling module applied to the channel-attention output.
        temporal: Module applied to the pooled tensor once a singleton
            dimension has been inserted at position 1.
        verbose: When true (the default, matching the earlier behaviour) the
            tensor shape after each stage is printed; pass ``False`` to
            silence the output.
    """

    # Annotations are written as strings so this class can be defined even if
    # the cells declaring the sub-module classes have not been run yet.
    def __init__(
        self,
        spatial: "Visual_SpatialMultiHeadAttention",
        channel: "Visual_ChannelMultiHeadAttention",
        sap: "SpatialAveragePooling",
        temporal: "Visual_TemporalMultiHeadAttention",
        verbose: bool = True,
    ) -> None:
        super().__init__()
        self.spatial_selfAttention = spatial
        self.channel_selfAttention = channel
        self.pool = sap
        self.temporal_selfAttention = temporal
        self.verbose = verbose

    def _report(self, stage, x):
        """Print the stage label and the shape of ``x`` when verbose."""
        if self.verbose:
            print(stage)
            print(x.shape)

    def forward(self, x):
        """Run the four stages in order and return the final tensor."""
        x = self.spatial_selfAttention(x)
        self._report("After Spatial", x)

        # Swap dimensions 1 and 2 so the channel module attends over the
        # other axis.
        x = self.channel_selfAttention(x.transpose(1, 2))
        self._report("After Channel", x)

        x = self.pool(x)
        self._report("After pool", x)

        # NOTE(review): if the pooled tensor is 2-D, the temporal module sees
        # a length-1 sequence here — confirm that this is intended.
        x = self.temporal_selfAttention(x.unsqueeze(1))
        x = x.squeeze(1)
        self._report("After Temporal", x)
        return x

In [None]:
# Demo: push one random tensor through the visual branch.
# m is the width given to the spatial and temporal attention layers,
# s the width given to the channel attention layer, H the head count.
m = 2048
s = 64
H = 8

input_tensor = torch.randn(6, s, m)

spatialSA = Visual_SpatialMultiHeadAttention(m, H)

channelSA = Visual_ChannelMultiHeadAttention(s, H)

sap = SpatialAveragePooling()

temporalSA = Visual_TemporalMultiHeadAttention(m, H)

v = Visual_BeforeCross(spatialSA, channelSA, sap, temporalSA)
# Call the module itself rather than .forward(): __call__ is what runs any
# registered forward hooks before delegating to forward().
output_tensor = v(input_tensor)

In [None]:
class AV_CrossAttention(nn.Module):
    """Cross-correlate audio and video features and concatenate the results.

    Args:
        m: Feature width; also the size of the square, bias-free projection
            applied to the audio features before correlation.
    """

    def __init__(self, m: int) -> None:
        super().__init__()
        self.m = m
        self.w = nn.Linear(m, m, bias=False)

    def forward(self, audio_data, video_data):
        """Fuse the two feature matrices.

        Both inputs are used as matrices with ``m`` columns and the same
        number of rows (N x m per the original inline note).

        Returns:
            ``tanh``-squashed video features followed by ``tanh``-squashed
            audio features, concatenated along dimension 1.
        """
        projected_audio = self.w(audio_data)
        # (N, m) @ (m, N) --> (N, N): rows index audio, columns index video.
        correlation = projected_audio @ video_data.transpose(0, 1)

        # NOTE(review): both softmaxes normalise along dim 0, so the rows that
        # multiply the features below do not sum to 1 — confirm this axis.
        audio_weights = F.softmax(correlation, dim=0)
        video_weights = F.softmax(correlation.transpose(0, 1), dim=0)

        # Residual connection around each attended feature matrix.
        video_branch = torch.tanh(video_weights @ video_data + video_data)
        audio_branch = torch.tanh(audio_weights @ audio_data + audio_data)

        return torch.cat((video_branch, audio_branch), dim=1)
        
        



# Smoke test for AV_CrossAttention on random features.
# Note: this rebinds the notebook-level name `m`.
N, m = 6, 512
# Audio is drawn first, then video, to keep the random stream order.
audio_tensor, video_tensor = (torch.randn(N, m) for _ in range(2))
avCA = AV_CrossAttention(m)
output = avCA(audio_tensor, video_tensor)
print(output.shape)




In [None]:
# Build a linear classification head sized to the fused output.
# NOTE(review): every element of `output` is flattened into one vector, so
# the layer's input width depends on the first dimension of `output` too —
# confirm that this is intended.
out_classes = 8

flattened_output = output.view(output.numel())
features = flattened_output.size(0)

fc_layer = nn.Linear(in_features=features, out_features=out_classes)

In [None]:
class Visual_SpatialMultiHeadAttention(nn.Module):
    """Self-attention with ``H`` heads over a feature width of ``m``.

    Query, key, value and output projections are each a bias-free
    ``nn.Linear(m, m)``; the same input feeds all three of Q, K and V.

    Args:
        m: Size of the last input dimension.
        H: Number of heads; ``m`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights from the latest ``forward`` call
            (``(batch, H, seq_len, seq_len)``), or ``None`` if ``forward`` has
            not been called.
    """

    def __init__(self, m: int, H: int) -> None:
        super().__init__()
        self.m = m
        self.H = H

        # Each head takes an equal slice of the width, so it must divide evenly.
        assert m % H == 0, f"m ({m}) must be divisible by the number of heads H ({H})"

        self.dim_head = m // H
        self.w_q = nn.Linear(m, m, bias=False)
        self.w_k = nn.Linear(m, m, bias=False)
        self.w_v = nn.Linear(m, m, bias=False)
        self.w_o = nn.Linear(m, m, bias=False)

        # Present from construction so early reads yield None rather than
        # an AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Scaled dot-product attention.

        Returns the attended values together with the softmax weights (kept
        for visualisation). Scaling uses the last dimension of ``query``.
        """
        scale = math.sqrt(query.shape[-1])
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        weights = ((query @ key.transpose(-2, -1)) / scale).softmax(dim=-1)
        return weights @ value, weights

    def forward(self, q):
        """Attend over ``q`` (indexed as ``(batch, seq_len, m)``).

        Returns:
            Tensor of shape ``(batch, seq_len, m)``.
        """
        n_batch, n_tokens = q.shape[:2]
        # Target layout before moving the head axis forward.
        head_shape = (n_batch, n_tokens, self.H, self.dim_head)

        # (batch, seq_len, m) --> (batch, H, seq_len, dim_head)
        query = self.w_q(q).view(head_shape).transpose(1, 2)
        key = self.w_k(q).view(head_shape).transpose(1, 2)
        value = self.w_v(q).view(head_shape).transpose(1, 2)

        x, self.attention_scores = Visual_SpatialMultiHeadAttention.attention(query, key, value)

        # Undo the head split; transpose() leaves the tensor non-contiguous,
        # hence .contiguous() before .view().
        x = x.transpose(1, 2).contiguous().view(n_batch, -1, self.H * self.dim_head)

        return self.w_o(x)

In [None]:
class Visual_TemporalMultiHeadAttention(nn.Module):
    """Self-attention with ``H`` heads whose output is ``2 * (m // H)`` wide.

    Query, key and value projections are bias-free ``nn.Linear(m, m)``
    layers fed by the same input; the output projection is a bias-free
    ``nn.Linear(m, 2 * (m // H))``.

    Args:
        m: Size of the last input dimension.
        H: Number of heads; ``m`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights from the latest ``forward`` call
            (``(batch, H, seq_len, seq_len)``), or ``None`` if ``forward`` has
            not been called.
    """

    def __init__(self, m: int, H: int) -> None:
        super().__init__()
        self.H = H
        self.m = m

        # Each head takes an equal slice of the width, so it must divide evenly.
        assert m % H == 0, f"m ({m}) must be divisible by the number of heads H ({H})"

        self.dim_head = m // H
        self.w_q = nn.Linear(m, m, bias=False)
        self.w_k = nn.Linear(m, m, bias=False)
        self.w_v = nn.Linear(m, m, bias=False)
        # The output is narrower than the input: two head-widths.
        self.w_o = nn.Linear(m, self.dim_head * 2, bias=False)

        # Present from construction so early reads yield None rather than
        # an AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Scaled dot-product attention.

        Returns the attended values together with the softmax weights (kept
        for visualisation). Scaling uses the last dimension of ``query``.
        """
        scale = math.sqrt(query.shape[-1])
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        weights = ((query @ key.transpose(-2, -1)) / scale).softmax(dim=-1)
        return weights @ value, weights

    def forward(self, q):
        """Attend over ``q`` (indexed as ``(batch, seq_len, m)``).

        Returns:
            Tensor of shape ``(batch, seq_len, 2 * (m // H))``.
        """
        n_batch, n_tokens = q.shape[:2]
        # Target layout before moving the head axis forward.
        head_shape = (n_batch, n_tokens, self.H, self.dim_head)

        # (batch, seq_len, m) --> (batch, H, seq_len, dim_head)
        query = self.w_q(q).view(head_shape).transpose(1, 2)
        key = self.w_k(q).view(head_shape).transpose(1, 2)
        value = self.w_v(q).view(head_shape).transpose(1, 2)

        x, self.attention_scores = Visual_TemporalMultiHeadAttention.attention(query, key, value)

        # Undo the head split; transpose() leaves the tensor non-contiguous,
        # hence .contiguous() before .view().
        x = x.transpose(1, 2).contiguous().view(n_batch, -1, self.H * self.dim_head)

        # (batch, seq_len, m) --> (batch, seq_len, 2 * dim_head)
        return self.w_o(x)
        
        
        

In [None]:
class Visual_BeforeCross(nn.Module):
    """Visual pipeline: spatial attention, channel attention, pooling, temporal attention.

    Args:
        spatial: First stage, applied to the raw input.
        channel: Second stage, applied after dimensions 1 and 2 are swapped.
        sap: Pooling stage applied to the channel-attention output.
        temporal: Final stage, applied after a singleton dimension is
            inserted at position 1; that dimension is squeezed out afterwards.
        verbose: Print the shape after every stage. Defaults to ``True`` so
            existing callers see the same output; pass ``False`` to silence it.
    """

    # String annotations avoid a NameError when this cell runs before the
    # cells that define the sub-module classes.
    def __init__(
        self,
        spatial: "Visual_SpatialMultiHeadAttention",
        channel: "Visual_ChannelMultiHeadAttention",
        sap: "SpatialAveragePooling",
        temporal: "Visual_TemporalMultiHeadAttention",
        verbose: bool = True,
    ) -> None:
        super().__init__()
        self.spatial_selfAttention = spatial
        self.channel_selfAttention = channel
        self.pool = sap
        self.temporal_selfAttention = temporal
        self.verbose = verbose

    def forward(self, x):
        """Apply the four stages in sequence and return the result."""
        x = self.spatial_selfAttention(x)
        if self.verbose:
            print("After Spatial")
            print(x.shape)

        x = x.transpose(1, 2)
        x = self.channel_selfAttention(x)
        if self.verbose:
            print("After Channel")
            print(x.shape)

        x = self.pool(x)
        if self.verbose:
            print("After pool")
            print(x.shape)

        # NOTE(review): with a 2-D pooled tensor this gives the temporal
        # stage a sequence of length 1 — confirm that this is intended.
        x = x.unsqueeze(1)
        x = self.temporal_selfAttention(x)
        x = x.squeeze(1)
        if self.verbose:
            print("After Temporal")
            print(x.shape)
        return x

In [None]:
class AV_CrossAttention(nn.Module):
    """Audio/video fusion through a learned cross-correlation matrix.

    Args:
        m: Feature width; a bias-free ``nn.Linear(m, m)`` is applied to the
            audio features before they are correlated with the video features.
    """

    def __init__(self, m: int) -> None:
        super().__init__()
        self.m = m
        self.w = nn.Linear(m, m, bias=False)

    def forward(self, audio_data, video_data):
        """Return ``[tanh(video branch) | tanh(audio branch)]`` joined on dim 1.

        Inputs are treated as N x m matrices (per the original inline note)
        and need matching row counts for the matrix products below.
        """
        # N*m * m*m * m*N -> N*N
        corr = torch.matmul(self.w(audio_data), video_data.transpose(0, 1))
        corr_t = corr.transpose(0, 1)

        # NOTE(review): softmax is taken over dim 0 in both cases; verify the
        # axis against the intended formulation.
        attended_audio = torch.matmul(F.softmax(corr, dim=0), audio_data)
        attended_video = torch.matmul(F.softmax(corr_t, dim=0), video_data)

        fused = (
            torch.tanh(attended_video + video_data),
            torch.tanh(attended_audio + audio_data),
        )
        return torch.cat(fused, dim=1)
        
        



# Quick shape check of AV_CrossAttention with random inputs.
# Note: `m` is rebound here at notebook level.
N, m = 6, 512
feature_shape = (N, m)
audio_tensor = torch.randn(*feature_shape)
video_tensor = torch.randn(*feature_shape)
avCA = AV_CrossAttention(m)
output = avCA(audio_tensor, video_tensor)
print(output.shape)




In [None]:
class Visual_ChannelMultiHeadAttention(nn.Module):
    """Self-attention with ``H`` heads over a feature width of ``s``.

    Query, key, value and output projections are each a bias-free
    ``nn.Linear(s, s)``; the same input feeds all three of Q, K and V.

    Args:
        s: Size of the last input dimension.
        H: Number of heads; ``s`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights from the latest ``forward`` call
            (``(batch, H, seq_len, seq_len)``), or ``None`` if ``forward`` has
            not been called.
    """

    def __init__(self, s: int, H: int) -> None:
        super().__init__()
        self.s = s
        self.H = H

        # Each head takes an equal slice of the width, so it must divide evenly.
        assert s % H == 0, f"s ({s}) must be divisible by the number of heads H ({H})"

        self.dim_head = s // H
        self.w_q = nn.Linear(s, s, bias=False)
        self.w_k = nn.Linear(s, s, bias=False)
        self.w_v = nn.Linear(s, s, bias=False)
        self.w_o = nn.Linear(s, s, bias=False)

        # Present from construction so early reads yield None rather than
        # an AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Scaled dot-product attention.

        Returns the attended values together with the softmax weights (kept
        for visualisation). Scaling uses the last dimension of ``query``.
        """
        scale = math.sqrt(query.shape[-1])
        # (..., seq_len, d) @ (..., d, seq_len) --> (..., seq_len, seq_len)
        weights = ((query @ key.transpose(-2, -1)) / scale).softmax(dim=-1)
        return weights @ value, weights

    def forward(self, q):
        """Attend over ``q`` (indexed as ``(batch, seq_len, s)``).

        Returns:
            Tensor of shape ``(batch, seq_len, s)``.
        """
        n_batch, n_tokens = q.shape[:2]
        # Target layout before moving the head axis forward.
        head_shape = (n_batch, n_tokens, self.H, self.dim_head)

        # (batch, seq_len, s) --> (batch, H, seq_len, dim_head)
        query = self.w_q(q).view(head_shape).transpose(1, 2)
        key = self.w_k(q).view(head_shape).transpose(1, 2)
        value = self.w_v(q).view(head_shape).transpose(1, 2)

        x, self.attention_scores = Visual_ChannelMultiHeadAttention.attention(query, key, value)

        # Undo the head split; transpose() leaves the tensor non-contiguous,
        # hence .contiguous() before .view().
        x = x.transpose(1, 2).contiguous().view(n_batch, -1, self.H * self.dim_head)

        return self.w_o(x)
        
        


In [None]:
class SpatialAveragePooling(nn.Module):
    """Stateless module that reduces dimension 2 of its input to the mean."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # x is described as N x m x s in the original note; the reduction
        # removes the last of those three dimensions.
        reduce_dim = 2
        return torch.mean(x, dim=reduce_dim)


In [None]:
# Demo: run the visual branch on one random tensor.
# m feeds the spatial and temporal attention layers, s the channel
# attention layer, and H is the head count shared by all three.
m = 2048
s = 64
H = 8

input_tensor = torch.randn(6, s, m)

spatialSA = Visual_SpatialMultiHeadAttention(m, H)

channelSA = Visual_ChannelMultiHeadAttention(s, H)

sap = SpatialAveragePooling()

temporalSA = Visual_TemporalMultiHeadAttention(m, H)

v = Visual_BeforeCross(spatialSA, channelSA, sap, temporalSA)
# Invoke the module instead of .forward() so that registered forward hooks
# are not skipped.
output_tensor = v(input_tensor)

In [None]:
# Size a linear classification head from the fused audio/video output.
# NOTE(review): all elements of `output` are flattened together, so the
# input width also scales with the first dimension of `output` — confirm.
out_classes = 8

flattened_output = output.view(-1)
features = len(flattened_output)

fc_layer = nn.Linear(features, out_classes)

In [4]:
class Visual_SpatialMultiHeadAttention(nn.Module):
    """``H``-head scaled dot-product self-attention on width-``m`` features.

    The input is projected to queries, keys and values with three bias-free
    ``nn.Linear(m, m)`` layers, attended per head, merged, and passed through
    a fourth bias-free ``nn.Linear(m, m)``.

    Args:
        m: Size of the last input dimension.
        H: Number of heads; ``m`` must be divisible by ``H``.

    Attributes:
        attention_scores: Softmax weights stored by the latest ``forward``
            call, shaped ``(batch, H, seq_len, seq_len)``; ``None`` until then.
    """

    def __init__(self, m: int, H: int) -> None:
        super().__init__()
        self.m = m
        self.H = H

        # The width is reshaped into (H, dim_head) in forward(); that only
        # works when H divides m exactly.
        assert m % H == 0, f"m ({m}) must be divisible by the number of heads H ({H})"

        self.dim_head = m // H
        self.w_q = nn.Linear(m, m, bias=False)
        self.w_k = nn.Linear(m, m, bias=False)
        self.w_v = nn.Linear(m, m, bias=False)
        self.w_o = nn.Linear(m, m, bias=False)

        # Initialised so that inspecting the scores before any forward pass
        # returns None instead of raising AttributeError.
        self.attention_scores = None

    @staticmethod
    def attention(query, key, value):
        """Compute attended values and the softmax weights that produced them.

        Scores are ``Q K^T`` divided by the square root of the last dimension
        of ``query``, normalised with a softmax over the last dimension.
        """
        dim_head = query.shape[-1]
        raw_scores = query @ key.transpose(-2, -1)
        attention_scores = (raw_scores / math.sqrt(dim_head)).softmax(dim=-1)
        # Second element is returned for visualisation.
        return (attention_scores @ value), attention_scores

    def _split_heads(self, projected):
        """(batch, seq_len, m) --> (batch, H, seq_len, dim_head)."""
        batch, seq_len = projected.shape[0], projected.shape[1]
        return projected.view(batch, seq_len, self.H, self.dim_head).transpose(1, 2)

    def forward(self, q):
        """Apply self-attention to ``q``, indexed as ``(batch, seq_len, m)``.

        Returns:
            Tensor of shape ``(batch, seq_len, m)``.
        """
        query = self._split_heads(self.w_q(q))
        key = self._split_heads(self.w_k(q))
        value = self._split_heads(self.w_v(q))

        x, self.attention_scores = Visual_SpatialMultiHeadAttention.attention(query, key, value)

        # (batch, H, seq_len, dim_head) --> (batch, seq_len, H * dim_head);
        # .contiguous() is required before .view() after a transpose.
        merged = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.H * self.dim_head)

        return self.w_o(merged)

In [8]:
class Visual_BeforeCross(nn.Module):
    """Sequential visual front-end used ahead of the cross-attention step.

    Stages, in order: ``spatial`` on the input, ``channel`` on the input with
    dimensions 1 and 2 swapped, ``sap`` pooling, then ``temporal`` on the
    pooled tensor with a temporary singleton dimension at position 1.

    Args:
        spatial: Spatial attention module.
        channel: Channel attention module.
        sap: Pooling module.
        temporal: Temporal attention module.
        verbose: Print the tensor shape after each stage. Defaults to ``True``
            (the earlier behaviour); pass ``False`` for silent execution.
    """

    # Forward-reference (string) annotations keep this cell runnable even if
    # the sub-module class cells have not been executed yet.
    def __init__(
        self,
        spatial: "Visual_SpatialMultiHeadAttention",
        channel: "Visual_ChannelMultiHeadAttention",
        sap: "SpatialAveragePooling",
        temporal: "Visual_TemporalMultiHeadAttention",
        verbose: bool = True,
    ) -> None:
        super().__init__()
        self.spatial_selfAttention = spatial
        self.channel_selfAttention = channel
        self.pool = sap
        self.temporal_selfAttention = temporal
        self.verbose = verbose

    def _log_shape(self, label, tensor):
        """Emit the stage label and tensor shape when ``verbose`` is set."""
        if not self.verbose:
            return
        print(label)
        print(tensor.shape)

    def forward(self, x):
        """Run all four stages and return the final tensor."""
        after_spatial = self.spatial_selfAttention(x)
        self._log_shape("After Spatial", after_spatial)

        after_channel = self.channel_selfAttention(after_spatial.transpose(1, 2))
        self._log_shape("After Channel", after_channel)

        pooled = self.pool(after_channel)
        self._log_shape("After pool", pooled)

        # NOTE(review): a 2-D pooled tensor becomes a length-1 sequence for
        # the temporal stage here — confirm that this is intended.
        after_temporal = self.temporal_selfAttention(pooled.unsqueeze(1)).squeeze(1)
        self._log_shape("After Temporal", after_temporal)
        return after_temporal

tensor([[[[-1.5324,  0.6976,  0.4354, -0.2854],
          [ 0.5968, -0.2856,  1.1082, -1.3365],
          [-0.0670,  2.0308, -0.3248, -0.3032]],

         [[-0.2464,  0.8150, -0.3019, -0.3060],
          [ 0.9260,  0.7585,  1.3252,  0.6573],
          [ 3.6754, -0.2269, -0.2730, -0.3768]]]])
tensor([[-0.2737,  0.5084,  0.3569, -0.0329,  0.5556],
        [-0.8872,  0.5788, -0.9865,  0.2042, -0.5409],
        [-0.2331,  1.9347,  0.3152,  1.2046, -0.4526],
        [-0.1262, -0.2897, -1.2247, -1.4188, -0.3763]])
torch.Size([1, 2, 3, 5])
tensor([[[[-2.6496e-01,  5.4976e-01, -7.4832e-01,  1.1223e+00, -1.3184e+00],
          [ 5.4759e-04,  2.6693e+00,  2.4808e+00,  3.1531e+00,  4.8746e-01],
          [-1.6694e+00,  6.0084e-01, -1.7584e+00,  4.5579e-01, -8.7468e-01]],

         [[-5.4661e-01, -1.4900e-01, -6.1234e-01,  2.4494e-01, -3.2597e-01],
          [-1.3182e+00,  3.2831e+00, -8.0503e-01,  7.8822e-01, -7.4291e-01],
          [-6.9340e-01,  1.3183e+00,  1.9109e+00,  3.8389e-02,  2.4300e+00