In [1]:
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset, random_split
import torch_geometric
import torch
import networkx as nx
from tqdm import tqdm
from torchmetrics.classification import BinaryConfusionMatrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from util.preprocess import findSensorActuator

device="cuda" if torch.cuda.is_available() else "cpu"
print(f"USING {device}")

USING cuda


In [2]:

# Number of rows in the DataFrame
samples = 1000
features=6
# Creating random data for the first two columns
col1 = np.random.randint(1, 100, samples)
col2 = np.random.randint(1, 100, samples)

# Define the mathematical relation for the 'Attack' column
attack = np.where(col1 + col2 > 150, 1, 0)

# Creating the DataFrame with six columns
df = pd.DataFrame({
    'col_1': col1,
    'col_2': col2,
    'attack': attack,  # Renaming Affair to Attack
}|{f"col_{i+1}":np.random.randint(1, 100, samples) for i in range(2,features)})

# Apply Min-Max Scaling, excluding the 'Attack' column
# columns_to_scale = df.columns.difference(['attack'])
# df_scaled = df.copy()
# df_scaled[columns_to_scale] = (df[columns_to_scale] - df[columns_to_scale].min()) / (df[columns_to_scale].max() - df[columns_to_scale].min())

# Display the original and scaled DataFrames
print("Original DataFrame:")
print(df)
# print("\nScaled DataFrame (with 'Attack' excluded from scaling):")
# print(df_scaled)


Original DataFrame:
     col_1  col_2  attack  col_3  col_4  col_5  col_6
0       74     56       0     80     67     73     13
1       32     42       0     62     59     53     62
2       86     60       0     86     53     22     76
3       27     58       0      8     28     58     50
4       68      6       0     81     48     21     58
..     ...    ...     ...    ...    ...    ...    ...
995     44     77       0     86     65     88     36
996     47     69       0     34     24     63     24
997     66     64       0     15     14     45     13
998      3      9       0     85     73     85     72
999     76     60       0     62     11     78     42

[1000 rows x 7 columns]


In [61]:
df=pd.read_csv("./data/batadal/test.csv")

In [63]:
findSensorActuator(df)

KeyboardInterrupt: 

In [3]:
class MyDataset(torch.utils.data.Dataset):
    """docstring for MyDataset."""
    def __init__(self, df:pd.DataFrame,device="cuda"):
        super(MyDataset, self).__init__()
        self.df=df.__deepcopy__()
        self.labels=self.df["attack"].__deepcopy__()
        self.df.drop(columns=["attack"],inplace=True)
        self.device=device
    def __len__(self):
        return self.df.shape[0]
    def __getitem__(self, index):
        return torch.tensor(self.df.iloc[index].to_numpy(),device=self.device).float()\
    ,torch.tensor([self.labels.iloc[index]],device=self.device).float()
    
# Create a Dataset
dataset=MyDataset(df,device=device)

In [4]:
def norm(train:pd.DataFrame, test:pd.DataFrame,excludes=["attack"]):
    cols=[col for col in train.columns if col not in excludes]
    normalizer = MinMaxScaler(feature_range=(0, 1)).fit(train[cols]) # scale training data to [0,1] range
    train[cols] = normalizer.transform(train[cols])
    test[cols] = normalizer.transform(test[cols])
    train=train.reindex()
    test=test.reindex()
    
    return train, test

In [5]:
# Split the dataset into train and test sets
train_df, test_df = train_test_split(df,test_size=.2,random_state=42)
train_df, test_df=norm(train_df, test_df)
train_loader=torch.utils.data.DataLoader(dataset=MyDataset(train_df),batch_size=16)    
test_loader=torch.utils.data.DataLoader(dataset=MyDataset(test_df),batch_size=16)    


In [6]:
import torch_geometric.nn
from laf_model import LAFLayer
class GCNLafConv(torch_geometric.nn.GCNConv):
    def __init__(self,  units=1, node_dim=32, **kwargs):
        super(GCNLafConv, self).__init__( **kwargs)
        self.laf = LAFLayer(units=units, kernel_initializer='random_uniform')
        self.mlp = torch.nn.Linear(node_dim*units, node_dim)
        self.dim = node_dim
        self.units = units
    
    def aggregate(self, inputs, index,**kwargs):
        x = torch.sigmoid(inputs)
        x = self.laf(x, index)
        x = x.view((-1, self.dim * self.units))
        x = self.mlp(x)
        return x



class CustomModule(torch.nn.Module):
    def __init__(self, input_features, hidden_size,device="cuda",edge_index=None):
        """
        Initialize the custom PyTorch module with adj1, embedding layer, and soil tensor.

        Args:
            input_features (int): The size of the square matrix (adj1) and embedding size.
            hidden_size (int): The size of the hidden dimension for the soil tensor.
        """
        super(CustomModule, self).__init__()
        self.input_features=input_features
        self.hidden_size=hidden_size
        # Define adj1 as a square matrix parameter
        self.linear_transformations = torch.nn.ModuleList([
            torch.nn.Linear(1, hidden_size) for _ in range(input_features)
        ])
        if edge_index is None:
            G = nx.complete_graph(input_features)
            self.adj1=torch.tensor([[[x,y] for y in l.keys() ] for x,l in G.adjacency()]).reshape(-1,2).T.reshape(2,-1).to(device)
            self.adj1_weigthts = torch.nn.Parameter(torch.ones(self.adj1.shape[-1],requires_grad=True))
        else:
            self.adj1=torch.tensor(edge_index).to(device)
            self.adj1_weigthts=None
        # Define an embedding layer
        # self.embedding = torch.nn.Embedding(in
        self.lin1=torch.nn.Linear(hidden_size,1)
        # put_features, hidden_size)
        self.gcn1=torch_geometric.nn.GATConv(hidden_size,1,add_self_loops=False)
        self.lin1=torch.nn.Linear(input_features,1)
        
    def forward(self, x:torch.Tensor):
        """
        Forward pass to compute the embedding and add the soil tensor.

        Args:
            indices (torch.Tensor): Input tensor with indices for the embedding layer.
        
        Returns:
            torch.Tensor: Resulting tensor after embedding and addition.
        """
        shape=x.shape
        if x.dim()==1:
            x.unsqueeze_(0)
        embedded_columns = [
            self.linear_transformations[i](x[:,i].unsqueeze(-1)) for i in range(self.input_features)
        ]
        x=torch.cat(embedded_columns, dim=-1).reshape(*shape,self.hidden_size)
        x=self.gcn1(x,self.adj1,self.adj1_weigthts)
        x.squeeze_(-1)
        x=torch.nn.functional.relu(x)
        x=self.lin1(x)
        x=torch.nn.functional.sigmoid(x)
        return x

    @property
    def prop(self):
        """
        Property to return adj1.

        Returns:
            torch.Tensor: The adj1 parameter (square matrix).
        """
        return self.adj1

In [7]:
model=CustomModule(features,10,edge_index=[[2,3],[4,5]])

In [8]:
epoch=50
optimizer=torch.optim.Adam(model.parameters(),lr=.001)
loss_fn=torch.nn.BCELoss(reduction="sum")

In [9]:
model=model.train().to(device=device)
t=tqdm(torch.arange(epoch),postfix="loss")
with torch.autograd.set_detect_anomaly(False):
    for e in t:
        acu_loss=0
        for i,(x,y) in enumerate(train_loader):
            optimizer.zero_grad(set_to_none=True)   
            y_pred=model(x)
            loss=loss_fn(y_pred,y)
            acu_loss+=loss.item()
            loss.backward()
            optimizer.step()
        t.set_postfix({"loss":(acu_loss/dataset.__len__())})


conf=BinaryConfusionMatrix().to(device=device)

model=model.eval()

for i,(x,y) in enumerate(test_loader):
    y_pred=model(x)
    conf.update(y_pred,y)

conf.compute()

  0%|          | 0/50 [00:01<?, ?it/s, loss]


AssertionError: Static graphs not supported in 'GATConv'

In [None]:
adj=model.adj1.detach().numpy()
w=model.adj1_weigthts.detach().numpy()

result_df=pd.DataFrame({
    "weigth":w,"x":adj[0],"y":adj[1]
})
result_df.sort_values(by="weigth",ascending=False).head()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.