## The NGNet
--*Neural G_Factor Net*--
Simple neural network used for the computation of the G_Factor, a numerical value defined by specific values of a graph built on the links of a page.

### Training Phase

The training process is quite standard and straight-forward: given the n G_features we want to directly predict the associated class.

### Employment Phase

The training model will be inserted in a wider model called X and utilized as a function for the computation of the G_Factor

In [933]:
!export CUDA_LAUNCH_BLOCKING=1

In [934]:
import torch
from torch.nn import Module
from torch import nn

In [935]:
class NGNet(Module):
    def __init__(self, num_features:int, G_dim:int, n_class:int) -> None:
        super().__init__()
        

        self.linear = nn.Linear(in_features=num_features, out_features=num_features)
        self.act1 = nn.Sigmoid()
        self.feacture = nn.Linear(in_features=num_features, out_features=G_dim)
        self.act2 = nn.Sigmoid()
        self.classifier = nn.Linear(in_features=G_dim, out_features=n_class)
        
    def forward(self, x:torch.Tensor) -> torch.Tensor:
        x = self.linear(x)
        x = self.act1(x)
        x = self.feacture(x)
        x = self.act2(x) 
        x = self.classifier(x)
        return x

## Dataset

Load the dataset

In [936]:
from Train_Classificator import Train_Classificator
from PyDataset import PyDataset
from torch.utils.data import DataLoader
from pathlib import PosixPath

In [937]:
dataset = PyDataset(PosixPath('./dataset/validation.tsv'), ['G_num_cliques','G_mean_pr','G_nodes'],  target_label='label')
dataloader = DataLoader(dataset, batch_size=16)

## Network

In [938]:
network = NGNet(num_features=3, G_dim=16, n_class=3)

## Train

In [939]:
trainer = Train_Classificator(network, torch.nn.CrossEntropyLoss(), torch.optim.Adam(params=network.parameters(), lr=0.001))

In [940]:
m = trainer.fit_and_get(dataloader, PosixPath('./NGNet/'), 100)

Epoch 1: 100%|██████████| 19/19 [00:00<00:00, 406.30it/s]
Epoch 2: 100%|██████████| 19/19 [00:00<00:00, 495.48it/s]
Epoch 3: 100%|██████████| 19/19 [00:00<00:00, 506.30it/s]
Epoch 4: 100%|██████████| 19/19 [00:00<00:00, 479.12it/s]
Epoch 5: 100%|██████████| 19/19 [00:00<00:00, 506.63it/s]
Epoch 6: 100%|██████████| 19/19 [00:00<00:00, 490.34it/s]
Epoch 7: 100%|██████████| 19/19 [00:00<00:00, 526.01it/s]
Epoch 8: 100%|██████████| 19/19 [00:00<00:00, 484.04it/s]
Epoch 9: 100%|██████████| 19/19 [00:00<00:00, 509.40it/s]
Epoch 10: 100%|██████████| 19/19 [00:00<00:00, 514.78it/s]
Epoch 11: 100%|██████████| 19/19 [00:00<00:00, 489.62it/s]
Epoch 12: 100%|██████████| 19/19 [00:00<00:00, 510.28it/s]
Epoch 13: 100%|██████████| 19/19 [00:00<00:00, 518.41it/s]
Epoch 14: 100%|██████████| 19/19 [00:00<00:00, 520.63it/s]
Epoch 15: 100%|██████████| 19/19 [00:00<00:00, 548.43it/s]
Epoch 16: 100%|██████████| 19/19 [00:00<00:00, 552.85it/s]
Epoch 17: 100%|██████████| 19/19 [00:00<00:00, 533.62it/s]
Epoch 

## Test

# X Net
--*full-no transformer model for Cultural Classification*--


In [941]:
import numpy as np
from sklearn.decomposition import SparsePCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, precision_score


In [942]:
class XNet:
    def __init__(self, num_features:int, G_dim:int, G_weigths, num_comp:int):
        self._dimensional_red = SparsePCA(n_components=num_comp)
        self._NGN = NGNet(num_features=num_features, G_dim=G_dim, n_class=3)
        self.classifier = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=2025)
        
        
    def fit(self, base_fe, G_fe, cu_fe, y) -> None:
       
            
        fe_to_reduce = base_fe
        reduced_fe = self._dimensional_red.fit_transform(fe_to_reduce)
        embeddings = np.concat((reduced_fe, G_fe, cu_fe), axis=1)
       

        self.classifier = self.classifier.fit(embeddings, y)
    
    def predict(self, base_fe, G_fe,cu_fe, y):
        fe_to_reduce = base_fe
        reduced_fe = self._dimensional_red.transform(fe_to_reduce)
        
        embeddings = np.concat((reduced_fe, G_fe, cu_fe), axis=1)
        y_pred = self.classifier.predict(embeddings)

        print(accuracy_score(y, y_pred))
        print(recall_score(y, y_pred, average='macro'))
        print(precision_score(y, y_pred, average='macro'))
        print(f1_score(y, y_pred, average='macro'))
        return y_pred
       




            


## Load Data


In [943]:
import pandas as pd

In [944]:
dataset = pd.read_csv('dataset/validation.tsv', sep='\t', index_col=None)
dataset = dataset.drop('Unnamed: 0', axis=1)

## Prepare Data

In [945]:
y = dataset['label']
X = dataset.drop('label', axis=1)


In [946]:
X_train = X.iloc[0:250, :]
X_test = X.iloc[250:, :]

y_train = y.iloc[0:250]
y_test = y.iloc[250:]

In [947]:
X_test.head 

<bound method NDFrame.head of      reference                         name  languages     G_avg   G_nodes  \
250        3.0              single whip law        4.0  0.752577  1.000000   
251        0.0                        Siuuu        1.0  0.680412  1.000000   
252       21.0                   skateboard       10.0  0.268041  1.000000   
253       10.0                  small press        4.0  0.422680  1.000000   
254       46.0            socks and sandals        3.0  0.350515  1.000000   
255        2.0           software publisher        3.0  0.000000  0.000000   
256        1.0                   solo dance        3.0  0.000000  0.010101   
257        8.0            Soo Line Railroad        5.0  0.505376  1.000000   
258      222.0                  South Korea       10.0  0.440860  1.000000   
259        7.0                   space rock        9.0  1.000000  1.000000   
260        7.0             Stone Bell House        5.0  0.580645  1.000000   
261       12.0     Storming of the

In [948]:
G_fe = X_train[['G_nodes','G_num_cliques','G_rank','G_mean_pr']].to_numpy()
page_fe = X_train.iloc[:, 12:].to_numpy()
cu_fe = X_train[['languages', 'reference']]

KeyError: "['G_rank'] not in index"

In [None]:
model = XNet(num_features=4, G_dim=16, G_weigths=torch.load('./NGNet/checkpoint_20250422_210000.pth', weights_only=True), num_comp=8)

## Train

In [None]:
model.fit(page_fe, G_fe, cu_fe, y_train)
model.predict(page_fe, G_fe,cu_fe, y_train)

1.0
1.0
1.0
1.0


array([2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 1, 2,
       2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 0, 0, 2,
       0, 2, 1, 2, 2, 2, 0, 0, 2, 2, 0, 1, 2, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 1, 0, 2, 2, 1, 0, 2, 2, 1, 0, 1, 0,
       1, 0, 1, 0, 2, 1, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 0, 0, 1,
       1, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 1, 2, 1, 2, 2, 2, 1,
       1, 0, 0, 2, 2, 1, 0, 0, 2, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 1, 0,
       1, 0, 2, 0, 2, 2, 0, 2, 1, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 1, 2, 1,
       1, 2, 0, 0, 1, 0, 2, 0, 2, 0, 2, 2, 1, 0, 1, 2, 0, 1, 0, 2, 2, 1,
       2, 1, 2, 0, 0, 2, 0, 2, 2, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2, 0, 0, 2,
       2, 0, 0, 1, 0, 0, 0, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 1, 2,
       2, 0, 0, 1, 2, 1, 0, 2])

In [None]:
G_fe = X_test[['G_nodes','G_num_cliques','G_rank','G_mean_pr']].to_numpy()
page_fe = X_test.iloc[:, 12:].to_numpy()
cu_fe = X_test[['languages', 'reference']]

In [None]:
y_pred = model.predict(page_fe, G_fe,cu_fe, y_test)

0.58
0.5402777777777777
0.5476190476190476
0.535373760488177
