## The NGNet
--*Neural G_Factor Net*--
Simple neural network used for the computation of the G_Factor, a numerical value defined by specific values of a graph built on the links of a page.

### Training Phase

The training process is quite standard and straightforward: given the n G_features, we want to directly predict the associated class.

### Employment Phase

The trained model will be inserted into a wider model called X and used as a function for the computation of the G_Factor.

In [None]:
# `!export ...` runs in a throwaway subshell, so the variable never reaches the
# kernel process. Set it on this process's environment instead; this must happen
# before CUDA is initialised for it to take effect.
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
import torch
from torch.nn import Module
from torch import nn

In [None]:
class NGNet(Module):
    """Small feed-forward network that maps graph features to class scores.

    The input goes through a same-width linear layer, is projected to a
    ``G_dim``-wide representation, and is finally turned into ``n_class`` raw
    scores. A sigmoid follows each of the first two linear layers; no
    normalisation is applied to the output.

    Args:
        num_features: Size of the last dimension of the input tensor.
        G_dim: Width of the intermediate representation fed to the classifier.
        n_class: Number of scores produced per sample.
    """

    def __init__(self, num_features:int, G_dim:int, n_class:int) -> None:
        super().__init__()

        # Attribute names double as state-dict keys, so they are kept verbatim
        # (including the spelling of ``feacture``).
        self.linear = nn.Linear(num_features, num_features)
        self.act1 = nn.Sigmoid()
        self.feacture = nn.Linear(num_features, G_dim)
        self.act2 = nn.Sigmoid()
        self.classifier = nn.Linear(G_dim, n_class)

    def forward(self, x:torch.Tensor) -> torch.Tensor:
        """Return the raw class scores computed from ``x``."""
        hidden = self.act1(self.linear(x))
        g_repr = self.act2(self.feacture(hidden))
        return self.classifier(g_repr)

## Dataset

Load the dataset

In [None]:
from Train_Classificator import Train_Classificator
from PyDataset import PyDataset
from torch.utils.data import DataLoader
from pathlib import PosixPath

In [None]:
# Build the dataset from the TSV file; the list presumably names the feature
# columns and 'label' the target — verify against PyDataset's signature.
# NOTE(review): this loads validation.tsv even though it feeds the training
# step further down — confirm that this is the intended split.
dataset = PyDataset(PosixPath('./dataset/validation.tsv'), ['G_num_cliques','G_mean_pr','G_nodes'],  target_label='label')
# Batches of 16; shuffle is not requested, so batches follow dataset order.
dataloader = DataLoader(dataset, batch_size=16)

## Network

In [None]:
# Three inputs (one per feature column requested from PyDataset), a 16-wide
# intermediate representation and three output scores.
network = NGNet(num_features=3, G_dim=16, n_class=3)

## Train

In [None]:
# Cross-entropy loss and Adam (lr=0.001) over all of the network's parameters.
# presumably the argument order is (model, loss, optimizer) — verify against Train_Classificator
trainer = Train_Classificator(network, torch.nn.CrossEntropyLoss(), torch.optim.Adam(params=network.parameters(), lr=0.001))

In [None]:
# presumably trains for 100 epochs and writes checkpoints under ./NGNet/ (a
# checkpoint from that directory is loaded later) — TODO confirm the meaning of
# the arguments and of the returned value against Train_Classificator.fit_and_get
m = trainer.fit_and_get(dataloader, PosixPath('./NGNet/'), 100)

## Test

# X Net
--*Full non-transformer model for Cultural Classification*--


In [None]:
import numpy as np
from sklearn.decomposition import SparsePCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, precision_score


In [None]:
class XNet:
    """Random-forest classifier over reduced page features plus extra features.

    The base features are compressed with SparsePCA, concatenated column-wise
    with the graph features and the additional features, and the resulting
    matrix is fed to a ``RandomForestClassifier``.

    Args:
        num_features: Input width used to build the internal ``NGNet``.
        G_dim: Intermediate width used to build the internal ``NGNet``.
        G_weigths: Accepted for interface compatibility; currently unused.
        num_comp: Number of SparsePCA components kept for the base features.
    """

    def __init__(self, num_features:int, G_dim:int, G_weigths, num_comp:int):
        self._dimensional_red = SparsePCA(n_components=num_comp)
        # NOTE(review): the NGNet is instantiated but never called by fit() or
        # predict(), and ``G_weigths`` is never loaded into it — confirm whether
        # the G_Factor computation is still meant to be wired in.
        self._NGN = NGNet(num_features=num_features, G_dim=G_dim, n_class=3)
        self.classifier = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=2025)

    def _build_embeddings(self, base_fe, G_fe, cu_fe, *, fit: bool):
        """Reduce ``base_fe`` and stack it column-wise with ``G_fe`` and ``cu_fe``.

        When ``fit`` is true the SparsePCA reducer is fitted on ``base_fe``
        first; otherwise the already-fitted reducer is applied.
        """
        reducer = self._dimensional_red
        reduced_fe = reducer.fit_transform(base_fe) if fit else reducer.transform(base_fe)
        # np.concatenate instead of np.concat: the latter only exists in NumPy >= 2.0.
        return np.concatenate((reduced_fe, G_fe, cu_fe), axis=1)

    def fit(self, base_fe, G_fe, cu_fe, y) -> None:
        """Fit the SparsePCA reducer and the random forest on the given data.

        Args:
            base_fe: Features to be reduced with SparsePCA.
            G_fe: Graph features, appended unchanged.
            cu_fe: Additional features, appended unchanged.
            y: Target labels, one per row.
        """
        embeddings = self._build_embeddings(base_fe, G_fe, cu_fe, fit=True)
        self.classifier.fit(embeddings, y)

    def predict(self, base_fe, G_fe, cu_fe, y=None):
        """Predict labels and, when ``y`` is given, print evaluation metrics.

        Args:
            base_fe: Features to be reduced with the fitted SparsePCA.
            G_fe: Graph features, appended unchanged.
            cu_fe: Additional features, appended unchanged.
            y: Optional true labels. When provided, accuracy and macro-averaged
                recall, precision and F1 are printed, in that order.

        Returns:
            The labels predicted by the random forest.
        """
        embeddings = self._build_embeddings(base_fe, G_fe, cu_fe, fit=False)
        y_pred = self.classifier.predict(embeddings)

        if y is not None:
            print(accuracy_score(y, y_pred))
            print(recall_score(y, y_pred, average='macro'))
            print(precision_score(y, y_pred, average='macro'))
            print(f1_score(y, y_pred, average='macro'))
        return y_pred

## Load Data


In [None]:
import pandas as pd

In [None]:
# Read the tab-separated file and discard its 'Unnamed: 0' column in one go.
dataset = pd.read_csv('dataset/validation.tsv', sep='\t', index_col=None).drop(columns='Unnamed: 0')

## Prepare Data

In [None]:
# Separate the 'label' column (target) from the remaining columns (inputs).
X, y = dataset.drop(columns='label'), dataset['label']


In [None]:
# Ordered split without shuffling: the first 250 rows form the training part,
# every later row the test part.
X_train, X_test = X.iloc[:250, :], X.iloc[250:, :]
y_train, y_test = y.iloc[:250], y.iloc[250:]

In [None]:
# Call head() so the cell shows the first rows; the bare attribute only
# displays the bound method object.
X_test.head()

In [None]:
# Inputs for XNet taken from the training rows.
# Four graph columns, converted to a NumPy array.
G_fe = X_train[['G_nodes','G_num_cliques','G_rank','G_mean_pr']].to_numpy()
# Every column from positional index 12 onward — presumably the page-level
# features; confirm against the column order of the TSV.
page_fe = X_train.iloc[:, 12:].to_numpy()
# Kept as a DataFrame (no .to_numpy()), unlike the two arrays above.
cu_fe = X_train[['languages', 'reference']]

In [None]:
# Build the XNet with 8 SparsePCA components.
# NOTE(review): XNet.__init__ accepts G_weigths but never uses it, so the
# checkpoint loaded here currently has no effect on the model.
# NOTE(review): num_features=4 here, while the NGNet trained earlier in this
# notebook was built with num_features=3 — confirm the checkpoint matches.
model = XNet(num_features=4, G_dim=16, G_weigths=torch.load('./NGNet/checkpoint_20250422_210000.pth', weights_only=True), num_comp=8)

## Train

In [None]:
# Fit on the training rows, then predict on those same rows: the metrics
# printed by predict() here are training-set scores.
model.fit(page_fe, G_fe, cu_fe, y_train)
model.predict(page_fe, G_fe,cu_fe, y_train)

In [None]:
# Same feature extraction as for the training rows, now on the test rows; the
# names G_fe, page_fe and cu_fe are rebound.
G_fe = X_test[['G_nodes','G_num_cliques','G_rank','G_mean_pr']].to_numpy()
# Every column from positional index 12 onward — presumably the page-level
# features; confirm against the column order of the TSV.
page_fe = X_test.iloc[:, 12:].to_numpy()
# Kept as a DataFrame (no .to_numpy()), unlike the two arrays above.
cu_fe = X_test[['languages', 'reference']]

In [None]:
# Predict on the test rows; predict() also prints accuracy and macro-averaged
# recall, precision and F1 against y_test.
y_pred = model.predict(page_fe, G_fe,cu_fe, y_test)