In [1]:
# Очистка предыдущих установок
!pip uninstall -y torch torchvision torchaudio torch-scatter torch-sparse torch-cluster torch-spline-conv pyg-lib torch-geometric transformers

# Совместимые версии PyTorch + CUDA
!pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu118

# Совместимые версии PyG
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-geometric

# Совместимая версия transformers + sentence-transformers
!pip install transformers==4.30.2
!pip install --force-reinstall sentence-transformers

# Остальные зависимости
!pip install scikit-learn pandas tqdm


Found existing installation: torch 2.7.0
Uninstalling torch-2.7.0:
  Successfully uninstalled torch-2.7.0
Found existing installation: torchvision 0.15.1+cu118
Uninstalling torchvision-0.15.1+cu118:
  Successfully uninstalled torchvision-0.15.1+cu118
Found existing installation: torchaudio 2.0.1+cu118
Uninstalling torchaudio-2.0.1+cu118:
  Successfully uninstalled torchaudio-2.0.1+cu118
Found existing installation: torch-scatter 2.1.2+pt20cu118
Uninstalling torch-scatter-2.1.2+pt20cu118:
  Successfully uninstalled torch-scatter-2.1.2+pt20cu118
Found existing installation: torch-sparse 0.6.18+pt20cu118
Uninstalling torch-sparse-0.6.18+pt20cu118:
  Successfully uninstalled torch-sparse-0.6.18+pt20cu118
Found existing installation: torch-cluster 1.6.3+pt20cu118
Uninstalling torch-cluster-1.6.3+pt20cu118:
  Successfully uninstalled torch-cluster-1.6.3+pt20cu118
Found existing installation: torch-spline-conv 1.2.2+pt20cu118
Uninstalling torch-spline-conv-1.2.2+pt20cu118:
  Successfully unin



In [1]:
import torch

# Report which torch build is actually loaded in this kernel and whether it can see a GPU.
for label, value in (
    ("Torch version:", torch.__version__),
    ("CUDA available:", torch.cuda.is_available()),
    ("CUDA version:", torch.version.cuda),
):
    print(label, value)


Torch version: 2.7.0+cu126
CUDA available: True
CUDA version: 12.6


In [None]:
# Установка зависимостей PyG
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
!pip install torch-geometric


In [3]:
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import numpy as np
from torch_geometric.utils import dense_to_sparse

# === Load and prepare the data ===
df = pd.read_csv('students_interests.csv')  # replace with the path to your file

# First column is used as the text to embed, second column as the class label
texts = df.iloc[:, 0].astype(str).tolist()
labels = df.iloc[:, 1].astype(str).tolist()

# Encode the string labels as integer class ids
le = LabelEncoder()
y = torch.tensor(le.fit_transform(labels), dtype=torch.long)

# Text -> sentence embeddings.
# The encoder has its own name: `model` is bound to the GAT network further down,
# and sharing the name would both lose the encoder and make `model` mean two things.
encoder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')  # supports Russian
embeddings = encoder.encode(texts, show_progress_bar=True)
X = torch.tensor(embeddings, dtype=torch.float)

# Build the graph from pairwise cosine similarity of the embeddings
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(embeddings)  # sklearn gets the NumPy array, not the tensor
threshold = 0.7  # two nodes are connected when their similarity exceeds this value
adj_matrix = (similarity > threshold).astype(int)
np.fill_diagonal(adj_matrix, 0)  # no self-loops

# dense_to_sparse returns (edge_index, edge_attr); only the connectivity is kept
edge_index = dense_to_sparse(torch.tensor(adj_matrix))[0]

data = Data(x=X, edge_index=edge_index, y=y)

# === Train/test split ===
# train_test_split returns node indices; they are turned into boolean masks below
train_idx, test_idx = train_test_split(
    range(len(data.y)),
    test_size=0.3,
    stratify=y.numpy(),  # stratify on the encoded labels as a NumPy array
    random_state=42,
)
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train_idx] = True
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.test_mask[test_idx] = True

# === GAT model ===
class GAT(torch.nn.Module):
    """Two-layer graph attention network that returns raw per-node outputs (no softmax).

    Args:
        in_channels: size of each input node feature vector.
        hidden_channels: per-head output size of the first attention layer.
        out_channels: output size of the second attention layer.
        heads: number of attention heads in the first layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super().__init__()
        self.gat1 = GATConv(in_channels=in_channels, out_channels=hidden_channels, heads=heads)
        # The second layer consumes the first layer's multi-head output,
        # hence an input width of hidden_channels * heads.
        second_layer_in = hidden_channels * heads
        self.gat2 = GATConv(in_channels=second_layer_in, out_channels=out_channels, heads=1)

    def forward(self, x, edge_index):
        """Apply attention layer 1, ELU, then attention layer 2 on the same edges."""
        hidden = F.elu(self.gat1(x, edge_index))
        return self.gat2(hidden, edge_index)

# Seed torch before the network is constructed so the initial weights (and therefore
# the training curve) are the same on every Restart & Run All.
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 32 hidden units per head, 4 heads, one output per encoded class
model = GAT(data.num_node_features, 32, len(le.classes_), heads=4).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

# === Training ===
def train():
    """Run one full-graph optimisation step and return the training loss as a float.

    Uses the module-level `model`, `optimizer` and `data`; the loss is computed
    only on the nodes selected by `data.train_mask`.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data.x, data.edge_index)
    train_nodes = data.train_mask
    loss = F.cross_entropy(logits[train_nodes], data.y[train_nodes])

    loss.backward()
    optimizer.step()
    return loss.item()

def test():
    """Evaluate the module-level `model` on the nodes selected by `data.test_mask`.

    Returns:
        (accuracy, precision, recall, f1) where precision, recall and f1 are
        macro-averaged over the classes.
    """
    model.eval()
    # Inference only: disable autograd so no graph is built for the forward pass
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    # sklearn metrics operate on host-side NumPy arrays
    test_pred = pred[data.test_mask].cpu().numpy()
    test_true = data.y[data.test_mask].cpu().numpy()

    acc = accuracy_score(test_true, test_pred)
    prec = precision_score(test_true, test_pred, average='macro')
    rec = recall_score(test_true, test_pred, average='macro')
    f1 = f1_score(test_true, test_pred, average='macro')

    return acc, prec, rec, f1

# Train for 100 epochs, reporting test-set metrics on every 10th epoch
for epoch in range(1, 101):
    loss = train()
    if epoch % 10 != 0:
        continue
    acc, prec, rec, f1 = test()
    print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}')

# === Final evaluation ===
acc, prec, rec, f1 = test()
for report_line in (
    "\nFinal Metrics:",
    f"Accuracy: {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall: {rec:.4f}",
    f"F1 Score: {f1:.4f}",
):
    print(report_line)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.89k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 010, Loss: 0.0603, Acc: 0.8750, Precision: 0.8821, Recall: 0.8772, F1: 0.8732
Epoch 020, Loss: 0.0046, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 030, Loss: 0.0012, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 040, Loss: 0.0008, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 050, Loss: 0.0008, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 060, Loss: 0.0010, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 070, Loss: 0.0012, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 080, Loss: 0.0016, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 090, Loss: 0.0020, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091
Epoch 100, Loss: 0.0024, Acc: 0.9107, Precision: 0.9155, Recall: 0.9129, F1: 0.9091

Final Metrics:
Accuracy: 0.9107
Precision: 0.9155
Recall: 0.9129
F1 Score: 0.9091
