In [7]:
# Import yang diperlukan
import torch
import pandas as pd
import numpy as np
import networkx as nx
from torch_geometric.data import Data
import logging
import gc

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Fungsi untuk membatasi pemakaian memori
def manage_memory():
    logger.info("Managing memory...")
    gc.collect()
    torch.cuda.empty_cache()

# Membaca dataset
def read_dataset(file_path):
    logger.info(f"Reading dataset from {file_path}...")
    df = pd.read_csv(file_path)
    logger.info(f"Dataset loaded with {df.shape[0]} rows and {df.shape[1]} columns.")
    return df

# Mengonversi dataset menjadi graph
def convert_to_graph(df):
    logger.info("Converting dataset to graph...")
    
    # Anggap kita memiliki kolom 'user_id' dan 'item_id', yang membentuk edges
    user_ids = df['User_ID'].values
    item_ids = df['Product_ID'].values

    # Membuat graph dengan NetworkX
    G = nx.Graph()

    # Menambahkan edges ke dalam graph
    G.add_edges_from(zip(user_ids, item_ids))
    
    logger.info(f"Graph created with {len(G.nodes)} nodes and {len(G.edges)} edges.")
    
    # Mengambil informasi graph untuk digunakan dalam PyTorch Geometric
    edge_index = torch.tensor(list(G.edges), dtype=torch.long).t().contiguous()
    num_nodes = len(G.nodes)
    
    # Buat data PyTorch Geometric
    data = Data(edge_index=edge_index, num_nodes=num_nodes)
    return data

file_path = 'UserBehavior_5M_cleaned.csv'
df = read_dataset(file_path)

# Konversi ke bentuk graph
data = convert_to_graph(df)

# Memeriksa hasil
logger.info(f"Data object created with {data.num_nodes} nodes and {data.edge_index.shape[1]} edges.")

# Mengelola memori setelah konversi
manage_memory()

# Jika Anda ingin memeriksa perangkat keras dan apakah CUDA tersedia
if torch.cuda.is_available():
    logger.info(f"CUDA is available. Running on {torch.cuda.get_device_name(0)}.")
else:
    logger.info("CUDA is not available, running on CPU.")

# Jika Anda menggunakan dataloader atau batch data, pastikan Anda melakukan hal-hal seperti batching dan memori dapat dikelola
logger.info("Process complete!")

INFO:__main__:Reading dataset from UserBehavior_5M_cleaned.csv...
INFO:__main__:Dataset loaded with 4952632 rows and 10 columns.
INFO:__main__:Converting dataset to graph...
INFO:__main__:Graph created with 1137958 nodes and 3763911 edges.
INFO:__main__:Data object created with 1137958 nodes and 3763911 edges.
INFO:__main__:Managing memory...
INFO:__main__:CUDA is available. Running on NVIDIA GeForce RTX 3070.
INFO:__main__:Process complete!
