In [3]:
from neo4j import GraphDatabase
import json
import logging
from typing import Dict, List, Any

# Logging configuration: INFO level, each record formatted as "time - level - message"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class Neo4jImporter:
    """Import thesis records (entities and their relations) into Neo4j.

    Each thesis becomes a ``Thesis`` node that ``CONTAINS`` one node per
    extracted entity; relations between entities become typed relationships
    carrying a weight, first/last occurrence years and the contributing
    thesis ids.
    """

    # Node labels accepted as keys of a thesis' "entities" mapping. These are
    # fixed constants, so interpolating them into query text is safe.
    ENTITY_LABELS = (
        "STAKEHOLDER",
        "PROBLEM_CHALLENGE",
        "SOLUTION_APPROACH",
        "FOCUS_AREA_THEME",
    )

    def __init__(self, uri: str, user: str, password: str):
        """Open a driver connection to the Neo4j database.

        Args:
            uri: Neo4j URI (e.g. 'bolt://localhost:7687')
            user: User name
            password: Password
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        logger.info("Neo4j bağlantısı kuruldu")

    def close(self):
        """Close the database driver."""
        self.driver.close()
        logger.info("Neo4j bağlantısı kapatıldı")

    def create_constraints(self):
        """Create the uniqueness constraints used by the import.

        Failures are logged and do not abort the remaining constraints.
        """
        with self.driver.session() as session:
            constraints = [
                "CREATE CONSTRAINT thesis_id IF NOT EXISTS FOR (t:Thesis) REQUIRE t.id IS UNIQUE",
                "CREATE CONSTRAINT stakeholder_name IF NOT EXISTS FOR (s:STAKEHOLDER) REQUIRE s.name IS UNIQUE",
                "CREATE CONSTRAINT problem_challenge_name IF NOT EXISTS FOR (p:PROBLEM_CHALLENGE) REQUIRE p.name IS UNIQUE",
                "CREATE CONSTRAINT solution_approach_name IF NOT EXISTS FOR (s:SOLUTION_APPROACH) REQUIRE s.name IS UNIQUE",
                "CREATE CONSTRAINT focus_area_theme_name IF NOT EXISTS FOR (f:FOCUS_AREA_THEME) REQUIRE f.name IS UNIQUE"
            ]

            for constraint in constraints:
                try:
                    session.run(constraint)
                    logger.info(f"Constraint oluşturuldu: {constraint}")
                except Exception as e:
                    # Best effort: one failing constraint must not stop the import.
                    logger.error(f"Constraint oluşturma hatası: {e}")

    def clear_database(self):
        """Delete every node and relationship in the database."""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            logger.info("Veritabanı temizlendi")

    @staticmethod
    def _quote_identifier(name: Any) -> str:
        """Return *name* as a backtick-quoted Cypher identifier.

        Relationship types cannot be passed as query parameters, so they have
        to be spliced into the query text. Quoting (and doubling embedded
        backticks) keeps data-supplied types from breaking or altering the
        query. The literal ``\\u0060`` escape is normalised first because
        some Cypher parsers treat it as a backtick.
        """
        text = str(name).replace("\\u0060", "`")
        return "`" + text.replace("`", "``") + "`"

    @staticmethod
    def _entity_query(label: str) -> str:
        """Build the query that merges entity nodes of *label* for one thesis.

        The occurrence years are widened (min / max) on existing nodes instead
        of being overwritten, so they do not depend on import order.
        """
        return f"""
            MATCH (t:Thesis {{id: $thesis_id}})
            UNWIND $entities AS entity
            MERGE (e:{label} {{name: entity}})
            ON CREATE SET e.type = '{label}',
                          e.first_occurrence_year = $year,
                          e.last_occurrence_year = $year
            ON MATCH SET e.first_occurrence_year = CASE WHEN e.first_occurrence_year IS NULL OR e.first_occurrence_year > $year THEN $year ELSE e.first_occurrence_year END,
                         e.last_occurrence_year = CASE WHEN e.last_occurrence_year IS NULL OR e.last_occurrence_year < $year THEN $year ELSE e.last_occurrence_year END
            MERGE (t)-[:CONTAINS]->(e)
            """

    @classmethod
    def _relation_query(cls, relation_type: Any) -> str:
        """Build the query that merges one typed relationship between two entities.

        Source and target are matched by ``name`` on nodes of any label.
        NOTE(review): if the same name exists under several labels, every
        matching pair gets the relationship - confirm this is intended.
        """
        rel_type = cls._quote_identifier(relation_type)
        return f"""
            MATCH (source) WHERE source.name = $source
            MATCH (target) WHERE target.name = $target
            MERGE (source)-[r:{rel_type}]->(target)
            ON CREATE SET r.weight = 1,
                          r.first_occurrence_year = $year,
                          r.last_occurrence_year = $year,
                          r.thesis_ids = [$thesis_id]
            ON MATCH SET r.weight = r.weight + 1,
                         r.first_occurrence_year = CASE WHEN r.first_occurrence_year > $year THEN $year ELSE r.first_occurrence_year END,
                         r.last_occurrence_year = CASE WHEN r.last_occurrence_year < $year THEN $year ELSE r.last_occurrence_year END,
                         r.thesis_ids = CASE WHEN NOT $thesis_id IN r.thesis_ids THEN r.thesis_ids + [$thesis_id] ELSE r.thesis_ids END
            """

    def import_thesis(self, thesis: Dict[str, Any]):
        """Import a single thesis record into Neo4j.

        Args:
            thesis: Thesis record. The keys read here are "thesis_id", "year",
                "entities" (mapping of entity label to a list of names) and
                "relations" (list of dicts with "source", "target" and
                "relation").
        """
        thesis_id = thesis.get("thesis_id", "")
        year = thesis.get("year", 0)

        with self.driver.session() as session:
            # Create (or update) the thesis node.
            session.run(
                "MERGE (t:Thesis {id: $id}) "
                "SET t.year = $year",
                id=thesis_id, year=year
            )

            # Add the entities of every known label and link them to the thesis.
            # A missing, null or empty list yields no nodes, so it is skipped.
            entities = thesis.get("entities") or {}
            for label in self.ENTITY_LABELS:
                names = entities.get(label)
                if not names:
                    continue
                session.run(
                    self._entity_query(label),
                    thesis_id=thesis_id, entities=names, year=year
                )

            # Add the relations between entities; incomplete records are ignored.
            for relation in thesis.get("relations") or []:
                source = relation.get("source", "")
                target = relation.get("target", "")
                relation_type = relation.get("relation", "")

                if not (source and target and relation_type):
                    continue

                # Plain Cypher MERGE: unlike apoc.merge.relationship with
                # static properties, ON MATCH can increment the weight and
                # extend thesis_ids, and no plugin is required.
                session.run(
                    self._relation_query(relation_type),
                    source=source, target=target, year=year, thesis_id=thesis_id
                )

            logger.info(f"Tez {thesis_id} veritabanına aktarıldı")

    def update_entity_frequencies(self):
        """Recompute ``frequency`` and ``thesis_ids`` on every entity node.

        ``frequency`` is the number of distinct theses that CONTAIN the entity.
        """
        with self.driver.session() as session:
            for entity_type in self.ENTITY_LABELS:
                session.run(
                    f"""
                    MATCH (e:{entity_type})<-[:CONTAINS]-(t:Thesis)
                    WITH e, COLLECT(DISTINCT t.id) AS thesis_ids
                    SET e.frequency = SIZE(thesis_ids),
                        e.thesis_ids = thesis_ids
                    """)

                logger.info(f"{entity_type} için frekans değerleri güncellendi")

    def bulk_import(self, data: List[Dict[str, Any]], batch_size: int = 50):
        """Import a list of thesis records, reporting progress per batch.

        Args:
            data: List of thesis records
            batch_size: Number of theses per progress batch (must be positive)

        Raises:
            ValueError: If ``batch_size`` is not a positive integer.
        """
        if batch_size <= 0:
            # Otherwise 0 divides by zero and a negative size silently imports nothing.
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        # Constraints first, so the MERGE lookups are backed by unique indexes.
        self.create_constraints()

        total_batches = (len(data) + batch_size - 1) // batch_size
        for i in range(0, len(data), batch_size):
            batch = data[i:i + batch_size]
            batch_num = i // batch_size + 1

            logger.info(f"Batch {batch_num}/{total_batches} işleniyor ({len(batch)} tez)")

            for thesis in batch:
                try:
                    self.import_thesis(thesis)
                except Exception as e:
                    # One broken record must not abort the whole import.
                    logger.error(f"Tez {thesis.get('thesis_id', 'bilinmiyor')} aktarımı sırasında hata: {e}")

            logger.info(f"Batch {batch_num}/{total_batches} tamamlandı")

        # Frequencies depend on the complete graph, so compute them last.
        self.update_entity_frequencies()

        logger.info("Veri aktarımı tamamlandı")

def load_json_data(file_path: str) -> List[Dict[str, Any]]:
    """Read the thesis records stored in a JSON file.

    Any failure (missing file, invalid JSON, ...) is logged and reported to
    the caller as an empty list instead of an exception.

    Args:
        file_path: Path of the JSON file

    Returns:
        The parsed JSON content, or an empty list if loading failed
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            records = json.load(handle)
        record_count = len(records)
        logger.info(f"{record_count} tez verisi yüklendi")
    except Exception as err:
        logger.error(f"JSON dosyası yüklenirken hata oluştu: {err}")
        return []
    return records



In [4]:
def main():
    """Load the thesis JSON file and import it into Neo4j.

    Connection settings and the input path can be overridden through the
    environment variables NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD and
    THESIS_JSON_PATH; when unset, the previous hard-coded values are used.

    WARNING: the target database is wiped before the import.
    """
    # Local import so this function stays self-contained.
    import os

    # Neo4j connection settings (environment overrides, original defaults)
    neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
    neo4j_password = os.environ.get("NEO4J_PASSWORD", "12345678")

    # Path of the JSON file holding the thesis records
    json_file_path = os.environ.get(
        "THESIS_JSON_PATH",
        "/home/serdar/Documents/structural-analysis-of-distance-education-theses-via-knowledge-graphs/data/thesis_results-final/thesis_results.json",
    )

    # Load the JSON data; an empty result means loading failed or there is nothing to import
    data = load_json_data(json_file_path)

    if not data:
        logger.error("Veri yüklenemedi, işlem sonlandırılıyor")
        return

    # Open the Neo4j connection
    importer = Neo4jImporter(neo4j_uri, neo4j_user, neo4j_password)

    try:
        # Start from an empty database (deletes ALL existing nodes and relationships)
        importer.clear_database()

        # Import the records
        importer.bulk_import(data)

        logger.info("Veri aktarımı tamamlandı")
    except Exception as e:
        logger.error(f"Veri aktarımı sırasında hata oluştu: {e}")
    finally:
        # Always release the driver, even when the import failed.
        importer.close()

# Run the import only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

2025-05-16 10:38:57,320 - INFO - 703 tez verisi yüklendi
2025-05-16 10:38:57,321 - INFO - Neo4j bağlantısı kuruldu


2025-05-16 10:38:57,334 - INFO - Veritabanı temizlendi
2025-05-16 10:38:57,450 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT thesis_id IF NOT EXISTS FOR (t:Thesis) REQUIRE t.id IS UNIQUE
2025-05-16 10:38:57,493 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT stakeholder_name IF NOT EXISTS FOR (s:STAKEHOLDER) REQUIRE s.name IS UNIQUE
2025-05-16 10:38:57,538 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT problem_challenge_name IF NOT EXISTS FOR (p:PROBLEM_CHALLENGE) REQUIRE p.name IS UNIQUE
2025-05-16 10:38:57,575 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT solution_approach_name IF NOT EXISTS FOR (s:SOLUTION_APPROACH) REQUIRE s.name IS UNIQUE
2025-05-16 10:38:57,616 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT focus_area_theme_name IF NOT EXISTS FOR (f:FOCUS_AREA_THEME) REQUIRE f.name IS UNIQUE
2025-05-16 10:38:57,622 - INFO - Batch 1/15 işleniyor (50 tez)
2025-05-16 10:38:58,565 - INFO - Tez 782422 veritabanına aktarıldı
2025-05-16 10:38:58,784 - INFO - Tez 821