In [2]:
import json
import logging
import os
from collections import defaultdict
from typing import Any, Dict, List

from neo4j import GraphDatabase

# Logging configuration: emit INFO and above as "<timestamp> - <level> - <message>" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the importer class and the helper functions below.
logger = logging.getLogger(__name__)

class Neo4jImporter:
    """Load thesis records (entities and relations) into a Neo4j graph.

    The importer owns a single driver created in ``__init__``; call
    ``close()`` when done, or use the instance as a context manager.
    Every write is issued through ``session.run`` on a short-lived session.
    """

    # Misspelled relation types found in the input, mapped to the canonical spelling.
    _RELATION_ALIASES = {"ADRESSES": "ADDRESSES"}
    # Relation types that are intentionally left out of the graph.
    _EXCLUDED_RELATIONS = frozenset({"CAUSES"})

    def __init__(self, uri: str, user: str, password: str):
        """Open a driver for ``uri`` authenticated with ``user``/``password``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        logger.info("Neo4j bağlantısı kuruldu")

    def __enter__(self) -> "Neo4jImporter":
        """Return the importer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def close(self):
        """Close the underlying driver."""
        self.driver.close()
        logger.info("Neo4j bağlantısı kapatıldı")

    def create_constraints(self):
        """Create uniqueness constraints for thesis ids and entity names.

        Each statement uses ``IF NOT EXISTS``. A failure on one statement is
        logged as a warning and does not prevent the remaining ones from running.
        """
        constraints = [
            "CREATE CONSTRAINT thesis_id IF NOT EXISTS FOR (t:Thesis) REQUIRE t.id IS UNIQUE",
            "CREATE CONSTRAINT stakeholder_name IF NOT EXISTS FOR (s:STAKEHOLDER) REQUIRE s.name IS UNIQUE",
            "CREATE CONSTRAINT problem_challenge_name IF NOT EXISTS FOR (p:PROBLEM_CHALLENGE) REQUIRE p.name IS UNIQUE",
            "CREATE CONSTRAINT solution_approach_name IF NOT EXISTS FOR (s:SOLUTION_APPROACH) REQUIRE s.name IS UNIQUE",
            "CREATE CONSTRAINT focus_area_theme_name IF NOT EXISTS FOR (f:FOCUS_AREA_THEME) REQUIRE f.name IS UNIQUE",
        ]

        with self.driver.session() as session:
            for constraint in constraints:
                try:
                    session.run(constraint)
                    logger.info(f"Constraint oluşturuldu: {constraint}")
                except Exception as e:
                    # Best effort: keep going so one bad statement does not block the rest.
                    logger.warning(f"Constraint mevcut veya hata: {e}")

    def clear_database(self):
        """Delete every node together with its relationships."""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            logger.info("Veritabanı temizlendi")

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Return ``identifier`` as a backtick-quoted Cypher label/type name.

        Labels and relationship types cannot be passed as query parameters, so
        they are interpolated into the query text. Quoting (and doubling any
        embedded backtick) keeps values containing spaces or punctuation from
        breaking, or being interpreted as, Cypher syntax.
        """
        return "`" + str(identifier).replace("`", "``") + "`"

    @staticmethod
    def _aggregate_entities(data: List[Dict[str, Any]]) -> Dict[tuple, Dict[str, Any]]:
        """Group entity mentions by ``(entity_type, entity_name)``.

        Returns a dict whose values hold the set of mentioning thesis ids
        (``'thesis_ids'``), one year per mention (``'years'``) and the entity
        type (``'type'``).
        """
        entity_data = defaultdict(lambda: {
            'thesis_ids': set(),
            'years': [],
            'type': None,
        })

        for thesis in data:
            thesis_id = thesis.get("thesis_id", "")
            year = thesis.get("year", 0)

            for entity_type, entity_list in thesis.get("entities", {}).items():
                for entity_name in entity_list:
                    record = entity_data[(entity_type, entity_name)]
                    record['thesis_ids'].add(thesis_id)
                    record['years'].append(year)
                    record['type'] = entity_type

        return dict(entity_data)

    @classmethod
    def _aggregate_relations(cls, data: List[Dict[str, Any]]) -> Dict[tuple, Dict[str, Any]]:
        """Group relation mentions by ``(source, target, relation_type)``.

        Relations with an empty source, target or type are ignored, known
        misspellings are normalised, and excluded types are dropped. Values
        hold the set of mentioning thesis ids and one year per mention.
        """
        relation_data = defaultdict(lambda: {
            'thesis_ids': set(),
            'years': [],
        })

        for thesis in data:
            thesis_id = thesis.get("thesis_id", "")
            year = thesis.get("year", 0)

            for relation in thesis.get("relations", []):
                source = relation.get("source", "")
                target = relation.get("target", "")
                relation_type = relation.get("relation", "")

                if not (source and target and relation_type):
                    continue

                # Normalise known misspellings (e.g. ADRESSES -> ADDRESSES).
                relation_type = cls._RELATION_ALIASES.get(relation_type, relation_type)
                if relation_type in cls._EXCLUDED_RELATIONS:
                    continue

                record = relation_data[(source, target, relation_type)]
                record['thesis_ids'].add(thesis_id)
                record['years'].append(year)

        return dict(relation_data)

    def bulk_import_optimized(self, data: List[Dict[str, Any]]):
        """Import all theses, their entities and their relations.

        Steps:
        1. Aggregate every entity across theses (frequency, first/last year).
        2. MERGE one node per unique ``(type, name)`` entity.
        3. MERGE each thesis node and its CONTAINS relationships.
        4. Aggregate relations across theses and MERGE one relationship per
           unique ``(source, target, type)`` with its weight.

        Args:
            data: Thesis records; each is read via the keys ``thesis_id``,
                ``year``, ``entities`` (type -> list of names) and
                ``relations`` (dicts with ``source``, ``target``, ``relation``).
        """
        # 1. Aggregate entity frequencies.
        logger.info("Entity frekansları hesaplanıyor...")
        entity_data = self._aggregate_entities(data)

        # 2. Create one node per unique entity.
        logger.info(f"Toplam {len(entity_data)} unique entity oluşturuluyor...")
        with self.driver.session() as session:
            for (entity_type, entity_name), record in entity_data.items():
                # Sort so the stored list is the same on every run (set order is not).
                thesis_ids = sorted(record['thesis_ids'], key=str)
                years = record['years']
                label = self._quote_identifier(entity_type)

                session.run(
                    f"""
                    MERGE (e:{label} {{name: $name}})
                    SET e.type = $type,
                        e.frequency = $frequency,
                        e.first_occurrence_year = $first_year,
                        e.last_occurrence_year = $last_year,
                        e.thesis_ids = $thesis_ids
                    """,
                    name=entity_name,
                    type=entity_type,
                    frequency=len(thesis_ids),
                    first_year=min(years),
                    last_year=max(years),
                    thesis_ids=thesis_ids
                )

        logger.info("Entity'ler oluşturuldu")

        # 3. Create thesis nodes and their CONTAINS relationships.
        logger.info("Tezler ve CONTAINS ilişkileri oluşturuluyor...")
        with self.driver.session() as session:
            for processed, thesis in enumerate(data, start=1):
                thesis_id = thesis.get("thesis_id", "")
                year = thesis.get("year", 0)

                session.run(
                    "MERGE (t:Thesis {id: $id}) SET t.year = $year",
                    id=thesis_id, year=year
                )

                for entity_type, entity_list in thesis.get("entities", {}).items():
                    label = self._quote_identifier(entity_type)
                    for entity_name in entity_list:
                        session.run(
                            f"""
                            MATCH (t:Thesis {{id: $thesis_id}})
                            MATCH (e:{label} {{name: $entity_name}})
                            MERGE (t)-[r:CONTAINS]->(e)
                            SET r.year = $year
                            """,
                            thesis_id=thesis_id,
                            entity_name=entity_name,
                            year=year
                        )

                if processed % 100 == 0:
                    logger.info(f"İşlenen tez sayısı: {processed}/{len(data)}")

        logger.info("Tezler ve CONTAINS ilişkileri tamamlandı")

        # 4. Aggregate relations globally and create them.
        logger.info("Relation'lar hesaplanıyor ve oluşturuluyor...")
        relation_data = self._aggregate_relations(data)

        relation_count = 0
        with self.driver.session() as session:
            for (source, target, relation_type), record in relation_data.items():
                thesis_ids = sorted(record['thesis_ids'], key=str)
                years = record['years']
                rel_type = self._quote_identifier(relation_type)

                # NOTE(review): endpoints are matched by name only, without a
                # label, so a name shared by nodes of different labels links
                # every matching pair. The input relations carry no entity
                # type; presumably this also prevents the per-label name
                # constraints from being used for the lookup - confirm with
                # PROFILE if this step is slow.
                session.run(
                    f"""
                    MATCH (source) WHERE source.name = $source
                    MATCH (target) WHERE target.name = $target
                    MERGE (source)-[r:{rel_type}]->(target)
                    SET r.weight = $weight,
                        r.first_occurrence_year = $first_year,
                        r.last_occurrence_year = $last_year,
                        r.thesis_ids = $thesis_ids
                    """,
                    source=source,
                    target=target,
                    weight=len(thesis_ids),
                    first_year=min(years),
                    last_year=max(years),
                    thesis_ids=thesis_ids
                )

                relation_count += 1
                if relation_count % 500 == 0:
                    logger.info(f"İşlenen relation sayısı: {relation_count}")

        logger.info(f"Toplam {relation_count} unique relation oluşturuldu")
        logger.info("Veri aktarımı tamamlandı")

def load_json_data(file_path: str) -> List[Dict[str, Any]]:
    """Load the thesis records from a UTF-8 JSON file.

    Args:
        file_path: Path of the JSON file; its top-level value must be a list.

    Returns:
        The parsed list, or an empty list when the file cannot be read,
        is not valid JSON, or does not contain a list at the top level.
        Every failure is logged as an error.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.error(f"JSON dosyası yüklenirken hata oluştu: {e}")
        return []

    # A non-list payload (e.g. a wrapping object) would otherwise be returned
    # as if it were valid and only fail later, after the database was cleared.
    if not isinstance(data, list):
        logger.error(
            "JSON dosyası yüklenirken hata oluştu: "
            f"üst düzey değer liste olmalı (bulunan: {type(data).__name__})"
        )
        return []

    logger.info(f"{len(data)} tez verisi yüklendi")
    return data

def main():
    """Load the thesis JSON and rebuild the Neo4j graph from it.

    Connection settings and the input path can be overridden with the
    environment variables ``NEO4J_URI``, ``NEO4J_USER``, ``NEO4J_PASSWORD``
    and ``THESIS_JSON_PATH``; the defaults below are used otherwise.

    The database is cleared before the import. Any exception raised during
    the import is logged and re-raised; the driver is closed in all cases.
    """
    # Neo4j connection settings (environment overrides keep secrets out of the source).
    neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
    neo4j_password = os.environ.get("NEO4J_PASSWORD", "12345678")

    # Path of the JSON file holding the thesis results.
    json_file_path = os.environ.get(
        "THESIS_JSON_PATH",
        "/home/serdar/Documents/structural-analysis-of-distance-education-theses-via-knowledge-graphs/data/thesis_results-final/thesis_results.json",
    )

    # Load first: nothing is touched in the database when there is no data.
    data = load_json_data(json_file_path)

    if not data:
        logger.error("Veri yüklenemedi, işlem sonlandırılıyor")
        return

    importer = Neo4jImporter(neo4j_uri, neo4j_user, neo4j_password)

    try:
        # Start from an empty database.
        importer.clear_database()

        # Create the uniqueness constraints before any node is merged.
        importer.create_constraints()

        # Import theses, entities and relations.
        importer.bulk_import_optimized(data)

        logger.info("Veri aktarımı başarıyla tamamlandı")
    except Exception as e:
        logger.error(f"Veri aktarımı sırasında hata oluştu: {e}")
        raise
    finally:
        importer.close()


if __name__ == "__main__":
    main()

2025-05-18 12:39:53,287 - INFO - 703 tez verisi yüklendi
2025-05-18 12:39:53,288 - INFO - Neo4j bağlantısı kuruldu
2025-05-18 12:39:53,420 - INFO - Veritabanı temizlendi
2025-05-18 12:39:53,499 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT thesis_id IF NOT EXISTS FOR (t:Thesis) REQUIRE t.id IS UNIQUE
2025-05-18 12:39:53,539 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT stakeholder_name IF NOT EXISTS FOR (s:STAKEHOLDER) REQUIRE s.name IS UNIQUE
2025-05-18 12:39:53,570 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT problem_challenge_name IF NOT EXISTS FOR (p:PROBLEM_CHALLENGE) REQUIRE p.name IS UNIQUE
2025-05-18 12:39:53,599 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT solution_approach_name IF NOT EXISTS FOR (s:SOLUTION_APPROACH) REQUIRE s.name IS UNIQUE
2025-05-18 12:39:53,630 - INFO - Constraint oluşturuldu: CREATE CONSTRAINT focus_area_theme_name IF NOT EXISTS FOR (f:FOCUS_AREA_THEME) REQUIRE f.name IS UNIQUE
2025-05-18 12:39:53,634 - INFO - Entity frekansları hes