<a href="https://colab.research.google.com/github/marijaklisarovska/graphdb-dashboard-app/blob/master/Kreiranje_na_shema_za_bazata_Veb_Baziran_Sistemi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
import re

In [9]:
class HappinessDataProcessor:
    def __init__(self):
        self.standardized_data = {}
        self.region_mapping = {}
        self.metric_columns = [
            'gdp_per_capita', 'social_support', 'healthy_life_expectancy',
            'freedom_to_make_life_choices', 'generosity', 'perceptions_of_corruption'
        ]

    def load_and_standardize_data(self) -> Dict:

        column_mappings = {
            '2015': {
                'Country': 'country',
                'Region': 'region',
                'Happiness Rank': 'happiness_rank',
                'Happiness Score': 'happiness_score',
                'Economy (GDP per Capita)': 'gdp_per_capita',
                'Family': 'social_support',
                'Health (Life Expectancy)': 'healthy_life_expectancy',
                'Freedom': 'freedom_to_make_life_choices',
                'Trust (Government Corruption)': 'perceptions_of_corruption',
                'Generosity': 'generosity',
                'Dystopia Residual': 'dystopia_residual'
            },
            '2016': {
                'Country': 'country',
                'Region': 'region',
                'Happiness Rank': 'happiness_rank',
                'Happiness Score': 'happiness_score',
                'Economy (GDP per Capita)': 'gdp_per_capita',
                'Family': 'social_support',
                'Health (Life Expectancy)': 'healthy_life_expectancy',
                'Freedom': 'freedom_to_make_life_choices',
                'Trust (Government Corruption)': 'perceptions_of_corruption',
                'Generosity': 'generosity',
                'Dystopia Residual': 'dystopia_residual'
            },
            '2017': {
                'Country': 'country',
                'Happiness.Rank': 'happiness_rank',
                'Happiness.Score': 'happiness_score',
                'Economy..GDP.per.Capita.': 'gdp_per_capita',
                'Family': 'social_support',
                'Health..Life.Expectancy.': 'healthy_life_expectancy',
                'Freedom': 'freedom_to_make_life_choices',
                'Trust..Government.Corruption.': 'perceptions_of_corruption',
                'Generosity': 'generosity',
                'Dystopia.Residual': 'dystopia_residual'
            },
            '2018': {
                'Country or region': 'country',
                'Overall rank': 'happiness_rank',
                'Score': 'happiness_score',
                'GDP per capita': 'gdp_per_capita',
                'Social support': 'social_support',
                'Healthy life expectancy': 'healthy_life_expectancy',
                'Freedom to make life choices': 'freedom_to_make_life_choices',
                'Generosity': 'generosity',
                'Perceptions of corruption': 'perceptions_of_corruption'
            },
            '2019': {
                'Country or region': 'country',
                'Overall rank': 'happiness_rank',
                'Score': 'happiness_score',
                'GDP per capita': 'gdp_per_capita',
                'Social support': 'social_support',
                'Healthy life expectancy': 'healthy_life_expectancy',
                'Freedom to make life choices': 'freedom_to_make_life_choices',
                'Generosity': 'generosity',
                'Perceptions of corruption': 'perceptions_of_corruption'
            }
        }

        years = ['2015', '2016', '2017', '2018', '2019']

        for year in years:
            df = pd.read_csv(f'{year}.csv')
            df.columns = df.columns.str.strip()
            df = df.rename(columns=column_mappings[year])
            df['year'] = int(year)

            df = df.fillna(0)

            df['happiness_tier'] = df['happiness_rank'].apply(self._get_happiness_tier)

            for metric in self.metric_columns:
                if metric in df.columns:
                    df[f'{metric}_level'] = df[metric].apply(self._get_performance_level)

            self.standardized_data[year] = df
            print(f"Loaded {year}: {len(df)} countries")

        return self.standardized_data

    def _get_happiness_tier(self, rank: int) -> str:
        if rank <= 10:
            return "Top_10"
        elif rank <= 25:
            return "Top_25"
        elif rank <= 50:
            return "Top_50"
        elif rank <= 100:
            return "Top_100"
        else:
            return "Bottom_Tier"

    def _get_performance_level(self, value: float) -> str:
        if pd.isna(value) or value == 0:
            return "No_Data"
        elif value >= 1.0:
            return "High"
        elif value >= 0.5:
            return "Medium"
        else:
            return "Low"

    def build_region_mapping(self):
        df_2015 = self.standardized_data['2015']

        for _, row in df_2015.iterrows():
            self.region_mapping[row['country']] = row['region']

        df_2016 = self.standardized_data['2016']
        for _, row in df_2016.iterrows():
            if row['country'] not in self.region_mapping:
                self.region_mapping[row['country']] = row['region']

        print(f"Built region mapping for {len(self.region_mapping)} countries")
        return self.region_mapping

    def calculate_regional_averages(self) -> Dict:
        regional_averages = {}

        for year, df in self.standardized_data.items():
            if 'region' not in df.columns:
                df['region'] = df['country'].map(self.region_mapping)

            regional_averages[year] = {}

            for region in df['region'].dropna().unique():
                region_data = df[df['region'] == region]
                regional_averages[year][region] = {}

                regional_averages[year][region]['happiness_score'] = region_data['happiness_score'].mean()

                for metric in self.metric_columns:
                    if metric in region_data.columns:
                        regional_averages[year][region][metric] = region_data[metric].mean()

        return regional_averages

    def find_similar_countries(self, threshold: float = 0.3) -> List[Tuple]:
        similar_pairs = []

        for year, df in self.standardized_data.items():
            countries = df['country'].tolist()

            for i, country1 in enumerate(countries):
                for country2 in countries[i+1:]:
                    row1 = df[df['country'] == country1].iloc[0]
                    row2 = df[df['country'] == country2].iloc[0]

                    score_diff = abs(row1['happiness_score'] - row2['happiness_score'])

                    if score_diff <= threshold:
                        similar_pairs.append((country1, country2, year, score_diff))

        return similar_pairs

    def find_year_over_year_changes(self) -> List[Dict]:
        changes = []
        years = ['2015', '2016', '2017', '2018', '2019']

        for i in range(len(years) - 1):
            year1, year2 = years[i], years[i + 1]
            df1, df2 = self.standardized_data[year1], self.standardized_data[year2]

            merged = df1[['country', 'happiness_score', 'happiness_rank']].merge(
                df2[['country', 'happiness_score', 'happiness_rank']],
                on='country', suffixes=('_prev', '_curr')
            )

            merged['score_change'] = merged['happiness_score_curr'] - merged['happiness_score_prev']
            merged['rank_change'] = merged['happiness_rank_prev'] - merged['happiness_rank_curr']

            for _, row in merged.iterrows():
                changes.append({
                    'country': row['country'],
                    'from_year': year1,
                    'to_year': year2,
                    'score_change': row['score_change'],
                    'rank_change': row['rank_change'],
                    'change_type': 'improvement' if row['score_change'] > 0.1 else 'decline' if row['score_change'] < -0.1 else 'stable'
                })

        return changes

    def identify_metric_leaders_laggards(self) -> Dict:
        leaders_laggards = {
            'leaders': {},
            'laggards': {}
        }

        for year, df in self.standardized_data.items():
            leaders_laggards['leaders'][year] = {}
            leaders_laggards['laggards'][year] = {}

            for metric in self.metric_columns:
                if metric in df.columns:
                    metric_data = df[df[metric] > 0].copy()

                    if len(metric_data) > 0:
                        top_10_percent = int(len(metric_data) * 0.1)
                        leaders = metric_data.nlargest(max(1, top_10_percent), metric)['country'].tolist()

                        bottom_10_percent = int(len(metric_data) * 0.1)
                        laggards = metric_data.nsmallest(max(1, bottom_10_percent), metric)['country'].tolist()

                        leaders_laggards['leaders'][year][metric] = leaders
                        leaders_laggards['laggards'][year][metric] = laggards

        return leaders_laggards

    def get_comprehensive_stats(self) -> Dict:
        stats = {
            'countries': len(self.get_all_countries()),
            'regions': len(self.get_all_regions()),
            'years': len(self.standardized_data),
            'metrics': len(self.metric_columns),
            'total_data_points': sum(len(df) for df in self.standardized_data.values())
        }

        return stats

    def get_all_countries(self) -> List[str]:
        all_countries = set()
        for year_data in self.standardized_data.values():
            all_countries.update(year_data['country'].tolist())
        return sorted(list(all_countries))

    def get_all_regions(self) -> List[str]:
        return sorted(list(set(self.region_mapping.values())))

In [10]:
# Load 2015.csv ... 2019.csv from the working directory into one common schema.
processor = HappinessDataProcessor()
standardized_data = processor.load_and_standardize_data()
# Country -> region lookup, built from the 2015 and 2016 frames.
region_mapping = processor.build_region_mapping()

Loaded 2015: 158 countries
Loaded 2016: 157 countries
Loaded 2017: 155 countries
Loaded 2018: 156 countries
Loaded 2019: 156 countries
Built region mapping for 164 countries


In [11]:
# Derive the aggregates that the graph builder later turns into relationships.
# calculate_regional_averages() also adds a 'region' column to frames that lack
# one; the regional comparison step skips any frame without that column.
regional_averages = processor.calculate_regional_averages()
# Uses the default threshold: pairs whose score gap is at most 0.3.
similar_countries = processor.find_similar_countries()
year_changes = processor.find_year_over_year_changes()
leaders_laggards = processor.identify_metric_leaders_laggards()

In [12]:
# Print the summary counts with human-readable labels ("total_data_points" -> "Total Data Points").
stats = processor.get_comprehensive_stats()

for key, value in stats.items():
    print(f"{key.replace('_', ' ').title()}: {value}")

# Sizes of the derived lists plus a few sample entries as a sanity check.
print(f"\nSimilar country pairs found: {len(similar_countries)}")
print(f"Year-over-year changes tracked: {len(year_changes)}")
print("Sample similar countries:", similar_countries[:3])
print("Sample improvements:", [c for c in year_changes if c['change_type'] == 'improvement'][:3])

Countries: 170
Regions: 10
Years: 5
Metrics: 6
Total Data Points: 782

Similar country pairs found: 8659
Year-over-year changes tracked: 604
Sample similar countries: [('Switzerland', 'Iceland', '2015', np.float64(0.0259999999999998)), ('Switzerland', 'Denmark', '2015', np.float64(0.05999999999999961)), ('Switzerland', 'Norway', '2015', np.float64(0.0649999999999995))]
Sample improvements: [{'country': 'Germany', 'from_year': '2015', 'to_year': '2016', 'score_change': 0.24399999999999977, 'rank_change': 10, 'change_type': 'improvement'}, {'country': 'Malta', 'from_year': '2015', 'to_year': '2016', 'score_change': 0.18600000000000083, 'rank_change': 7, 'change_type': 'improvement'}, {'country': 'Guatemala', 'from_year': '2015', 'to_year': '2016', 'score_change': 0.20099999999999962, 'rank_change': 4, 'change_type': 'improvement'}]


In [15]:
!pip install neo4j
from neo4j import GraphDatabase
import pandas as pd
from typing import Dict, List

Collecting neo4j
  Downloading neo4j-5.28.2-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.2-py3-none-any.whl (313 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/313.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m307.2/313.2 kB[0m [31m14.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.2/313.2 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.28.2


In [24]:
class HappinessGraphBuilder:
    def __init__(self, uri: str, username: str, password: str):
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def close(self):
        self.driver.close()

    def clear_database(self):
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("Database cleared")

    def create_constraints_and_indexes(self):
        constraints_queries = [
            "CREATE CONSTRAINT country_name IF NOT EXISTS FOR (c:Country) REQUIRE c.name IS UNIQUE",
            "CREATE CONSTRAINT region_name IF NOT EXISTS FOR (r:Region) REQUIRE r.name IS UNIQUE",
            "CREATE CONSTRAINT year_value IF NOT EXISTS FOR (y:Year) REQUIRE y.year IS UNIQUE",
            "CREATE CONSTRAINT metric_name IF NOT EXISTS FOR (m:MetricCategory) REQUIRE m.name IS UNIQUE",
            "CREATE CONSTRAINT tier_name IF NOT EXISTS FOR (t:HappinessTier) REQUIRE t.name IS UNIQUE",
            "CREATE CONSTRAINT level_name IF NOT EXISTS FOR (l:PerformanceLevel) REQUIRE l.name IS UNIQUE"
        ]

        index_queries = [
            "CREATE INDEX country_happiness_score IF NOT EXISTS FOR (c:Country) ON (c.happiness_score)",
            "CREATE INDEX country_happiness_rank IF NOT EXISTS FOR (c:Country) ON (c.happiness_rank)"
        ]

        with self.driver.session() as session:
            for query in constraints_queries + index_queries:
                try:
                    session.run(query)
                    print(f"✓ Created: {query.split()[1]} {query.split()[2]}")
                except Exception as e:
                    print(f"✗ Failed: {e}")

    def create_base_nodes(self, processor):
        with self.driver.session() as session:
            countries = processor.get_all_countries()
            for country in countries:
                session.run("""
                    MERGE (c:Country {name: $country})
                """, country=country)
            print(f"Created {len(countries)} Country nodes")

            regions = processor.get_all_regions()
            for region in regions:
                session.run("""
                    MERGE (r:Region {name: $region})
                """, region=region)
            print(f"Created {len(regions)} Region nodes")

            years = [2015, 2016, 2017, 2018, 2019]
            for year in years:
                session.run("""
                    MERGE (y:Year {year: $year})
                """, year=year)
            print(f"Created {len(years)} Year nodes")

            metrics = [
                'GDP_per_Capita', 'Social_Support', 'Healthy_Life_Expectancy',
                'Freedom_to_Make_Life_Choices', 'Generosity', 'Perceptions_of_Corruption'
            ]
            for metric in metrics:
                session.run("""
                    MERGE (m:MetricCategory {name: $metric})
                """, metric=metric)
            print(f"Created {len(metrics)} MetricCategory nodes")

            tiers = ['Top_10', 'Top_25', 'Top_50', 'Top_100', 'Bottom_Tier']
            for tier in tiers:
                session.run("""
                    MERGE (t:HappinessTier {name: $tier})
                """, tier=tier)
            print(f"Created {len(tiers)} HappinessTier nodes")

            levels = ['High', 'Medium', 'Low', 'No_Data']
            for level in levels:
                session.run("""
                    MERGE (l:PerformanceLevel {name: $level})
                """, level=level)
            print(f"Created {len(levels)} PerformanceLevel nodes")

    def create_country_region_relationships(self, region_mapping):
        with self.driver.session() as session:
            for country, region in region_mapping.items():
                session.run("""
                    MATCH (c:Country {name: $country})
                    MATCH (r:Region {name: $region})
                    MERGE (c)-[:BELONGS_TO]->(r)
                """, country=country, region=region)
            print(f"Created {len(region_mapping)} BELONGS_TO relationships")

    def create_happiness_data_relationships(self, standardized_data):
        with self.driver.session() as session:
            for year, df in standardized_data.items():
                for _, row in df.iterrows():
                    session.run("""
                        MATCH (c:Country {name: $country})
                        MATCH (y:Year {year: $year})
                        MERGE (c)-[:HAS_HAPPINESS_DATA {
                            year: $year,
                            happiness_score: $score,
                            happiness_rank: $rank,
                            gdp_per_capita: $gdp,
                            social_support: $social,
                            healthy_life_expectancy: $life,
                            freedom_to_make_life_choices: $freedom,
                            generosity: $generosity,
                            perceptions_of_corruption: $corruption
                        }]->(y)
                    """,
                        country=row['country'],
                        year=int(year),
                        score=float(row['happiness_score']),
                        rank=int(row['happiness_rank']),
                        gdp=float(row.get('gdp_per_capita', 0)),
                        social=float(row.get('social_support', 0)),
                        life=float(row.get('healthy_life_expectancy', 0)),
                        freedom=float(row.get('freedom_to_make_life_choices', 0)),
                        generosity=float(row.get('generosity', 0)),
                        corruption=float(row.get('perceptions_of_corruption', 0))
                    )
                print(f"Created {len(df)} HAS_HAPPINESS_DATA relationships for {year}")

    def create_tier_relationships(self, standardized_data):
        with self.driver.session() as session:
            for year, df in standardized_data.items():
                for _, row in df.iterrows():
                    session.run("""
                        MATCH (c:Country {name: $country})
                        MATCH (t:HappinessTier {name: $tier})
                        MERGE (c)-[:BELONGS_TO_TIER {year: $year}]->(t)
                    """,
                        country=row['country'],
                        tier=row['happiness_tier'],
                        year=int(year)
                    )
                print(f"Created tier relationships for {year}")

    def create_metric_performance_relationships(self, leaders_laggards, standardized_data):
        with self.driver.session() as session:
            metric_mapping = {
                'gdp_per_capita': 'GDP_per_Capita',
                'social_support': 'Social_Support',
                'healthy_life_expectancy': 'Healthy_Life_Expectancy',
                'freedom_to_make_life_choices': 'Freedom_to_Make_Life_Choices',
                'generosity': 'Generosity',
                'perceptions_of_corruption': 'Perceptions_of_Corruption'
            }

            for year, metrics in leaders_laggards['leaders'].items():
                for metric, countries in metrics.items():
                    metric_name = metric_mapping[metric]
                    for country in countries:
                        df = standardized_data[year]
                        country_data = df[df['country'] == country]
                        if not country_data.empty:
                            value = float(country_data[metric].iloc[0])
                            session.run("""
                                MATCH (c:Country {name: $country})
                                MATCH (m:MetricCategory {name: $metric})
                                MERGE (c)-[:EXCELS_IN {
                                    year: $year,
                                    value: $value,
                                    percentile: 'top_10'
                                }]->(m)
                            """, country=country, metric=metric_name, year=int(year), value=value)

                print(f"Created EXCELS_IN relationships for {year}")

            for year, metrics in leaders_laggards['laggards'].items():
                for metric, countries in metrics.items():
                    metric_name = metric_mapping[metric]
                    for country in countries:
                        df = standardized_data[year]
                        country_data = df[df['country'] == country]
                        if not country_data.empty:
                            value = float(country_data[metric].iloc[0])
                            session.run("""
                                MATCH (c:Country {name: $country})
                                MATCH (m:MetricCategory {name: $metric})
                                MERGE (c)-[:STRUGGLES_WITH {
                                    year: $year,
                                    value: $value,
                                    percentile: 'bottom_10'
                                }]->(m)
                            """, country=country, metric=metric_name, year=int(year), value=value)

                print(f"Created STRUGGLES_WITH relationships for {year}")

    def create_similarity_relationships(self, similar_countries):
        if not similar_countries:
            print("No similar countries found, skipping similarity relationships")
            return

        limited_pairs = similar_countries[:500]
        print(f"Creating similarity relationships for {len(limited_pairs)} pairs (limited from {len(similar_countries)} for performance)")

        with self.driver.session() as session:
            batch_size = 50
            total_batches = (len(limited_pairs) + batch_size - 1) // batch_size

            for i in range(0, len(limited_pairs), batch_size):
                batch = limited_pairs[i:i + batch_size]
                batch_num = (i // batch_size) + 1

                with session.begin_transaction() as tx:
                    for country1, country2, year, score_diff in batch:
                        tx.run("""
                            MATCH (c1:Country {name: $country1})
                            MATCH (c2:Country {name: $country2})
                            MERGE (c1)-[:SIMILAR_TO {
                                year: $year,
                                score_difference: $score_diff,
                                similarity_threshold: 0.2
                            }]->(c2)
                        """,
                            country1=country1,
                            country2=country2,
                            year=int(year),
                            score_diff=float(score_diff)
                        )

                print(f"Batch {batch_num}/{total_batches} complete ({len(batch)} relationships)")

            print(f"Created {len(limited_pairs)} SIMILAR_TO relationships")

    def create_temporal_change_relationships(self, year_changes):
        with self.driver.session() as session:
            improvements = 0
            declines = 0

            for change in year_changes:
                if change['change_type'] == 'improvement':
                    session.run("""
                        MATCH (c:Country {name: $country})
                        MATCH (y1:Year {year: $from_year})
                        MATCH (y2:Year {year: $to_year})
                        MERGE (c)-[:IMPROVED_FROM {
                            from_year: $from_year,
                            to_year: $to_year,
                            score_change: $score_change,
                            rank_change: $rank_change
                        }]->(y2)
                    """,
                        country=change['country'],
                        from_year=int(change['from_year']),
                        to_year=int(change['to_year']),
                        score_change=float(change['score_change']),
                        rank_change=int(change['rank_change'])
                    )
                    improvements += 1
                elif change['change_type'] == 'decline':
                    session.run("""
                        MATCH (c:Country {name: $country})
                        MATCH (y1:Year {year: $from_year})
                        MATCH (y2:Year {year: $to_year})
                        MERGE (c)-[:DECLINED_FROM {
                            from_year: $from_year,
                            to_year: $to_year,
                            score_change: $score_change,
                            rank_change: $rank_change
                        }]->(y2)
                    """,
                        country=change['country'],
                        from_year=int(change['from_year']),
                        to_year=int(change['to_year']),
                        score_change=float(change['score_change']),
                        rank_change=int(change['rank_change'])
                    )
                    declines += 1

            print(f"Created {improvements} IMPROVED_FROM and {declines} DECLINED_FROM relationships")

    def create_regional_comparison_relationships(self, standardized_data, regional_averages):
        with self.driver.session() as session:
            metric_mapping = {
                'gdp_per_capita': 'GDP_per_Capita',
                'social_support': 'Social_Support',
                'healthy_life_expectancy': 'Healthy_Life_Expectancy',
                'freedom_to_make_life_choices': 'Freedom_to_Make_Life_Choices',
                'generosity': 'Generosity',
                'perceptions_of_corruption': 'Perceptions_of_Corruption'
            }

            for year, df in standardized_data.items():
                if 'region' not in df.columns:
                    print(f"Skipping regional comparisons for {year} - region mapping needed")
                    continue

                above_count = 0
                below_count = 0

                for _, row in df.iterrows():
                    if pd.notna(row.get('region')):
                        region = row['region']
                        country = row['country']

                        for metric, metric_name in metric_mapping.items():
                            if metric in row and region in regional_averages.get(year, {}):
                                country_value = row[metric]
                                regional_avg = regional_averages[year][region].get(metric, 0)

                                if country_value > 0 and regional_avg > 0:
                                    if country_value > regional_avg:
                                        session.run("""
                                            MATCH (c:Country {name: $country})
                                            MATCH (r:Region {name: $region})
                                            MERGE (c)-[:ABOVE_REGIONAL_AVERAGE {
                                                metric: $metric,
                                                year: $year,
                                                country_value: $country_value,
                                                regional_average: $regional_avg,
                                                difference: $diff
                                            }]->(r)
                                        """,
                                            country=country,
                                            region=region,
                                            metric=metric_name,
                                            year=int(year),
                                            country_value=float(country_value),
                                            regional_avg=float(regional_avg),
                                            diff=float(country_value - regional_avg)
                                        )
                                        above_count += 1
                                    else:
                                        session.run("""
                                            MATCH (c:Country {name: $country})
                                            MATCH (r:Region {name: $region})
                                            MERGE (c)-[:BELOW_REGIONAL_AVERAGE {
                                                metric: $metric,
                                                year: $year,
                                                country_value: $country_value,
                                                regional_average: $regional_avg,
                                                difference: $diff
                                            }]->(r)
                                        """,
                                            country=country,
                                            region=region,
                                            metric=metric_name,
                                            year=int(year),
                                            country_value=float(country_value),
                                            regional_avg=float(regional_avg),
                                            diff=float(regional_avg - country_value)
                                        )
                                        below_count += 1

                if above_count > 0 or below_count > 0:
                    print(f"Created {above_count} ABOVE and {below_count} BELOW regional comparison relationships for {year}")

    def build_complete_graph(self, processor, standardized_data, region_mapping,
                           regional_averages, similar_countries, year_changes, leaders_laggards,
                           include_similarity=True, include_regional_comparison=True):
        print("Building Neo4j Graph Database...")

        print("1. Clearing database and creating constraints...")
        self.clear_database()
        self.create_constraints_and_indexes()

        print("2. Creating base nodes...")
        self.create_base_nodes(processor)

        print("3. Creating core relationships...")
        self.create_country_region_relationships(region_mapping)
        self.create_happiness_data_relationships(standardized_data)
        self.create_tier_relationships(standardized_data)

        print("4. Creating metric performance relationships...")
        self.create_metric_performance_relationships(leaders_laggards, standardized_data)

        print("5. Creating temporal change relationships...")
        self.create_temporal_change_relationships(year_changes)

        if include_similarity and similar_countries:
            print("6. Creating similarity relationships...")
            self.create_similarity_relationships(similar_countries)
        else:
            print("6. Skipping similarity relationships (disabled or no data)")

        if include_regional_comparison:
            print("7. Creating regional comparison relationships...")
            self.create_regional_comparison_relationships(standardized_data, regional_averages)
        else:
            print("7. Skipping regional comparison relationships (disabled)")

        print("Graph database build complete")

        with self.driver.session() as session:
            node_count = session.run("MATCH (n) RETURN count(n) as count").single()['count']
            rel_count = session.run("MATCH ()-[r]->() RETURN count(r) as count").single()['count']
            print(f"Final Stats: {node_count} nodes, {rel_count} relationships")

In [22]:
import os
from getpass import getpass

# Connection settings come from the environment so that no secret is stored in
# the notebook. The password is prompted for when NEO4J_PASSWORD is not set.
# NOTE: a password was previously committed in this cell and should be rotated.
graph_builder = HappinessGraphBuilder(
    uri=os.environ.get("NEO4J_URI", "neo4j+s://d9b35a07.databases.neo4j.io"),  # Neo4j URI
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),                         # Username
    password=os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")    # Password
)

In [26]:
# Rebuild the whole graph. This clears the target database first.
# SIMILAR_TO relationships are switched off for this run; the regional
# comparison relationships are included.
graph_builder.build_complete_graph(
    processor=processor,
    standardized_data=standardized_data,
    region_mapping=region_mapping,
    regional_averages=regional_averages,
    similar_countries=similar_countries,
    year_changes=year_changes,
    leaders_laggards=leaders_laggards,
    include_similarity=False,
    include_regional_comparison=True
)

Building Neo4j Graph Database...
1. Clearing database and creating constraints...
Database cleared
✓ Created: CONSTRAINT country_name
✓ Created: CONSTRAINT region_name
✓ Created: CONSTRAINT year_value
✓ Created: CONSTRAINT metric_name
✓ Created: CONSTRAINT tier_name
✓ Created: CONSTRAINT level_name
✓ Created: INDEX country_happiness_score
✓ Created: INDEX country_happiness_rank
2. Creating base nodes...
Created 170 Country nodes
Created 10 Region nodes
Created 5 Year nodes
Created 6 MetricCategory nodes
Created 5 HappinessTier nodes
Created 4 PerformanceLevel nodes
3. Creating core relationships...
Created 164 BELONGS_TO relationships
Created 158 HAS_HAPPINESS_DATA relationships for 2015
Created 157 HAS_HAPPINESS_DATA relationships for 2016
Created 155 HAS_HAPPINESS_DATA relationships for 2017
Created 156 HAS_HAPPINESS_DATA relationships for 2018
Created 156 HAS_HAPPINESS_DATA relationships for 2019
Created tier relationships for 2015
Created tier relationships for 2016
Created tier re