# Küresel Mutluluk ve Ekonomik Refah Analizi

Bu proje, World Happiness Report veri setini kullanarak ülkelerin mutluluk skorları ile ekonomik ve sosyal göstergeleri arasındaki ilişkiyi incelemektedir.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Optional libraries: each import is attempted on its own and a module-level
# flag records whether it succeeded.
try:
    import plotly.express as px
    import plotly.graph_objects as go
    PLOTLY_AVAILABLE = True
except ImportError:
    # Only a failed import is expected here; a bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    PLOTLY_AVAILABLE = False
    print("Plotly not installed. Web visualizations will not be available.")

try:
    from sklearn.ensemble import IsolationForest
    from sklearn.preprocessing import StandardScaler
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    print("Scikit-learn not installed. Isolation Forest will not be available.")

# Global plotting defaults: matplotlib style sheet, seaborn colour palette,
# and the default figure size / font size set through rcParams.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})


## 2. Dataset Download and Loading


In [None]:
import requests
import zipfile
import os

# Import web scraping module
# The project-local helper is optional: any failure while importing it is
# reported and recorded in WEB_SCRAPING_AVAILABLE instead of stopping the cell.
try:
    from data_scraper import scrape_wikipedia_happiness_data, clean_scraped_data
    WEB_SCRAPING_AVAILABLE = True
    print("✓ Web scraping module loaded")
except Exception as e:
    WEB_SCRAPING_AVAILABLE = False
    print(f"Web scraping module could not be loaded: {e}")
    print("Manual data loading will be used.")

# Import anomaly detection module
# Same pattern: ANOMALY_DETECTION_AVAILABLE records whether the import worked.
try:
    from anomaly_detection import comprehensive_anomaly_detection, detect_anomalies_iqr, detect_anomalies_zscore
    ANOMALY_DETECTION_AVAILABLE = True
    print("✓ Anomaly detection module loaded")
except Exception as e:
    ANOMALY_DETECTION_AVAILABLE = False
    print(f"Anomaly detection module could not be loaded: {e}")

print("\nLoading dataset...")
print("Note: This project uses web scraping to fetch messy data.")

# Paths of the data files, keyed by year (the user is expected to download these files)
data_files = {
    2021: '2021.csv',
    2022: '2022.csv',
    2023: '2023.csv'
}

# Fetch data via web scraping or load existing data
def load_or_create_data():
    """Build the per-year happiness datasets.

    Sources are tried in this order:

    1. Web scraping, only when ``WEB_SCRAPING_AVAILABLE`` is true. The scraped
       table is cleaned once and copied for every year in ``data_files``;
       seeded Gaussian noise (sigma 0.05) is added to ``'Happiness Score'`` so
       the yearly copies differ. If scraping or cleaning raises, the error is
       printed and the CSV path below is used instead.
    2. For each year, the CSV file registered in ``data_files`` if it exists.
    3. Otherwise synthetic data from ``create_sample_data``.

    Returns:
        dict: maps each year in ``data_files`` to a table with a ``'Year'``
        column (a ``pandas.DataFrame`` on the CSV/sample paths; on the
        scraping path whatever ``clean_scraped_data`` returns, presumably a
        DataFrame as well).
    """
    data_frames = {}

    # Try web scraping first
    if WEB_SCRAPING_AVAILABLE:
        print("Fetching data via web scraping...")
        try:
            # Fetch messy data from web
            raw_data = scrape_wikipedia_happiness_data()
            print(f"\nRaw (messy) data fetched: {len(raw_data)} rows")

            # Clean the data
            cleaned_data = clean_scraped_data(raw_data)

            # Only one table is scraped, so the yearly frames are copies of it
            # that differ by a small, reproducible perturbation.
            for year in data_files:
                df_year = cleaned_data.copy()
                df_year['Year'] = year
                # Seed per year so every run produces the same perturbation
                np.random.seed(42 + year)
                year_change = np.random.normal(0, 0.05, len(df_year))
                if 'Happiness Score' in df_year.columns:
                    df_year['Happiness Score'] = df_year['Happiness Score'] + year_change
                data_frames[year] = df_year

            print("✓ Data successfully fetched and cleaned via web scraping")
            return data_frames
        except Exception as e:
            # Deliberate best-effort: any scraping problem falls through to
            # the CSV / sample-data path, which overwrites every year's entry.
            print(f"Web scraping failed: {e}")
            print("Switching to alternative method...")

    # Alternative: load from the CSV files registered in data_files
    for year, file_path in data_files.items():
        if os.path.exists(file_path):
            print(f"Loading {year} data from CSV...")
            df = pd.read_csv(file_path)
            df['Year'] = year
            data_frames[year] = df
        else:
            print(f"{file_path} not found. Creating sample data...")
            data_frames[year] = create_sample_data(year)

    return data_frames

def create_sample_data(year):
    """Create a synthetic sample dataset for one year (demo purposes only).

    All columns are drawn independently from random distributions seeded with
    ``42 + year``, so the same year always yields the same frame. Country
    names are unique, which keeps ``set_index('Country name')`` well defined
    for callers.

    Args:
        year: Year label stored in the ``'Year'`` column; also shifts the mean
            of the happiness noise by ``(year - 2021) * 0.05``.

    Returns:
        pandas.DataFrame: one row per country, sorted by ``'Happiness Score'``
        in descending order, with a 1-based ``'Rank'`` column.
    """
    # Seeds NumPy's global RNG so the draws below are reproducible per year
    np.random.seed(42 + year)
    countries = ['Finland', 'Denmark', 'Switzerland', 'Iceland', 'Netherlands',
                 'Norway', 'Sweden', 'Luxembourg', 'New Zealand', 'Austria',
                 'Australia', 'Israel', 'Germany', 'Canada', 'Ireland',
                 'Costa Rica', 'United Kingdom', 'Czech Republic', 'United States', 'Belgium',
                 'France', 'Bahrain', 'Malta', 'Taiwan', 'United Arab Emirates',
                 'Saudi Arabia', 'Spain', 'Italy', 'Slovenia', 'Guatemala',
                 'Singapore', 'Romania', 'Poland', 'Kuwait', 'Serbia',
                 'Chile', 'Bahamas', 'Argentina', 'Hungary', 'Trinidad and Tobago',
                 'Panama', 'Nicaragua', 'Colombia', 'Estonia', 'Jamaica',
                 'Mexico', 'Uruguay', 'Lithuania', 'Slovakia',
                 'Ecuador', 'Japan', 'South Korea', 'Philippines', 'Brazil',
                 'Thailand', 'Portugal', 'Latvia', 'South Africa',
                 'India', 'China', 'Russia', 'Turkey', 'Greece',
                 'Bulgaria', 'Morocco', 'Algeria', 'Tunisia', 'Egypt',
                 'Bangladesh', 'Pakistan', 'Nigeria', 'Kenya', 'Tanzania',
                 'Zimbabwe', 'Rwanda', 'Afghanistan', 'Central African Republic', 'South Sudan']

    n = len(countries)

    # Base happiness scores; clipped to the realistic range after the yearly
    # noise is added further down
    happiness_scores = np.random.normal(5.5, 1.2, n)

    # GDP per capita (log-normal distribution)
    gdp = np.exp(np.random.normal(9, 1, n)).clip(500, 120000)

    # Social support
    social_support = np.random.beta(2, 1, n) * 2

    # Healthy life expectancy
    healthy_life = np.random.beta(2, 1, n) * 1.5

    # Freedom
    freedom = np.random.beta(2, 1, n) * 0.8

    # Generosity
    generosity = np.random.beta(1, 2, n) * 0.5

    # Perception of corruption (lower = less corruption)
    corruption = np.random.beta(2, 2, n) * 0.6

    # Small year-dependent drift. Clipping happens last so the final scores
    # are guaranteed to stay inside [2, 8].
    year_change = (year - 2021) * 0.05
    happiness_scores = (happiness_scores + np.random.normal(year_change, 0.1, n)).clip(2, 8)

    df = pd.DataFrame({
        'Country name': countries,
        'Happiness Score': happiness_scores,
        'GDP per capita': gdp,
        'Social support': social_support,
        'Healthy life expectancy': healthy_life,
        'Freedom to make life choices': freedom,
        'Generosity': generosity,
        'Perceptions of corruption': corruption,
        'Year': year
    })

    # Add the ranking: 1 = highest happiness score
    df = df.sort_values('Happiness Score', ascending=False).reset_index(drop=True)
    df['Rank'] = range(1, len(df) + 1)

    return df

# Load the data: `data` maps each year to that year's table
data = load_or_create_data()


## 3. Data Exploration and Cleaning


In [None]:
# Stack the per-year frames into one long table with a fresh 0..n-1 index
df_all = pd.concat(list(data.values()), ignore_index=True)

row_count, column_count = df_all.shape
print("Dataset Information:")
print(f"Total number of rows: {row_count}")
print(f"Total number of columns: {column_count}")
print(f"\nData count by year:")
print(df_all['Year'].value_counts().sort_index())

# The preview is the cell's last expression, so the notebook renders it
print("\nFirst 5 rows:")
df_all.head()


In [None]:
# Report which columns contain missing values, most affected first
print("Missing Data Check:")
missing_data = df_all.isna().sum()
missing_percent = (missing_data / len(df_all)) * 100
missing_summary = pd.DataFrame({
    'Missing Count': missing_data,
    'Percentage': missing_percent
})
has_missing = missing_summary['Missing Count'] > 0
missing_df = missing_summary[has_missing].sort_values('Missing Count', ascending=False)
print(missing_df)

# Simple imputation: every numeric column gets its own column mean
numeric_cols = df_all.select_dtypes(include=[np.number]).columns
numeric_part = df_all[numeric_cols]
df_all[numeric_cols] = numeric_part.fillna(numeric_part.mean())

print("\nMissing data cleaned.")


In [None]:
# Basic statistics
# describe() is the cell's last expression, so the notebook renders its result.
print("Basic Statistics:")
df_all.describe()


## 4. Grouping by Continents


In [None]:
# Add continent information to countries (simplified)
def assign_continent(country):
    """Return the continent label for ``country``.

    The lookup is an exact, case-sensitive match against hard-coded membership
    lists, checked in the order Europe, Asia, Americas, Africa, Oceania.
    A name that appears in none of them yields ``'Other'``.
    """
    members_by_continent = {
        'Europe': (
            'Finland', 'Denmark', 'Switzerland', 'Iceland', 'Netherlands',
            'Norway', 'Sweden', 'Luxembourg', 'Austria', 'Germany',
            'United Kingdom', 'Czech Republic', 'Belgium', 'France',
            'Spain', 'Italy', 'Slovenia', 'Romania', 'Poland', 'Serbia',
            'Hungary', 'Estonia', 'Lithuania', 'Slovakia', 'Portugal',
            'Latvia', 'Bulgaria', 'Greece', 'Turkey', 'Russia',
        ),
        'Asia': (
            'Singapore', 'Japan', 'South Korea', 'Philippines', 'Thailand',
            'India', 'China', 'Bangladesh', 'Pakistan', 'Taiwan',
            'Bahrain', 'Saudi Arabia', 'Kuwait', 'United Arab Emirates',
            'Israel', 'Afghanistan',
        ),
        'Americas': (
            'Costa Rica', 'United States', 'Canada', 'Chile', 'Bahamas',
            'Argentina', 'Trinidad and Tobago', 'Panama', 'Nicaragua',
            'Colombia', 'Jamaica', 'Mexico', 'Uruguay', 'Ecuador',
            'Brazil', 'Guatemala',
        ),
        'Africa': (
            'South Africa', 'Morocco', 'Algeria', 'Tunisia', 'Egypt',
            'Nigeria', 'Kenya', 'Tanzania', 'Zimbabwe', 'Rwanda',
            'Central African Republic', 'South Sudan',
        ),
        'Oceania': ('New Zealand', 'Australia'),
    }

    # Dicts keep insertion order, so the first matching continent wins
    for label, members in members_by_continent.items():
        if country in members:
            return label
    return 'Other'

# Label every row with its continent
df_all['Continent'] = df_all['Country name'].map(assign_continent)

# Per-continent summary of the happiness score, rounded to two decimals
score_aggregations = {'Happiness Score': ['mean', 'median', 'std', 'count']}
grouped_by_continent = df_all.groupby('Continent')
continent_stats = grouped_by_continent.agg(score_aggregations).round(2)

print("Happiness Statistics by Continent:")
print(continent_stats)


## 5. Visualizations

### 5.1. Scatter Plot: GDP vs Happiness Score


In [None]:
# Use the most recent year present in the combined data
latest_year = df_all['Year'].max()
df_latest = df_all[df_all['Year'] == latest_year].copy()

# NOTE(review): GDP is drawn on a linear x-axis; no log scaling is applied here.
plt.figure(figsize=(14, 8))

# One colour per continent
continents = df_latest['Continent'].unique()
colors = sns.color_palette("husl", len(continents))
color_map = dict(zip(continents, colors))

for continent in continents:
    data_continent = df_latest[df_latest['Continent'] == continent]
    plt.scatter(data_continent['GDP per capita'],
                data_continent['Happiness Score'],
                label=continent,
                alpha=0.6,
                s=100,
                # Wrapped in a list so the RGB tuple is read as one colour,
                # not as three separate values
                c=[color_map[continent]])

# Trend line: first-degree least-squares fit of happiness on GDP
z = np.polyfit(df_latest['GDP per capita'], df_latest['Happiness Score'], 1)
p = np.poly1d(z)
plt.plot(df_latest['GDP per capita'], p(df_latest['GDP per capita']),
         "r--", alpha=0.8, linewidth=2, label='Trend Line')

plt.xlabel('GDP per Capita (USD)', fontsize=12, fontweight='bold')
plt.ylabel('Happiness Score', fontsize=12, fontweight='bold')
# The title reports the year actually plotted instead of a hard-coded 2023
plt.title(f'GDP per Capita vs Happiness Score ({int(latest_year)})', fontsize=14, fontweight='bold')
plt.legend(loc='best')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Correlation coefficient between GDP and happiness (pandas default method)
correlation = df_latest['GDP per capita'].corr(df_latest['Happiness Score'])
print(f"\nKorelasyon Katsayısı: {correlation:.3f}")


### 5.2. Bar Chart: Top 10 Happiest and Unhappiest Countries


In [None]:
# Ten highest- and ten lowest-scoring countries of the latest year
top_10 = df_latest.nlargest(10, 'Happiness Score')
bottom_10 = df_latest.nsmallest(10, 'Happiness Score')

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

# Both panels share one layout; only the data, palette and title differ.
panel_specs = [
    (ax1, top_10, "viridis", 'En Mutlu 10 Ülke'),
    (ax2, bottom_10, "plasma", 'En Mutsuz 10 Ülke'),
]
drawn_bars = []
for axis, ranked, palette_name, heading in panel_specs:
    slots = range(len(ranked))
    panel_scores = ranked['Happiness Score']

    drawn_bars.append(
        axis.barh(slots, panel_scores,
                  color=sns.color_palette(palette_name, len(ranked)))
    )
    axis.set_yticks(slots)
    axis.set_yticklabels(ranked['Country name'], fontsize=10)
    axis.set_xlabel('Happiness Score', fontsize=12, fontweight='bold')
    axis.set_title(heading, fontsize=14, fontweight='bold')
    axis.grid(axis='x', alpha=0.3)
    # Put the first row of the ranking at the top of the panel
    axis.invert_yaxis()

    # Write each value just past the end of its bar
    for slot, score in zip(slots, panel_scores):
        axis.text(score + 0.05, slot, f"{score:.2f}",
                  va='center', fontsize=9)

bars1, bars2 = drawn_bars

plt.tight_layout()
plt.show()


### 5.3. Heatmap: Correlation Matrix of Factors


In [None]:
# Correlation matrix of the happiness score and its candidate factors
correlation_cols = ['Happiness Score', 'GDP per capita', 'Social support', 
                   'Healthy life expectancy', 'Freedom to make life choices',
                   'Generosity', 'Perceptions of corruption']

# Pairwise correlations over the latest year's rows only
corr_matrix = df_latest[correlation_cols].corr()

plt.figure(figsize=(12, 10))
# center=0 puts the midpoint of the diverging colormap at zero correlation
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', 
            center=0, square=True, linewidths=1, cbar_kws={"shrink": 0.8})
plt.title('Mutluluk Faktörleri Korelasyon Matrisi', fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

# Factors ordered by their correlation with the happiness score, highest first
# (signed values, so strongly negative factors end up last)
print("\nMutluluk Skoru ile En Yüksek Korelasyonlar:")
happiness_corr = corr_matrix['Happiness Score'].drop('Happiness Score').sort_values(ascending=False)
for factor, corr in happiness_corr.items():
    print(f"{factor}: {corr:.3f}")


### 5.4. Comparison by Continents


In [None]:
# Mean happiness score per continent (latest year), highest first
continent_means = df_latest.groupby('Continent')['Happiness Score'].mean().sort_values(ascending=False)

plt.figure(figsize=(12, 6))
bars = plt.bar(continent_means.index, continent_means.values, 
               color=sns.color_palette("Set2", len(continent_means)))
plt.xlabel('Kıta', fontsize=12, fontweight='bold')
plt.ylabel('Ortalama Mutluluk Skoru', fontsize=12, fontweight='bold')
plt.title('Kıtalara Göre Ortalama Mutluluk Skorları', fontsize=14, fontweight='bold')
plt.xticks(rotation=45)
plt.grid(axis='y', alpha=0.3)

# Show the value above each bar, centred horizontally on the bar
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
             f'{height:.2f}', ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()


## 6. Statistical Analyses

### 6.1. Basic Statistics


In [None]:
# Descriptive statistics of the latest year's happiness scores
latest_scores = df_latest['Happiness Score']
upper_quartile = latest_scores.quantile(0.75)
lower_quartile = latest_scores.quantile(0.25)

print("Mutluluk Skoru İstatistikleri:")
print(f"Ortalama: {latest_scores.mean():.3f}")
print(f"Medyan: {latest_scores.median():.3f}")
print(f"Standart Sapma: {latest_scores.std():.3f}")
print(f"Minimum: {latest_scores.min():.3f}")
print(f"Maksimum: {latest_scores.max():.3f}")
print(f"Çeyrekler Arası Aralık (IQR): {upper_quartile - lower_quartile:.3f}")


### 6.2. Rich vs Poor Countries: Hypothesis Test


In [None]:
# Split the countries into "rich" and "poor" at the median GDP per capita
median_gdp = df_latest['GDP per capita'].median()
rich_countries = df_latest[df_latest['GDP per capita'] >= median_gdp]['Happiness Score']
poor_countries = df_latest[df_latest['GDP per capita'] < median_gdp]['Happiness Score']

print(f"Medyan GDP: ${median_gdp:,.2f}")
print(f"\nZengin Ülkeler (GDP >= ${median_gdp:,.2f}):")
print(f"  Sayı: {len(rich_countries)}")
print(f"  Ortalama Mutluluk: {rich_countries.mean():.3f}")
print(f"  Standart Sapma: {rich_countries.std():.3f}")

print(f"\nFakir Ülkeler (GDP < ${median_gdp:,.2f}):")
print(f"  Sayı: {len(poor_countries)}")
print(f"  Ortalama Mutluluk: {poor_countries.mean():.3f}")
print(f"  Standart Sapma: {poor_countries.std():.3f}")

# Independent two-sample t-test with SciPy's defaults (two-sided)
t_stat, p_value = stats.ttest_ind(rich_countries, poor_countries)

print(f"\nİstatistiksel Test Sonuçları:")
print(f"  T-istatistiği: {t_stat:.3f}")
print(f"  P-değeri: {p_value:.6f}")

alpha = 0.05
if p_value < alpha:
    print(f"\n  Sonuç: P-değeri ({p_value:.6f}) < {alpha} olduğu için,")
    # The test is two-sided, so a small p-value alone does not say which
    # group is happier; the sign of t (rich minus poor) decides that.
    if t_stat > 0:
        print(f"  H0 reddedilir. Zengin ülkeler fakir ülkelerden istatistiksel olarak daha mutludur.")
    else:
        print(f"  H0 reddedilir. Fakir ülkeler zengin ülkelerden istatistiksel olarak daha mutludur.")
else:
    print(f"\n  Sonuç: P-değeri ({p_value:.6f}) >= {alpha} olduğu için,")
    print(f"  H0 reddedilemez. İstatistiksel olarak anlamlı bir fark yoktur.")

# Visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Box plot of both groups side by side
data_to_plot = [poor_countries, rich_countries]
ax1.boxplot(data_to_plot, labels=['Fakir Ülkeler', 'Zengin Ülkeler'])
ax1.set_ylabel('Mutluluk Skoru', fontsize=12, fontweight='bold')
ax1.set_title('Zengin vs Fakir Ülkeler: Mutluluk Dağılımı', fontsize=14, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Overlaid histograms; dashed vertical lines mark each group's mean
ax2.hist(poor_countries, alpha=0.6, label='Fakir Ülkeler', bins=15, color='orange')
ax2.hist(rich_countries, alpha=0.6, label='Zengin Ülkeler', bins=15, color='green')
ax2.axvline(poor_countries.mean(), color='orange', linestyle='--', linewidth=2, label=f'Fakir Ortalama: {poor_countries.mean():.2f}')
ax2.axvline(rich_countries.mean(), color='green', linestyle='--', linewidth=2, label=f'Zengin Ortalama: {rich_countries.mean():.2f}')
ax2.set_xlabel('Mutluluk Skoru', fontsize=12, fontweight='bold')
ax2.set_ylabel('Frekans', fontsize=12, fontweight='bold')
ax2.set_title('Mutluluk Skoru Dağılımları', fontsize=14, fontweight='bold')
ax2.legend()
ax2.grid(alpha=0.3)

plt.tight_layout()
plt.show()


### 6.3. Changes Over Time


In [None]:
# Mean happiness score per year
yearly_means = df_all.groupby('Year')['Happiness Score'].mean()

plt.figure(figsize=(12, 6))
plt.plot(yearly_means.index, yearly_means.values, marker='o', linewidth=2, markersize=10)
plt.xlabel('Yıl', fontsize=12, fontweight='bold')
plt.ylabel('Ortalama Mutluluk Skoru', fontsize=12, fontweight='bold')
plt.title('Yıllara Göre Ortalama Mutluluk Skorları', fontsize=14, fontweight='bold')
# One tick per year; the automatic ticks would include fractional years
plt.xticks(yearly_means.index)
plt.grid(True, alpha=0.3)

# Show the value above each point
for year, score in yearly_means.items():
    plt.text(year, score + 0.02, f'{score:.3f}', ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()

# Countries with the largest increase / decrease between the first and last year
if len(data) >= 2:
    years_sorted = sorted(data.keys())
    first_year = years_sorted[0]
    last_year = years_sorted[-1]

    # Keep one row per country (the first one). A repeated country name would
    # otherwise make the index non-unique, and the subtraction below would
    # then align every duplicate against every other one.
    df_first = (data[first_year]
                .drop_duplicates('Country name')
                .set_index('Country name')['Happiness Score'])
    df_last = (data[last_year]
               .drop_duplicates('Country name')
               .set_index('Country name')['Happiness Score'])

    # Only countries present in both years can be compared
    common_countries = df_first.index.intersection(df_last.index)

    changes = df_last[common_countries] - df_first[common_countries]
    changes = changes.sort_values(ascending=False)

    print(f"\n{first_year}-{last_year} Arası En Çok Artan 10 Ülke:")
    print(changes.head(10).to_string())

    # Reverse the tail so the largest decrease is listed first
    print(f"\n{first_year}-{last_year} Arası En Çok Azalan 10 Ülke:")
    print(changes.tail(10).iloc[::-1].to_string())


## 7. Insights and Conclusions


## 8. Anomaly Detection (Advanced Data Science Analysis)

This section uses various methods to detect abnormal values in the data.


In [None]:
# Anomaly detection analysis
# Runs only when the anomaly_detection module was imported and df_latest exists.
if ANOMALY_DETECTION_AVAILABLE and 'df_latest' in globals():
    print("Anomaly detection analizi başlatılıyor...\n")
    
    # Comprehensive anomaly detection
    # NOTE(review): judging by how it is read below, anomaly_results is keyed
    # by method name and each value maps a column to a table of flagged rows;
    # confirm against the anomaly_detection module.
    anomaly_results = comprehensive_anomaly_detection(df_latest)
    
    # Visualize the anomalies
    if 'Happiness Score' in df_latest.columns:
        try:
            from anomaly_detection import visualize_anomalies
            fig_anomalies = visualize_anomalies(df_latest, anomaly_results, 'Happiness Score')
            plt.show()
        except Exception as e:
            # Best-effort: a plotting failure is reported and the listing below still runs
            print(f"Görselleştirme hatası: {e}")
            
    # Show the countries that were flagged as anomalous
    print("\n" + "="*70)
    print("ANOMALİ TESPİT EDİLEN ÜLKELER (Detaylı Analiz)")
    print("="*70)
    
    for method in ['iqr_anomalies', 'zscore_anomalies']:
        if method in anomaly_results:
            print(f"\n{method.upper()} yöntemi:")
            for col, anomalies in anomaly_results[method].items():
                if len(anomalies) > 0 and 'Country name' in anomalies.columns:
                    print(f"  {col}:")
                    # List at most five distinct countries per column
                    countries = anomalies['Country name'].unique()[:5]
                    for country in countries:
                        print(f"    - {country}")
                    # Summarise how many further countries were left out
                    if len(anomalies['Country name'].unique()) > 5:
                        print(f"    ... ve {len(anomalies['Country name'].unique()) - 5} tane daha")
else:
    print("Anomaly detection modülü mevcut değil veya veri yüklenmemiş.")


## 9. Advanced Statistical Reporting

Quartiles, box plots and detailed statistical analyses.


In [None]:
# Advanced statistical report: per-column descriptive statistics, quartiles,
# distribution shape, and a box plot plus histogram for each column.
if 'df_latest' in globals():
    print("="*70)
    print("GELİŞMİŞ İSTATİSTİKSEL RAPOR")
    print("="*70)

    numeric_cols = ['Happiness Score', 'GDP per capita', 'Social support',
                   'Healthy life expectancy', 'Freedom to make life choices']

    for col in numeric_cols:
        if col in df_latest.columns:
            print(f"\n{col.upper()} - Detaylı İstatistikler:")
            print("-" * 70)
            # Deliberately NOT named `data`: that name holds the per-year
            # dict of frames, and overwriting it breaks every cell that
            # reads the dict afterwards.
            values = df_latest[col].dropna()

            # Basic statistics
            print(f"  Ortalama: {values.mean():.3f}")
            print(f"  Medyan: {values.median():.3f}")
            print(f"  Standart Sapma: {values.std():.3f}")
            print(f"  Varyans: {values.var():.3f}")

            # Quartiles
            q1 = values.quantile(0.25)
            q2 = values.quantile(0.50)  # Median
            q3 = values.quantile(0.75)
            iqr = q3 - q1

            print(f"\n  Quartiles:")
            print(f"    Q1 (25%): {q1:.3f}")
            print(f"    Q2 (Medyan, 50%): {q2:.3f}")
            print(f"    Q3 (75%): {q3:.3f}")
            print(f"    IQR: {iqr:.3f}")

            # Min/Max
            print(f"\n  Aralık:")
            print(f"    Minimum: {values.min():.3f}")
            print(f"    Maximum: {values.max():.3f}")
            print(f"    Range: {values.max() - values.min():.3f}")

            # Skewness and kurtosis; |value| < 0.5 is reported as roughly
            # symmetric / mesokurtic
            skewness = stats.skew(values)
            kurtosis = stats.kurtosis(values)
            print(f"\n  Dağılım Özellikleri:")
            print(f"    Skewness (Çarpıklık): {skewness:.3f}")
            if abs(skewness) < 0.5:
                print("      → Yaklaşık simetrik dağılım")
            elif skewness > 0:
                print("      → Pozitif çarpıklık (sağa çekilmiş)")
            else:
                print("      → Negatif çarpıklık (sola çekilmiş)")

            print(f"    Kurtosis (Basıklık): {kurtosis:.3f}")
            if abs(kurtosis) < 0.5:
                print("      → Normal kurtosis (mesokurtic)")
            elif kurtosis > 0:
                print("      → Yüksek kurtosis (leptokurtic - sivri)")
            else:
                print("      → Düşük kurtosis (platykurtic - yassı)")

            # One figure per column: box plot on the left, histogram on the right
            fig, axes = plt.subplots(1, 2, figsize=(14, 5))

            # Box plot
            axes[0].boxplot(values, vert=True)
            axes[0].set_ylabel(col)
            axes[0].set_title(f'{col} - Box Plot')
            axes[0].grid(True, alpha=0.3)

            # Histogram normalised to a density so the normal curve is comparable
            axes[1].hist(values, bins=20, density=True, alpha=0.7, edgecolor='black', label='Data')
            # Overlay a normal pdf with the sample mean and standard deviation
            mu, sigma = values.mean(), values.std()
            x = np.linspace(values.min(), values.max(), 100)
            axes[1].plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2, label='Normal Distribution')
            axes[1].set_xlabel(col)
            axes[1].set_ylabel('Density')
            axes[1].set_title(f'{col} - Distribution')
            axes[1].legend()
            axes[1].grid(True, alpha=0.3)

            plt.tight_layout()
            plt.show()

    print("\n" + "="*70)
    print("Gelişmiş istatistiksel rapor tamamlandı.")
    print("="*70)


In [None]:
# Final summary report: prints the headline answers to the five questions
# of the analysis, all computed from df_all / df_latest.
print("=" * 80)
print("KÜRESEL MUTLULUK VE EKONOMİK REFAH ANALİZİ - ÖZET RAPOR")
print("=" * 80)

print("\n1. PARA GERÇEKTEN MUTLULUK GETİRİYOR MU? (GDP vs Happiness)")
print("-" * 80)
gdp_corr = df_latest['GDP per capita'].corr(df_latest['Happiness Score'])
print(f"GDP per Capita ile Mutluluk Skoru arasındaki korelasyon: {gdp_corr:.3f}")
if gdp_corr > 0.7:
    print("Sonuç: Güçlü pozitif korelasyon var. Para mutluluğa önemli ölçüde katkı sağlıyor.")
elif gdp_corr > 0.4:
    print("Sonuç: Orta düzeyde pozitif korelasyon var. Para mutluluğa katkı sağlıyor ancak tek faktör değil.")
else:
    print("Sonuç: Zayıf korelasyon. Para mutluluğun tek belirleyicisi değil.")

print("\n2. HANGİ BÖLGE SOSYAL DESTEKTE DAHA ÖNDE?")
print("-" * 80)
social_support_by_continent = df_latest.groupby('Continent')['Social support'].mean().sort_values(ascending=False)
print("Kıtalara göre ortalama sosyal destek skorları:")
for continent, score in social_support_by_continent.items():
    print(f"  {continent}: {score:.3f}")
print(f"\nEn yüksek sosyal destek: {social_support_by_continent.index[0]}")

print("\n3. SON 3 YILDA MUTLULUK ORANI EN ÇOK ARTIŞ/AZALIŞ GÖSTEREN ÜLKELER")
print("-" * 80)
# Derived from df_all rather than from the name `data`, which is easy to
# rebind to something else in a notebook session.
years_sorted = sorted(df_all['Year'].unique())
if len(years_sorted) >= 2:
    first_year = int(years_sorted[0])
    last_year = int(years_sorted[-1])

    # One row per country (the first one) so the indexes are unique and the
    # subtraction below aligns each country with itself only.
    df_first = (df_all[df_all['Year'] == first_year]
                .drop_duplicates('Country name')
                .set_index('Country name')['Happiness Score'])
    df_last = (df_all[df_all['Year'] == last_year]
               .drop_duplicates('Country name')
               .set_index('Country name')['Happiness Score'])

    common_countries = df_first.index.intersection(df_last.index)
    changes = df_last[common_countries] - df_first[common_countries]
    changes = changes.sort_values(ascending=False)

    # `+` in the format spec prints the real sign, so a negative change can
    # never be rendered as "+-0.012".
    print(f"En çok artan 5 ülke ({first_year}-{last_year}):")
    for i, (country, change) in enumerate(changes.head(5).items(), 1):
        print(f"  {i}. {country}: {change:+.3f}")

    # Reverse the tail so that rank 1 is the largest decrease
    print(f"\nEn çok azalan 5 ülke ({first_year}-{last_year}):")
    for i, (country, change) in enumerate(changes.tail(5).iloc[::-1].items(), 1):
        print(f"  {i}. {country}: {change:+.3f}")

print("\n4. MUTLULUĞU EN ÇOK ETKİLEYEN FAKTÖRLER")
print("-" * 80)
correlation_cols = ['GDP per capita', 'Social support', 'Healthy life expectancy',
                   'Freedom to make life choices', 'Generosity', 'Perceptions of corruption']
correlations = {
    col: df_latest['Happiness Score'].corr(df_latest[col])
    for col in correlation_cols
}

# Rank by strength (absolute value) but print the signed coefficient
sorted_corr = sorted(correlations.items(), key=lambda x: abs(x[1]), reverse=True)
print("Mutluluk skoru ile korelasyon (mutlak değer):")
for factor, corr in sorted_corr:
    print(f"  {factor}: {corr:.3f}")

print("\n5. ZENGİN VS FAKİR ÜLKELER: İSTATİSTİKSEL KARŞILAŞTIRMA")
print("-" * 80)
# Split at the median GDP; each group is selected once and reused
median_gdp = df_latest['GDP per capita'].median()
rich_scores = df_latest[df_latest['GDP per capita'] >= median_gdp]['Happiness Score']
poor_scores = df_latest[df_latest['GDP per capita'] < median_gdp]['Happiness Score']
rich_mean = rich_scores.mean()
poor_mean = poor_scores.mean()
t_stat, p_value = stats.ttest_ind(rich_scores, poor_scores)
print(f"Zengin ülkeler ortalama mutluluk: {rich_mean:.3f}")
print(f"Fakir ülkeler ortalama mutluluk: {poor_mean:.3f}")
print(f"Fark: {rich_mean - poor_mean:.3f}")
print(f"P-değeri: {p_value:.6f}")
if p_value < 0.05:
    # Two-sided test: the sign of t (rich minus poor) tells which group is happier
    if t_stat > 0:
        print("Sonuç: İstatistiksel olarak anlamlı fark var. Zengin ülkeler daha mutlu.")
    else:
        print("Sonuç: İstatistiksel olarak anlamlı fark var. Fakir ülkeler daha mutlu.")
else:
    print("Sonuç: İstatistiksel olarak anlamlı fark yok.")

print("\n" + "=" * 80)
print("Rapor tamamlandı.")
print("=" * 80)
