<a href="https://colab.research.google.com/github/omid-sakaki-ghazvini/Practices/blob/main/World__Happiness_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Install Dependencies and Setup

<div style="direction:rtl">
<font color='green' size="5px">
 کتابخانه‌های مورد نیاز را فراخوانی (import) می‌کنیم
    </font>
</div>

In [None]:
import warnings

#Data Analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

#preprocessing & Score
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

#Classifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

#Clustering
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

warnings.filterwarnings('ignore')

<div style="direction:rtl">
<font color='green' size="5px">
 از لینک زیر دیتاست را دانلود کرده و در پوشه هم مسیر همین ژوپیتر نوت بوک قرار دهید
    </font>
</div>

## Dataset: [World Happiness Data 2024 (Kaggle)](https://www.kaggle.com/datasets/abdullah0a/world-happiness-data-2024-explore-life)

# 2. Load Data

<div style="direction:rtl">
<font color='green' size="5px">
توسط خط فرمان زیر، دیتا را فراخوانی میکنیم
    </font>
</div>

In [None]:
# Load the World Happiness Report table.
# The Kaggle mount is tried first; if it does not exist (Colab or a local run),
# fall back to a copy of the CSV placed in the same folder as this notebook,
# which is the setup the download instructions above describe.
DATA_FILE = 'World Happiness Report 2024.csv'
KAGGLE_DIR = '/kaggle/input/world-happiness-data-2024-explore-life/'

try:
    df = pd.read_csv(KAGGLE_DIR + DATA_FILE)
except FileNotFoundError:
    df = pd.read_csv(DATA_FILE)

df.info()

In [None]:
# Preview the first ten rows of the loaded frame.
df.head(10)

In [None]:
# Summary statistics for the frame's columns.
df.describe()

In [None]:
# Missing-value count per column, smallest first.
df.isna().sum().sort_values()

In [None]:
# Forward-fill each missing value from the row above it.
# DataFrame.ffill() is the supported spelling: fillna(method='pad') is deprecated
# in pandas 2.x, and the deprecation warning is hidden by the global warnings filter.
# NOTE(review): the fill runs over the whole frame, so a gap in the first row of
# one country is filled from the last row of the previous country — confirm
# this is acceptable for the analysis.
df = df.ffill()

# 3. Data Analysis

In [None]:
# Distinct values of the 'Country name' column.
df['Country name'].unique()

In [None]:
# Column labels of the frame, for reference when selecting features below.
df.columns

In [None]:
# Summary statistics of the 'Life Ladder' score.
df['Life Ladder'].describe()

In [None]:
# Country name(s) of the rows holding the lowest, then the highest, Life Ladder value.
ladder = df['Life Ladder']
for extreme in (ladder.min(), ladder.max()):
    print(df.loc[ladder == extreme, 'Country name'])

In [None]:
# Life Ladder per year for Afghanistan and Denmark, side by side.
# The panels share one categorical x axis (sharex=True), so both bar plots are
# given the same explicit year order. Without it each panel lays out only the
# years present for its own country, and the shared tick labels can end up
# misaligned with the bars or out of chronological order.
countries = ['Afghanistan', 'Denmark']
years = sorted(df.loc[df['Country name'].isin(countries), 'year'].unique())

fig, axes = plt.subplots(1, 2, sharex=True, figsize=(20,5))

for ax, country in zip(axes, countries):
    sns.barplot(data=df.loc[df['Country name'] == country], x='year', y='Life Ladder',
                order=years, ax=ax)
    ax.set_title(country)

In [None]:
# Negative affect per year for Afghanistan and Denmark, side by side.
# The panels share one categorical x axis (sharex=True), so both bar plots are
# given the same explicit year order. Without it each panel lays out only the
# years present for its own country, and the shared tick labels can end up
# misaligned with the bars or out of chronological order.
countries = ['Afghanistan', 'Denmark']
years = sorted(df.loc[df['Country name'].isin(countries), 'year'].unique())

fig, axes = plt.subplots(1, 2, sharex=True, figsize=(20,5))

for ax, country in zip(axes, countries):
    sns.barplot(data=df.loc[df['Country name'] == country], x='year', y='Negative affect',
                order=years, ax=ax)
    ax.set_title(country)

In [None]:
# Freedom to make life choices per year for Afghanistan and Denmark, side by side.
# The panels share one categorical x axis (sharex=True), so both bar plots are
# given the same explicit year order. Without it each panel lays out only the
# years present for its own country, and the shared tick labels can end up
# misaligned with the bars or out of chronological order.
countries = ['Afghanistan', 'Denmark']
years = sorted(df.loc[df['Country name'].isin(countries), 'year'].unique())

fig, axes = plt.subplots(1, 2, sharex=True, figsize=(20,5))

for ax, country in zip(axes, countries):
    sns.barplot(data=df.loc[df['Country name'] == country], x='year',
                y='Freedom to make life choices', order=years, ax=ax)
    ax.set_title(country)

In [None]:
# Social support per year for Afghanistan and Denmark, side by side.
# The panels share one categorical x axis (sharex=True), so both bar plots are
# given the same explicit year order. Without it each panel lays out only the
# years present for its own country, and the shared tick labels can end up
# misaligned with the bars or out of chronological order.
countries = ['Afghanistan', 'Denmark']
years = sorted(df.loc[df['Country name'].isin(countries), 'year'].unique())

fig, axes = plt.subplots(1, 2, sharex=True, figsize=(20,5))

for ax, country in zip(axes, countries):
    sns.barplot(data=df.loc[df['Country name'] == country], x='year', y='Social support',
                order=years, ax=ax)
    ax.set_title(country)

In [None]:
# Four yearly indicators for Afghanistan on a 2x2 grid, filled row by row.
afghanistan = df.loc[df['Country name']=='Afghanistan']
indicators = ['Positive affect', 'Freedom to make life choices', 'Negative affect', 'Life Ladder']

fig, axes = plt.subplots(2, 2, sharex=True, figsize=(20,10))

# axes.flat walks the grid in row-major order: [0,0], [0,1], [1,0], [1,1].
for ax, indicator in zip(axes.flat, indicators):
    sns.barplot(data=afghanistan, x='year', y=indicator, ax=ax)
    ax.set_title('Afghanistan')

In [None]:
# Summary statistics of 'Life Ladder' again, shown right before the score is cut into bands.
df['Life Ladder'].describe()

In [None]:
def target_name(index):
    """Map a Life Ladder score to one of five happiness bands.

    Bands are half-open on the right: below 4.6 is 'Very Sad', [4.6, 5.4) is
    'Sad', [5.4, 6.3) is 'Neutral', [6.3, 8.0) is 'Happy', and anything that
    falls under none of these upper bounds is 'Very Happy'.
    """
    # (exclusive upper bound, label), in ascending order; the first bound the
    # score is below decides the label.
    bands = (
        (4.6, 'Very Sad'),
        (5.4, 'Sad'),
        (6.3, 'Neutral'),
        (8.0, 'Happy'),
    )
    for upper_bound, label in bands:
        if index < upper_bound:
            return label
    return 'Very Happy'

# Label every row with the happiness band of its Life Ladder score.
df['target_name'] = df['Life Ladder'].map(target_name)

In [None]:
# Scatter each driver against positive and negative affect, coloured by happiness band.
# (x column, y column) per panel, in the row-major order the grid is filled.
panels = [
    ('Log GDP per capita', 'Positive affect'),
    ('Log GDP per capita', 'Negative affect'),
    ('Healthy life expectancy at birth', 'Positive affect'),
    ('Healthy life expectancy at birth', 'Negative affect'),
    ('Freedom to make life choices', 'Positive affect'),
    ('Freedom to make life choices', 'Negative affect'),
    ('Life Ladder', 'Positive affect'),
    ('Life Ladder', 'Negative affect'),
]

# The panels plot different x variables, so the x axis is deliberately NOT shared:
# a shared axis would force all of them onto one common range.
fig, axes = plt.subplots(3, 3, figsize=(20,10))

for ax, (x_col, y_col) in zip(axes.flat, panels):
    sns.scatterplot(data=df, x=x_col, y=y_col, hue='target_name', ax=ax)
    # Title every panel on its own axes with the x variable it shows.
    ax.set_title(x_col)

# Only eight of the nine grid slots are used; hide the empty one.
for ax in axes.flat[len(panels):]:
    ax.set_visible(False)

fig.tight_layout()

# 4. Data Preparation

In [None]:
# Work on a copy without the 'year' column; the original frame stays untouched.
df_clean = df.drop(['year'], axis=1)

# Integer-encode every text column. A separate fitted encoder is kept per column
# so codes can be mapped back to their labels later, e.g.
# encoders['target_name'].classes_ or .inverse_transform(...). Re-fitting one
# shared encoder would keep only the mapping of the last column processed.
encoders = {}
for column in df_clean.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df_clean[column] = le.fit_transform(df_clean[column])
    encoders[column] = le

df_clean.head()

In [None]:
# 'target_name' is computed directly from 'Life Ladder', so that column must not
# be a feature: leaving it in lets every model read the answer off its input
# (target leakage) and makes the reported scores meaningless.
X = df_clean.drop(['target_name', 'Life Ladder'], axis=1)
y = df_clean['target_name']

# 5. Split & Scale Data

<div style="direction:rtl">
<font color='green' size="5px">
 نرمالسازی دیتا
    </font>
</div>

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Build Models

In [None]:
# Candidate classifiers for the multi-class happiness-band target. All are
# trained on the standardized features and seeded where the estimator allows it.
models = {
    'LogisticRegression':LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    # 'mlogloss' is XGBoost's multi-class log-loss; 'logloss' is the binary metric.
    'XGBoost': XGBClassifier(eval_metric='mlogloss', random_state=42),
    'SVM': SVC(probability=True, random_state=42),
    'KNN': KNeighborsClassifier(),
    'AdaBoost': AdaBoostClassifier(algorithm='SAMME', random_state=42)
}

# model name -> metrics measured on the held-out test split
results = {}

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # zero_division=0 scores a class that is never predicted as 0 instead of
    # emitting an undefined-metric warning.
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    # Precision / recall / F1 are macro-averaged over all classes. Reading only
    # report['1'] would describe a single encoded class and ignore the rest.
    macro_avg = report['macro avg']

    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': macro_avg['precision'],
        'Recall': macro_avg['recall'],
        'F1': macro_avg['f1-score']
    }

# One row per model, one column per metric.
results_df = pd.DataFrame(results).T

results_df

# 7. Clustering

In [None]:
# Partition the standardized training rows into five clusters.
kmeans = KMeans(n_clusters=5, random_state=42)
cluster_labels = kmeans.fit_predict(X_train_scaled)

# Project the same rows onto their first two principal components, for plotting only.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train_scaled)

pca_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
pca_df['Cluster'] = cluster_labels

# plotly.express (px) is imported with the other libraries in the first code cell.
# The plotted copy casts 'Cluster' to str so each cluster gets a discrete colour
# rather than a continuous colour scale. Passing the column by name (instead of
# a separate Series) lets the labels={'Cluster': ...} mapping rename the legend.
fig = px.scatter(pca_df.astype({'Cluster': str}), x='PC1', y='PC2', color='Cluster',
                 labels={'Cluster': 'Cluster Group'},
                 template='plotly_white',
                 width=900, height=800)

fig.show()