In [None]:
import pandas as pd

# Survey years to load; each year is read from '<year>.csv' in the working directory.
YEARS = (2015, 2016, 2017, 2018, 2019)

# Unified schema every yearly frame is reduced to (a 'Year' column is appended afterwards).
COMMON_COLUMNS = ['Country', 'Score', 'GDP', 'Health', 'Freedom', 'Generosity', 'Trust']

# Year-specific source headers -> unified names. Headers that already carry
# their unified name (e.g. 'Freedom', 'Generosity') need no entry.
_RENAMES_2015_2016 = {
    'Happiness Score': 'Score',
    'Economy (GDP per Capita)': 'GDP',
    'Health (Life Expectancy)': 'Health',
    'Trust (Government Corruption)': 'Trust',
}
_RENAMES_2017 = {
    'Happiness.Score': 'Score',
    'Economy..GDP.per.Capita.': 'GDP',
    'Health..Life.Expectancy.': 'Health',
    'Trust..Government.Corruption.': 'Trust',
}
_RENAMES_2018_2019 = {
    'Country or region': 'Country',
    'GDP per capita': 'GDP',
    'Healthy life expectancy': 'Health',
    'Freedom to make life choices': 'Freedom',
    'Perceptions of corruption': 'Trust',
}
COLUMN_RENAMES = {
    2015: _RENAMES_2015_2016,
    2016: _RENAMES_2015_2016,
    2017: _RENAMES_2017,
    2018: _RENAMES_2018_2019,
    2019: _RENAMES_2018_2019,
}


def standardize_year(raw, year):
    """Reduce one yearly frame to the unified schema and tag it with its year.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame as read from the yearly CSV file.
    year : int
        Key into COLUMN_RENAMES; also stored in the new 'Year' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with COMMON_COLUMNS followed by 'Year'.

    Raises
    ------
    KeyError
        If `year` has no rename mapping, or a unified column is still
        missing after renaming.
    """
    renamed = raw.rename(columns=COLUMN_RENAMES[year])
    return renamed[COMMON_COLUMNS].assign(Year=year)


# Read each CSV file into its own DataFrame.
raw_by_year = {year: pd.read_csv(f'{year}.csv') for year in YEARS}

# Show the raw headers of the oldest and newest files to document why the
# rename mappings above differ between years.
print("2015 columns:", raw_by_year[2015].columns)
print("2019 columns:", raw_by_year[2019].columns)

# Rename to the unified schema and keep only the shared columns.
clean_by_year = {year: standardize_year(raw, year) for year, raw in raw_by_year.items()}

# Keep the per-year names available for any later cell that refers to them.
df_2015, df_2016, df_2017, df_2018, df_2019 = (clean_by_year[year] for year in YEARS)

df_all = pd.concat(list(clean_by_year.values()), ignore_index=True)
assert list(df_all.columns) == COMMON_COLUMNS + ['Year'], "unexpected merged schema"
df_all.head()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

# Discard every row that has a missing value in any column before modelling.
df_all = df_all.dropna()

# Independent variables (predictors) and the dependent variable (numeric score).
X = df_all[['GDP', 'Health', 'Freedom', 'Generosity', 'Trust']]
y = df_all['Score']

# 80% of the rows for training, 20% held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One estimator per model family; the names are kept because later cells use them.
model_lr = LinearRegression()
model_dt = DecisionTreeRegressor(random_state=42)
model_rf = RandomForestRegressor(random_state=42)
model_knn = KNeighborsRegressor(n_neighbors=5)

# Fit each regressor on the training split, then report MSE and R2 on the test split.
regression_predictions = []
for heading, regressor in [
    ("Linear Regression:", model_lr),
    ("\nDecision Tree:", model_dt),
    ("\nRandom Forest:", model_rf),
    ("\nKNN:", model_knn),
]:
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    print(heading)
    print("MSE:", mean_squared_error(y_test, predicted))
    print("R2 Score:", r2_score(y_test, predicted))
    regression_predictions.append(predicted)

# Test-set predictions, in the same order as the loop above.
y_pred_lr, y_pred_dt, y_pred_rf, y_pred_knn = regression_predictions

In [None]:
# إنشاء عمود جديد للفئات التصنيفية
def label_happiness(score):
    """Map a numeric happiness score to a coarse category label.

    Returns 'Low' for scores below 5.5, 'Medium' for scores from 5.5 up to
    (but not including) 6.5, and 'High' for everything else.
    """
    # Exclusive upper bounds, checked in ascending order; the first match wins.
    for upper_bound, label in ((5.5, 'Low'), (6.5, 'Medium')):
        if score < upper_bound:
            return label
    return 'High'

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

# Derive the categorical target from the numeric score.
df_all['Happiness_Level'] = df_all['Score'].map(label_happiness)

# Same predictors as the regression step; the target is now the category label.
X = df_all[['GDP', 'Health', 'Freedom', 'Generosity', 'Trust']]
y = df_all['Happiness_Level']

# Train/test split (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One estimator per model family; the names are kept because later cells use them.
model_nb = GaussianNB()
model_svm = SVC()
model_ann = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=42)

# Fit each classifier on the training split, then report accuracy and the
# per-class report on the test split.
classification_predictions = []
for heading, classifier in [
    ("Naive Bayes:", model_nb),
    ("SVM:", model_svm),
    ("ANN (Neural Network):", model_ann),
]:
    classifier.fit(X_train, y_train)
    predicted_labels = classifier.predict(X_test)
    print(heading)
    print("Accuracy:", accuracy_score(y_test, predicted_labels))
    print("\nClassification Report:\n", classification_report(y_test, predicted_labels))
    classification_predictions.append(predicted_labels)

# Test-set predictions, in the same order as the loop above.
y_pred_nb, y_pred_svm, y_pred_ann = classification_predictions

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.tree import plot_tree

# --- Linear regression: actual vs. predicted scores -------------------------
# By this point `y_test` has been rebound to the categorical Happiness_Level
# labels by the classification split, so plotting it against `y_pred_lr`
# would put 'Low'/'Medium'/'High' on the "Actual Scores" axis. Look the
# numeric scores up again by row index instead. The regression and
# classification splits use the same rows, test_size and random_state, so the
# held-out rows (and their order) match the rows `y_pred_lr` was computed on.
actual_scores = df_all.loc[y_test.index, 'Score']
assert len(actual_scores) == len(y_pred_lr), "test rows and LR predictions are misaligned"

fig, ax = plt.subplots()
ax.scatter(actual_scores, y_pred_lr)
ax.set_xlabel("Actual Scores")
ax.set_ylabel("Predicted Scores")
ax.set_title("Linear Regression: Actual vs Predicted")
ax.grid(True)
plt.show()

# --- Random forest: feature importances -------------------------------------
importances = model_rf.feature_importances_
features = X.columns

fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(features, importances)
ax.set_title("Feature Importance - Random Forest")
ax.set_xlabel("Features")
ax.set_ylabel("Importance Score")
ax.grid(True)
plt.show()

# --- Decision tree: top levels only -----------------------------------------
fig, ax = plt.subplots(figsize=(14, 6))
# Pass a plain list: some scikit-learn releases validate `feature_names`
# strictly and reject a pandas Index.
plot_tree(model_dt, filled=True, feature_names=list(features), max_depth=2, ax=ax)
ax.set_title("Decision Tree (depth=2)")
plt.show()

# --- SVM: confusion matrix on the classification test split -----------------
cm = confusion_matrix(y_test, y_pred_svm, labels=model_svm.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_svm.classes_)
disp.plot(cmap='Blues')
# Address the matrix axes explicitly rather than relying on the "current axes".
disp.ax_.set_title("Confusion Matrix - SVM")
disp.ax_.grid(False)
plt.show()

In [None]:
import os

import pandas as pd

# Create the output folders BEFORE writing anything into them; on a fresh
# checkout the to_csv calls below would otherwise fail because the
# directories do not exist yet.
os.makedirs("data/original data", exist_ok=True)
os.makedirs("data/preprocessed data", exist_ok=True)
os.makedirs("data/Results", exist_ok=True)


def _targets_frame(y_split):
    """Build the target table for one split (train or test).

    The frame always has a numeric 'Score' column, looked up in `df_all` by
    the split's row index. When the split holds a differently named target
    (e.g. the categorical happiness level), that target is added as a second
    column under its own name.

    Passing a named Series to ``pd.DataFrame(..., columns=["Score"])`` is not
    a rename: if the names differ, the result carries none of the values.
    """
    targets = df_all.loc[y_split.index, ["Score"]].copy()
    if y_split.name not in (None, "Score"):
        targets[y_split.name] = y_split
    return targets


# Feature splits are already DataFrames; re-wrapping just pins the column order.
X_train_df = pd.DataFrame(X_train, columns=X.columns)
X_test_df = pd.DataFrame(X_test, columns=X.columns)
y_train_df = _targets_frame(y_train)
y_test_df = _targets_frame(y_test)

# Save the splits into the "preprocessed data" folder.
X_train_df.to_csv('data/preprocessed data/X.csv', index=False)
y_train_df.to_csv('data/preprocessed data/Y.csv', index=False)
X_test_df.to_csv('data/preprocessed data/X_test.csv', index=False)
y_test_df.to_csv('data/preprocessed data/Y_test.csv', index=False)

# Save every model's test-set predictions into the "Results" folder.
predictions_by_model = {
    "LR": y_pred_lr,
    "DT": y_pred_dt,
    "RF": y_pred_rf,
    "KNN": y_pred_knn,
    "NB": y_pred_nb,
    "SVM": y_pred_svm,
    "ANN": y_pred_ann,
}
for model_tag, predictions in predictions_by_model.items():
    pd.DataFrame(predictions, columns=["Prediction"]).to_csv(
        f'data/Results/predictions_{model_tag}_model.csv', index=False
    )