In [1]:
# import necessary libraries
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# select the top 3 features using SelectKBest
selector = SelectKBest(score_func=chi2, k=3)
X_train_new = selector.fit_transform(X_train, y_train)
X_test_new = selector.transform(X_test)

# create a logistic regression model
model = LogisticRegression()

# train the model on the training data using the top 3 features
model.fit(X_train_new, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred = model.predict(X_test_new)

# evaluate the accuracy of the model using the top 3 features
accuracy = model.score(X_test_new, y_test)
print('Accuracy:', accuracy)

# evaluate the f1 score of the model using the top 3 features
f1 = f1_score(y_test, y_pred)
print('F1 Score:', f1)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

# print the top 3 features
feature_scores = pd.DataFrame({'Feature': X_train.columns, 'Score': selector.scores_})
top_features = feature_scores.nlargest(3, 'Score')
print('Top 3 Features:', top_features['Feature'].tolist())

Accuracy: 0.46153846153846156
F1 Score: 0.5
ROC AUC : 0.4848
Top 3 Features: ['X1', 'X5', 'X2']


In [9]:
# import necessary libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create a logistic regression model
model = LogisticRegression()

# train the model on the training data
model.fit(X_train, y_train)

# predict the happiness level for the testing data
y_pred = model.predict(X_test)

# evaluate the f1 score of the model using the top 3 features
f1 = f1_score(y_test, y_pred)
print('F1 Score:', f1)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

# evaluate the accuracy of the model
accuracy = model.score(X_test, y_test)
print('Accuracy:', accuracy)

F1 Score: 0.5
ROC AUC : 0.4848
Accuracy: 0.46153846153846156


In [11]:
# import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create a decision tree classifier model
model = DecisionTreeClassifier()

# train the model on the training data using the best 3 features
model.fit(X_train, y_train)

# predict the happiness level for the testing data using the best 3 features
y_pred = model.predict(X_test)

# evaluate the f1 score of the model using the top 3 features
f1 = f1_score(y_test, y_pred)
print('F1 Score:', f1)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

# evaluate the accuracy of the model using the best 3 features
accuracy = model.score(X_test, y_test)
print('Accuracy:', accuracy)


F1 Score: 0.64
ROC AUC : 0.6636
Accuracy: 0.6538461538461539


In [8]:
# import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# select the top 3 features using SelectKBest
selector = SelectKBest(score_func=chi2, k=3)
X_train_new = selector.fit_transform(X_train, y_train)
X_test_new = selector.transform(X_test)

# create a decision tree classifier model
model = DecisionTreeClassifier()

# train the model on the training data using the top 3 features
model.fit(X_train_new, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred = model.predict(X_test_new)

# evaluate the accuracy of the model using the top 3 features
accuracy = model.score(X_test_new, y_test)
print('Accuracy:', accuracy)

# evaluate the f1 score of the model using the top 3 features
f1 = f1_score(y_test, y_pred)
print('F1 Score:', f1)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))


# print the top 3 features
feature_scores = pd.DataFrame({'Feature': X_train.columns, 'Score': selector.scores_})
top_features = feature_scores.nlargest(3, 'Score')
print('Top 3 Features:', top_features['Feature'].tolist())

Accuracy: 0.7307692307692307
F1 Score: 0.6956521739130435
ROC AUC : 0.7303
Top 3 Features: ['X1', 'X5', 'X2']


In [7]:
# import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import roc_auc_score

# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# select the top 3 features using SelectKBest
selector = SelectKBest(score_func=chi2, k=3)
X_train_new = selector.fit_transform(X_train, y_train)
X_test_new = selector.transform(X_test)

# create a decision tree classifier model using Gini index
model_gini = DecisionTreeClassifier(criterion='gini')

# train the model on the training data using the top 3 features
model_gini.fit(X_train_new, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred_gini = model_gini.predict(X_test_new)

# evaluate the accuracy of the model using Gini index and the top 3 features
accuracy_gini = model_gini.score(X_test_new, y_test)
print('Accuracy (Gini index):', accuracy_gini)

# create a decision tree classifier model using entropy
model_entropy = DecisionTreeClassifier(criterion='entropy')

# train the model on the training data using the top 3 features
model_entropy.fit(X_train_new, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred_entropy = model_entropy.predict(X_test_new)

# evaluate the accuracy of the model using entropy and the top 3 features
accuracy_entropy = model_entropy.score(X_test_new, y_test)
print('Accuracy (Entropy):', accuracy_entropy)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))


# print the top 3 features
feature_scores = pd.DataFrame({'Feature': X_train.columns, 'Score': selector.scores_})
top_features = feature_scores.nlargest(3, 'Score')
print('Top 3 Features:', top_features['Feature'].tolist())

Accuracy (Gini index): 0.7307692307692307
Accuracy (Entropy): 0.7307692307692307
ROC AUC : 0.6030
Top 3 Features: ['X1', 'X5', 'X2']


In [6]:
# import necessary libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.metrics import roc_auc_score

# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# create a decision tree classifier model using Gini index
model_gini = DecisionTreeClassifier(criterion='gini')

# train the model on the training data using the top 3 features
model_gini.fit(X_train, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred_gini = model_gini.predict(X_test)

# evaluate the accuracy of the model using Gini index and the top 3 features
accuracy_gini = model_gini.score(X_test, y_test)
print('Accuracy (Gini index):', accuracy_gini)

# create a decision tree classifier model using entropy
model_entropy = DecisionTreeClassifier(criterion='entropy')

# train the model on the training data using the top 3 features
model_entropy.fit(X_train, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred_entropy = model_entropy.predict(X_test)

# evaluate the accuracy of the model using entropy and the top 3 features
accuracy_entropy = model_entropy.score(X_test, y_test)
print('Accuracy (Entropy):', accuracy_entropy)

#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))


Accuracy (Gini index): 0.6538461538461539
Accuracy (Entropy): 0.6538461538461539
ROC AUC : 0.6030


In [5]:
# import necessary libraries

import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import roc_auc_score

# load the dataset
data = pd.read_csv('/kaggle/input/acme-happiness/ACME-HappinessSurvey2020.csv')

# split the dataset into training and testing sets
X = data.drop('Y', axis=1)
y = data['Y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# select the top 3 features using SelectKBest
selector = SelectKBest(score_func=f_regression, k=3)
X_train_new = selector.fit_transform(X_train, y_train)
X_test_new = selector.transform(X_test)

# create a decision tree regressor model
model = DecisionTreeRegressor()

# train the model on the training data using the top 3 features
model.fit(X_train_new, y_train)

# predict the happiness level for the testing data using the top 3 features
y_pred = model.predict(X_test_new)


#AUC
ROC_AUC = roc_auc_score(y_test, y_pred)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

# print the top 3 features
feature_scores = pd.DataFrame({'Feature': X_train.columns, 'Score': selector.scores_})
top_features = feature_scores.nlargest(3, 'Score')
print('Top 3 Features:', top_features['Feature'].tolist())

ROC AUC : 0.6030
Top 3 Features: ['X1', 'X6', 'X5']
