# 1. Decision Tree

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [4]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [5]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de Decision Tree com validação cruzada
clf = DecisionTreeClassifier(random_state=42)

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.68       0.75806452 0.72580645 0.70967742 0.61290323]
Mean cross-validation score: 0.6972903225806453
Accuracy on test set: 0.6602564102564102

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.60      0.54      0.57        65
           1       0.69      0.75      0.72        91

    accuracy                           0.66       156
   macro avg       0.65      0.64      0.64       156
weighted avg       0.66      0.66      0.66       156


Confusion Matrix on test set:
 [[35 30]
 [23 68]]


# 2. Random Forest

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [7]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [8]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de Random Forest com validação cruzada
clf = RandomForestClassifier(random_state=42)

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.744      0.83064516 0.83870968 0.78225806 0.7016129 ]
Mean cross-validation score: 0.7794451612903226
Accuracy on test set: 0.7564102564102564

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.86      0.49      0.63        65
           1       0.72      0.95      0.82        91

    accuracy                           0.76       156
   macro avg       0.79      0.72      0.72       156
weighted avg       0.78      0.76      0.74       156


Confusion Matrix on test set:
 [[32 33]
 [ 5 86]]


# 3. Naive Bayes

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [10]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [11]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de Naive Bayes com validação cruzada
clf = GaussianNB()

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.696      0.82258065 0.83870968 0.71774194 0.7016129 ]
Mean cross-validation score: 0.7553290322580646
Accuracy on test set: 0.7628205128205128

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.73      0.69      0.71        65
           1       0.79      0.81      0.80        91

    accuracy                           0.76       156
   macro avg       0.76      0.75      0.75       156
weighted avg       0.76      0.76      0.76       156


Confusion Matrix on test set:
 [[45 20]
 [17 74]]


# 4. K-Nearest Neighbors

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [13]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [14]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de KNN com validação cruzada
clf = KNeighborsClassifier(n_neighbors=5)

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.568      0.58870968 0.63709677 0.57258065 0.62096774]
Mean cross-validation score: 0.5974709677419355
Accuracy on test set: 0.5897435897435898

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.51      0.31      0.38        65
           1       0.62      0.79      0.69        91

    accuracy                           0.59       156
   macro avg       0.56      0.55      0.54       156
weighted avg       0.57      0.59      0.56       156


Confusion Matrix on test set:
 [[20 45]
 [19 72]]


# 5. Support Vector Machine

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [16]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [17]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Padronizando as características
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de SVM com validação cruzada
clf = SVC(kernel='linear', random_state=42)

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.728      0.79032258 0.82258065 0.72580645 0.73387097]
Mean cross-validation score: 0.760116129032258
Accuracy on test set: 0.7564102564102564

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.80      0.55      0.65        65
           1       0.74      0.90      0.81        91

    accuracy                           0.76       156
   macro avg       0.77      0.73      0.73       156
weighted avg       0.76      0.76      0.75       156


Confusion Matrix on test set:
 [[36 29]
 [ 9 82]]


# 6. Linear Discriminant Analysis

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [19]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [20]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Padronizando as características
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de LDA com validação cruzada
clf = LinearDiscriminantAnalysis()

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.728      0.78225806 0.81451613 0.74193548 0.74193548]
Mean cross-validation score: 0.7617290322580645
Accuracy on test set: 0.7435897435897436

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.76      0.57      0.65        65
           1       0.74      0.87      0.80        91

    accuracy                           0.74       156
   macro avg       0.75      0.72      0.72       156
weighted avg       0.75      0.74      0.74       156


Confusion Matrix on test set:
 [[37 28]
 [12 79]]


# 7. Neural Network

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [22]:
path_database = ("C:/Users/melis/OneDrive/IA/IA-Trabalho-pratico/.venv/mental-health-in-tech-v2.csv")
data = pd.read_csv(path_database, index_col = False)
data = pd.DataFrame(data)

In [23]:
# Separando características e variável alvo
X = data.drop('treatment', axis=1)
y = data['treatment']

# Padronizando as características
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinando o modelo de Rede Neural com validação cruzada
clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)

# Avaliando o modelo com cross-validation
cv_scores = cross_val_score(clf, X_train, y_train, cv=5)

print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# Treinando o modelo com o conjunto de treinamento completo
clf.fit(X_train, y_train)

# Fazendo previsões no conjunto de teste
y_pred = clf.predict(X_test)

# Avaliando o modelo no conjunto de teste
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print("\nClassification Report on test set:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix on test set:\n", confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.672      0.69354839 0.75806452 0.68548387 0.65322581]
Mean cross-validation score: 0.6924645161290324
Accuracy on test set: 0.7115384615384616

Classification Report on test set:
               precision    recall  f1-score   support

           0       0.69      0.57      0.62        65
           1       0.73      0.81      0.77        91

    accuracy                           0.71       156
   macro avg       0.71      0.69      0.69       156
weighted avg       0.71      0.71      0.71       156


Confusion Matrix on test set:
 [[37 28]
 [17 74]]
