In [12]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

# Load the raw CSV (it has no header row) and attach the column names.
column_names = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]
data = pd.read_csv('adult.data', header=None)
data.columns = column_names

# Every object-dtype column is replaced in place by integer codes; the fitted
# encoder for each column is kept in category_mapping, keyed by column name.
categorical_cols = list(data.select_dtypes(include=['object']).columns)
category_mapping = {}

for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    category_mapping[col] = le

# 'income' is the target; all remaining columns form the feature matrix.
y = data['income'].to_numpy()
X = data.drop(columns='income').to_numpy()

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a single decision tree on the training split.
# random_state is pinned (same seed as the split and the forest below) so the
# reported metrics are reproducible from one run to the next; an unseeded tree
# may pick different splits when several candidates tie.
decision_tree_classifier = DecisionTreeClassifier(random_state=42)
decision_tree_classifier.fit(X_train, y_train)

# Score the tree on the held-out test split.
y_pred_tree = decision_tree_classifier.predict(X_test)
accuracy_tree = accuracy_score(y_test, y_pred_tree)

print("Decision Tree Accuracy:", accuracy_tree)

# Fit a 100-tree random forest on the same training split.
random_forest_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest_classifier.fit(X_train, y_train)

# Score the forest on the held-out test split.
y_pred_rf = random_forest_classifier.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

print("Random Forest Accuracy:", accuracy_rf)


# Both models are already fitted and y_pred_tree / y_pred_rf were produced
# from those fits, so no retraining happens here: refitting at this point
# would only cost time and leave the model objects out of sync with the
# predictions being scored.

# Evaluate the performance of the Decision Tree Classifier on the test split.
# precision/recall/F1 use scikit-learn's defaults (binary, positive label 1).
accuracy_tree = accuracy_score(y_test, y_pred_tree)
precision_tree = precision_score(y_test, y_pred_tree)
recall_tree = recall_score(y_test, y_pred_tree)
f1_tree = f1_score(y_test, y_pred_tree)
print("Decision Tree Classifier:")
print("Accuracy:", accuracy_tree)
print("Precision:", precision_tree)
print("Recall:", recall_tree)
print("F1 Score:", f1_tree)

# Evaluate the performance of the Random Forest Classifier on the test split.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)
print("Random Forest Classifier:")
print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)
print("F1 Score:", f1_rf)

# Measure how long one fit of the Decision Tree Classifier takes (seconds).
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() can jump if the system clock is adjusted.
# NOTE: this refits the existing estimator object on the training split.
start_time = time.perf_counter()
decision_tree_classifier.fit(X_train, y_train)
end_time = time.perf_counter()
training_time_tree = end_time - start_time
print("Decision Tree Classifier Training Time:", training_time_tree)

# Measure how long one fit of the Random Forest Classifier takes (seconds).
start_time = time.perf_counter()
random_forest_classifier.fit(X_train, y_train)
end_time = time.perf_counter()
training_time_rf = end_time - start_time
print("Random Forest Classifier Training Time:", training_time_rf)

Decision Tree Accuracy: 0.8139106402579457
Random Forest Accuracy: 0.8616612927990174
Decision Tree Classifier:
Accuracy: 0.8139106402579457
Precision: 0.6085904416212946
Recall: 0.6403564608529599
F1 Score: 0.6240694789081885
Random Tree Classifier :
Accuracy: 0.8616612927990174
Precision: 0.7477810650887574
Recall: 0.6435391470401018
F1 Score: 0.6917550461854259
Decision Tree Classifier Training Time: 0.6342859268188477
Random Tree Classifier Training Time: 10.909450054168701
