In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Build a small hypothetical dataset: five people, each with a name, age,
# location, salary and marital status.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Ethan'],
    'Age': [25, 30, 35, 40, 45],
    'Location': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    'Salary': [70000, 80000, 90000, 50000, 60000],
    'Marital Status': ['Not Married', 'Married', 'Not Married', 'Married', 'Not Married']
}

df = pd.DataFrame(data)

# Encode the categorical columns as integer codes. Each column gets its own
# encoder: refitting a single shared encoder on the second column would throw
# away the mapping learned for the first, making it impossible to translate
# the Location codes back to city names later.
location_encoder = LabelEncoder()
marital_status_encoder = LabelEncoder()
df['Location'] = location_encoder.fit_transform(df['Location'])
df['Marital Status'] = marital_status_encoder.fit_transform(df['Marital Status'])

# Backward-compatible alias: `label_encoder` keeps referring to the encoder
# that was fitted on 'Marital Status', so code that uses it to decode the
# target labels continues to work.
label_encoder = marital_status_encoder

# Split the frame into the feature matrix (X) and the target vector (y).
# 'Name' is deliberately left out of the features.
X = df[['Age', 'Location', 'Salary']]
y = df['Marital Status']

# Fit the decision tree. A fixed random_state makes the fitted tree
# reproducible across kernel restarts (scikit-learn shuffles candidate
# features while searching for splits).
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X, y)

df  # Display the transformed dataset

Unnamed: 0,Name,Age,Location,Salary,Marital Status
0,Alice,25,3,70000,1
1,Bob,30,2,80000,0
2,Charlie,35,0,90000,1
3,Diana,40,1,50000,0
4,Ethan,45,4,60000,1


In [3]:
# Estimate the probability of each class for every individual.
# NOTE: X is the very data the classifier was fitted on, so these are
# in-sample estimates; a fully grown tree can memorise its training rows,
# which is consistent with every value below being exactly 0.0 or 1.0.
probabilities = classifier.predict_proba(X)

# The columns of `probabilities` follow the order of classifier.classes_.
# Decode those encoded classes back to their original labels instead of
# hard-coding "column 0 is Married", so each probability column is always
# attached to the right name. This relies on `label_encoder` having been
# fitted on 'Marital Status' most recently.
class_names = label_encoder.inverse_transform(classifier.classes_)
probability_columns = [f'Prob. {class_name}' for class_name in class_names]

# Attach one probability column per class to the dataframe for inspection.
for column_index, column_name in enumerate(probability_columns):
    df[column_name] = probabilities[:, column_index]

df[['Name', 'Age', 'Location', 'Salary', *probability_columns]]  # Display relevant columns

Unnamed: 0,Name,Age,Location,Salary,Prob. Married,Prob. Not Married
0,Alice,25,3,70000,0.0,1.0
1,Bob,30,2,80000,1.0,0.0
2,Charlie,35,0,90000,0.0,1.0
3,Diana,40,1,50000,1.0,0.0
4,Ethan,45,4,60000,0.0,1.0
