In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import export_text


#  Define the Toy Dataset

In [11]:
# Example dataset of cat characteristics
data = {
    'fur_length': ['short', 'long', 'medium', 'short', 'long', 'short', 'hairless', 'medium'],
    'weight': [4.2, 5.5, 4.8, 4.0, 5.8, 3.9, 4.5, 4.7],
    'color': ['black', 'white', 'gray', 'brown', 'gray', 'black', 'white', 'brown'],
    'ear_shape': ['pointed', 'rounded', 'pointed', 'rounded', 'pointed', 'rounded', 'pointed', 'pointed'],
    'cat_type': ['short-haired', 'long-haired', 'medium-haired', 'short-haired', 'long-haired', 'short-haired', 'hairless', 'medium-haired']
}

# Convert to a DataFrame
df = pd.DataFrame(data)


# Encode Categorical Features
Since decision trees in scikit-learn require numerical input,
we need to convert categorical features into numerical values using LabelEncoder.

In [20]:
# Initialize label encoders
le_fur = LabelEncoder()
le_color = LabelEncoder()
le_ear = LabelEncoder()
le_cat_type = LabelEncoder()

# Apply label encoding to categorical features
df['fur_length'] = le_fur.fit_transform(df['fur_length'])
df['color'] = le_color.fit_transform(df['color'])
df['ear_shape'] = le_ear.fit_transform(df['ear_shape'])
df['cat_type'] = le_cat_type.fit_transform(df['cat_type'])

# Features (X) and target (y)
X = df.drop('cat_type', axis=1)
y = df['cat_type']


# Split the Data into Training and Test Sets

In [21]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [23]:
X_train

Unnamed: 0,fur_length,weight,color,ear_shape
0,3,4.2,0,0
7,2,4.7,1,0
2,2,4.8,2,0
4,1,5.8,2,0
3,3,4.0,1,1
6,0,4.5,3,0


In [24]:
y_train

0    3
7    2
2    2
4    1
3    3
6    0
Name: cat_type, dtype: int64

# Train the Decision Tree Classifier

In [14]:
# Create a DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train, y_train)


# Make Predictions and Evaluate

In [15]:
# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 50.00%


# Visualize the Decision Tree

In [8]:
# Visualize the decision tree
tree_rules = export_text(clf, feature_names=list(X.columns))
print(tree_rules)


|--- fur_length <= 2.50
|   |--- fur_length <= 1.50
|   |   |--- color <= 2.50
|   |   |   |--- class: 1
|   |   |--- color >  2.50
|   |   |   |--- class: 0
|   |--- fur_length >  1.50
|   |   |--- class: 2
|--- fur_length >  2.50
|   |--- class: 3



# Factorial function

In [14]:
def fact(n):
    if n==0:
        return 1
    else:
        return n* fact(n-1)
abs=fact(4)
abs 

24

# Fibonachi Series

In [17]:
def fib(n):
    if n == 0: 
        return 0
    elif n == 1:    
        return 1
    else:
        return fib(n-1) + fib(n-2)
abc =fib(8)
abc

21

 # Code for Calculating Entropy 

In [62]:
def entropy(n):
    classes, counts=np.unique(y, return_counts=True)
    prob= counts/len(y)
    entropy_value=-np.sum(prob * np.log2(prob))
    return entropy_value

In [65]:
if __name__ == "__main__":
    y_binary=np.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0])
    print("Entropy for binary classification:", entropy(y_binary))
    y_multi = np.array([0, 1, 2, 1, 0, 2, 1, 0, 2, 2])  
    print("Entropy for multi-class classification:", entropy(y_multi))

Entropy for binary classification: 1.0
Entropy for multi-class classification: 1.0


# Code for Calculating  Information Gain

In [75]:
# Function to calculate entropy
def entropy(y):
    classes, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    return -np.sum(probabilities * np.log2(probabilities))

# Function to calculate information gain
def information_gain(X_col, y, threshold):
    # Split the data into two parts based on the threshold
    left_idx = X_col <= threshold
    right_idx = X_col > threshold
    
    # Calculate the entropy for each subset
    left_entropy = entropy(y[left_idx])
    right_entropy = entropy(y[right_idx])
    
    # Calculate the weighted average of the entropy for the two subsets
    n = len(y)
    n_left, n_right = len(y[left_idx]), len(y[right_idx])
    
    weighted_entropy = (n_left / n) * left_entropy + (n_right / n) * right_entropy
    
    # Information gain is the parent entropy minus the weighted child entropy
    parent_entropy = entropy(y)
    info_gain = parent_entropy - weighted_entropy
    
    return info_gain

# Example usage
if __name__ == "__main__":
    # Feature values (X_col) and labels (y)
    X_col = np.array([2.771244718, 1.728571309, 3.678319846, 3.961043357, 2.999208922,
                      7.497545867, 9.00220326, 7.444542326, 10.12493903, 6.642287351])
    
    y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    
    # Set a threshold (e.g., 6)
    threshold = 8
    
    # Calculate the information gain
    info_gain = information_gain(X_col, y, threshold)
    print(f"Information Gain for threshold {threshold}: {info_gain}")

    
    

Information Gain for threshold 8: 0.2364527976600279


In [78]:
def greet():
    print("Hello, world!")

if __name__ == "__main__":
    greet()


Hello, world!


In [83]:
def abc():
    print("Hi Bro")
if __name__ == "__main__":
    abc()

Hi Bro


In [6]:
import numpy as np

In [11]:
X_train = np.array([[1,1,1],[1,0,1],[1,0,0],[1,0,0],[1,1,1],[0,1,1],[0,0,0],[1,0,1],[0,1,0],[1,0,0]])
y_train = np.array([1,1,0,0,1,0,0,1,1,0])

In [12]:
X_train

array([[1, 1, 1],
       [1, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 1, 1],
       [0, 1, 1],
       [0, 0, 0],
       [1, 0, 1],
       [0, 1, 0],
       [1, 0, 0]])

In [18]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load a dataset (Iris dataset as an example)
data = load_iris()
X = data.data  # Features
y = data.target  # Labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Decision Tree classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 100.00%
