In [1]:
class OneHotEncoder:
    """Encode categorical values as one-hot lists of ints.

    After fitting, ``classes_`` holds the sorted unique categories; the
    position of a category in ``classes_`` is the index of the 1 in its
    encoded vector.

    Args:
        ignore_unknown: When true, ``transform`` encodes categories that were
            not seen during ``fit`` as an all-zero vector instead of raising.
    """

    def __init__(self, ignore_unknown=False):
        self.classes_ = None  # set by fit(); None means "not fitted yet"
        self.ignore_unknown = ignore_unknown

    def fit(self, data):
        """Learn the unique categories in ``data``.

        The categories must be hashable and mutually orderable, because they
        are de-duplicated with ``set`` and ordered with ``sorted``.

        Returns:
            The encoder itself, so calls can be chained.
        """
        self.classes_ = sorted(set(data))
        return self

    def transform(self, data):
        """Convert each item of ``data`` to a one-hot vector.

        Returns:
            A list with one list of ints per item, each as long as
            ``classes_``.

        Raises:
            ValueError: If the encoder is not fitted, or if an item is not a
                fitted category and ``ignore_unknown`` is false.
        """
        if self.classes_ is None:
            raise ValueError("Must fit encoder before transforming")

        width = len(self.classes_)
        # Build the category -> index table once per call so each item costs
        # a single hash lookup instead of two linear scans of classes_.
        # setdefault keeps the first position, matching list.index().
        index_of = {}
        for position, category in enumerate(self.classes_):
            index_of.setdefault(category, position)

        encoded = []
        for item in data:
            try:
                position = index_of.get(item)
            except TypeError:
                # Unhashable items cannot equal a fitted (hashable) category,
                # so they are handled like any other unknown value.
                position = None

            vec = [0] * width
            if position is None:
                if not self.ignore_unknown:
                    raise ValueError(f"Unknown category '{item}' encountered")
                # Unknown categories stay all zeros.
            else:
                vec[position] = 1
            encoded.append(vec)

        return encoded

    def fit_transform(self, data):
        """Fit on ``data`` and return its encoding in one step."""
        self.fit(data)
        return self.transform(data)

    def inverse_transform(self, encoded_data):
        """Convert one-hot vectors back to their categories.

        Each vector must be exactly as long as ``classes_`` and contain a
        single entry equal to 1 with every other entry equal to 0. All-zero
        vectors (as produced for unknown categories) cannot be decoded.

        Raises:
            ValueError: If the encoder is not fitted or a vector is not a
                valid one-hot encoding for the fitted categories.
        """
        if self.classes_ is None:
            raise ValueError("Must fit encoder before inverse transforming")

        width = len(self.classes_)
        decoded = []
        for vec in encoded_data:
            row = list(vec)
            hot = [i for i, value in enumerate(row) if value != 0]
            # Checking only sum(row) == 1 would accept rows like [1, 1, -1]
            # or rows of the wrong width, so verify the shape explicitly.
            if len(row) != width or len(hot) != 1 or row[hot[0]] != 1:
                raise ValueError("Invalid one-hot encoded vector")
            decoded.append(self.classes_[hot[0]])

        return decoded

In [2]:
# Demo: encoder that maps categories unseen during fit to an all-zero vector.
encoder = OneHotEncoder(ignore_unknown=True)
data = ['cat', 'dog', 'mouse', 'dog']

# Learn the categories from `data` and encode it in a single call.
encoded = encoder.fit_transform(data)
print("Encoded:")
for row in encoded:
    print(row)
# Output:
# [1, 0, 0]  # cat
# [0, 1, 0]  # dog
# [0, 0, 1]  # mouse
# [0, 1, 0]  # dog

# Encode fresh data; 'rabbit' was never fitted, so it becomes all zeros.
new_data = ['dog', 'cat', 'rabbit']
print("\nNew data encoded:")
print(encoder.transform(new_data))
# Output (with ignore_unknown=True):
# [[0, 1, 0], [1, 0, 0], [0, 0, 0]]

# Map one-hot vectors back to their category labels.
decoded = encoder.inverse_transform([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
print("\nDecoded:", decoded)  # Output: ['cat', 'dog', 'mouse']

Encoded:
[1, 0, 0]
[0, 1, 0]
[0, 0, 1]
[0, 1, 0]

New data encoded:
[[0, 1, 0], [1, 0, 0], [0, 0, 0]]

Decoded: ['cat', 'dog', 'mouse']
