In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Preprocessing demo on the Iris dataset: inject a couple of data-quality
# problems, clean them up, then standardise the numeric features and one-hot
# encode the species label into `final_transformed_data`.

# Load the Iris dataset
iris = load_iris()

# Feature columns are referenced by name everywhere below, so adding or
# reordering other columns (target, species, ...) cannot change what is scaled.
feature_cols = list(iris.feature_names)

# Create a DataFrame holding the measurements plus the integer class label
data = pd.DataFrame(data=iris.data, columns=feature_cols)
data['target'] = iris.target

# Introduce missing values and a duplicate row
data.iloc[0, 0] = np.nan
data.iloc[2, 2] = np.nan
data = pd.concat([data, data.iloc[[0]]], ignore_index=True)

# Handle missing values: mean-impute the feature columns only. The class label
# is deliberately left out, since a fractional mean would not be a valid class.
data[feature_cols] = data[feature_cols].fillna(data[feature_cols].mean())

# Remove duplicates. This drops the injected copy and any other rows that are
# identical across all columns, so the index is rebuilt afterwards to keep
# `data` positionally and label-wise aligned with the frames derived from it.
data = data.drop_duplicates().reset_index(drop=True)
assert not data[feature_cols].isna().any().any(), "features still contain missing values"

# Merge additional species data (map each integer label to its species name)
data['species'] = iris.target_names[data['target'].to_numpy()]

# Scale features to zero mean / unit variance
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data[feature_cols])

# One-hot encode species
encoder = OneHotEncoder(sparse_output=False)
encoded_species = encoder.fit_transform(data[['species']])

# Create final transformed dataset; both parts share `data`'s index so the
# result can be joined back to `data` without misaligned rows.
final_transformed_data = pd.concat([
    pd.DataFrame(scaled_features, columns=feature_cols, index=data.index),
    pd.DataFrame(
        encoded_species,
        columns=encoder.get_feature_names_out(['species']),
        index=data.index,
    ),
], axis=1)
assert len(final_transformed_data) == len(data), "row count changed during transformation"

# Display the transformed dataset
print("Transformed Dataset:")
print(final_transformed_data.head(10))


Transformed Dataset:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0          -0.000393          1.012401          -1.351472         -1.308624   
1          -1.148762         -0.137353          -1.351472         -1.308624   
2          -1.390951          0.322549          -0.003901         -1.308624   
3          -1.512046          0.092598          -1.294339         -1.308624   
4          -1.027667          1.242352          -1.351472         -1.308624   
5          -0.543288          1.932205          -1.180073         -1.045486   
6          -1.512046          0.782450          -1.351472         -1.177055   
7          -1.027667          0.782450          -1.294339         -1.308624   
8          -1.754235         -0.367304          -1.351472         -1.308624   
9          -1.148762          0.092598          -1.294339         -1.440192   

   species_setosa  species_versicolor  species_virginica  
0             1.0                 0.0             