In [None]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
import pandas as pd
import matplotlib.pyplot as plt

# Load the iris dataset and mirror it into a DataFrame (features + target)
# so it can be inspected with pandas.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Preview the first rows, then print summary statistics and feature names.
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nDataset Summary:", df.describe(), sep="\n")
print(f"\nFeature Names: {iris.feature_names}")

# Univariate feature selection: keep the 2 features scored highest by
# f_classif against the class labels.
X, y = iris.data, iris.target
selector = SelectKBest(score_func=f_classif, k=2)
selector.fit(X, y)
X_selected = selector.transform(X)

# Translate the boolean support mask back into feature names.
selected_features = [
    name
    for name, kept in zip(iris.feature_names, selector.get_support())
    if kept
]
print(f"\nSelected Features (ANOVA F-Value): {selected_features}")

# Project the full feature matrix onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Per-component share of variance and its running total.
explained_variance = pca.explained_variance_ratio_
cumulative_variance = explained_variance.cumsum()
total_explained = cumulative_variance[-1]  # last running total = both components

print(f"\nExplained Variance Ratio by PCA Components: {explained_variance}")
print(f"Cumulative Variance Explained by Top 2 Components: {total_explained}")

# Scatter plot of the 2-D PCA projection, one colour per target class.
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
labels = iris.target_names

for target, (color, label) in enumerate(zip(colors, labels)):
    # Rows of X_pca whose class label equals the current target index.
    class_points = X_pca[y == target]
    plt.scatter(
        class_points[:, 0],
        class_points[:, 1],
        color=color,
        label=label,
        alpha=0.7,
    )

plt.title('PCA-Transformed Data (2 Components)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()

# Report whether the two retained components exceed the 95% variance mark
# (strictly greater than 0.95; anything else falls in the second message).
variance_summary = (
    "\nThe top 2 PCA components explain over 95% of the variance."
    if cumulative_variance[-1] > 0.95
    else "\nThe top 2 PCA components explain less than 95% of the variance."
)
print(variance_summary)

# Closing remark, printed regardless of the threshold outcome.
print("\nPCA has reduced the dataset to 2 dimensions while retaining the majority of its variance.")