In [None]:
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_diabetes
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree

In [None]:

# Iris: feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Keep the two features with the highest chi-squared score against the labels
selector = SelectKBest(score_func=chi2, k=2)
X_new = selector.fit_transform(X, y)

# get_support() yields one boolean per input column; pair each flag with its
# column name and keep the names whose flag is set
mask = selector.get_support()
selected_features = [
    name for name, keep in zip(iris.feature_names, mask) if keep
]
print("Selected features:", selected_features)


In [None]:

# Wine dataset, 70/30 train/test split with a fixed seed
wine = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
    random_state=42,
)

# Fit a seeded 100-tree random forest on the training split
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Importances reported by the fitted forest, plotted against the feature names
importances = rf.feature_importances_

# Horizontal bar chart: one bar per feature
fig, ax = plt.subplots()
ax.barh(wine.feature_names, importances)
ax.set_xlabel("Feature Importance")
ax.set_ylabel("Feature")
plt.show()


In [None]:
# Breast Cancer dataset, 70/30 train/test split with a fixed seed
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=42,
)

# Recursive feature elimination down to 10 features, driven by a linear SVM
svc = SVC(kernel="linear")
rfe = RFE(estimator=svc, n_features_to_select=10).fit(X_train, y_train)

# Score the fitted selector on the held-out split
y_pred = rfe.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
# Load the Diabetes dataset
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.3, random_state=42
)

# Apply Lasso regression for feature selection: the L1 penalty can drive
# coefficients to exactly zero, and a zero coefficient means the feature
# is dropped from the model.
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)

# Report which features survived (non-zero coefficient). A dedicated name is
# used so the result of the earlier chi2 selection cell is not overwritten.
lasso_selected_features = [
    name for name, coef in zip(diabetes.feature_names, lasso.coef_) if coef != 0
]
print("Selected features:", lasso_selected_features)

# Evaluate the fitted Lasso model on the held-out split. Zeroed features
# contribute nothing to these predictions.
y_pred = lasso.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))


In [None]:
# Iris dataset, 70/30 train/test split with a fixed seed
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=42,
)

# Fit logistic regression with the iteration cap set to 200
lr = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Report accuracy and the confusion matrix on the held-out split
y_pred = lr.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


In [None]:
# Breast Cancer dataset, 70/30 train/test split with a fixed seed
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=42,
)

# Fit a support vector classifier with its default settings
svm = SVC().fit(X_train, y_train)

# Report accuracy and the confusion matrix on the held-out split
y_pred = svm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [None]:
# Load the Wine dataset
wine = load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=42
)

# Train Decision Tree classifier.
# The seed is pinned (as for the split above) so that re-running the cell
# produces the same tree and therefore the same figure.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Visualize the decision tree.
# target_names is a NumPy array; it is converted to a plain list because some
# scikit-learn releases only accept a list for class_names.
plt.figure(figsize=(12, 8))
plot_tree(
    dt,
    feature_names=wine.feature_names,
    class_names=list(wine.target_names),
    filled=True,
)
plt.show()

In [None]:
# Load the California Housing dataset
# (scikit-learn fetches and caches it on first use, so the first run may need
# network access)
housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    housing.data, housing.target, test_size=0.3, random_state=42
)

# Train linear regression model
lr = LinearRegression()
lr.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = lr.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))

In [None]:
# Load the Diabetes dataset
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.3, random_state=42
)

# Train Ridge regression model (L2 penalty strength alpha=1.0)
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = ridge.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))


In [None]:
# Load the California Housing dataset
# (scikit-learn fetches and caches it on first use, so the first run may need
# network access)
housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    housing.data, housing.target, test_size=0.3, random_state=42
)

# Train decision tree regression model.
# The seed is pinned (as for the split above) so the fitted tree, its error
# and the figure are the same on every run.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)

# Evaluate model on the held-out split
y_pred = dt_reg.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Visualize the decision tree.
# The regressor is grown with no depth limit, so drawing every node would be
# very slow and unreadable. Only the top levels are rendered; the fitted model
# and the error printed above are unaffected.
plt.figure(figsize=(12, 8))
plot_tree(dt_reg, feature_names=housing.feature_names, filled=True, max_depth=3)
plt.show()
