In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd
# Baseline: logistic regression on the raw iris features.
X, y = load_iris(return_X_y=True)

# Pin the shuffle seed so the train/test partition -- and therefore the score
# printed below -- is the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))  # mean accuracy on the held-out split

0.9473684210526315


In [14]:


# Reload iris as a single dataset object and tabulate it: one column per
# feature name, with the class label appended as a final `target` column.
data = load_iris()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(target=data.target)

# Sanity check: dimensions on the first line, then the leading rows.
print(df.shape, df.head(), sep="\n")


(150, 5)
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [15]:
# Take the feature matrix and labels from the loaded dataset, then hold out
# 20% of the rows for testing using a fixed shuffle seed.
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [16]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# k-nearest-neighbours with the library's default settings, fitted on the
# training split (fit returns the estimator, so it can be bound directly).
model = KNeighborsClassifier().fit(X_train, y_train)

# Predict the held-out rows and compare against the true labels.
y_pred = model.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", knn_accuracy)

Accuracy: 1.0


In [17]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column of X.
# NOTE(review): the scaler is fitted on all of X rather than on X_train only,
# so X_scaled should not be used to evaluate a model against X_test.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
print(X_scaled[:5])  # first five standardised rows


[[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]
 [-1.50652052  0.09821729 -1.2833891  -1.3154443 ]
 [-1.02184904  1.24920112 -1.34022653 -1.3154443 ]]


In [18]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Scaling lives inside the pipeline, so it is re-fitted together with the SVC
# whenever the grid search fits a candidate.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('svc', SVC()),
    ]
)

# Candidate values for the SVC's C parameter (addressed as <step>__<param>).
param_grid = {'svc__C': [0.1, 1, 10]}

# 3-fold cross-validated search over the grid on the training split.
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=3).fit(X_train, y_train)

print("Best Score:", grid.best_score_)
print("Best Params:", grid.best_params_)

Best Score: 0.9583333333333334
Best Params: {'svc__C': 1}


In [19]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Reduce X to its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Cluster the projected points into three groups; the seed keeps the
# centroid initialisation repeatable.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X_pca)
cluster_centres = kmeans.cluster_centers_
print("Cluster Centers:", cluster_centres)

Cluster Centers: [[ 2.34652659  0.27393856]
 [-2.64241546  0.19088505]
 [ 0.66567601 -0.3316042 ]]


In [20]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Hard-voting ensemble over three different kinds of classifier.
clf1 = LogisticRegression()
# Seeded: decision-tree fitting permutes features randomly at each split, so
# without a fixed random_state repeated runs can grow different trees.
clf2 = DecisionTreeClassifier(random_state=42)
clf3 = KNeighborsClassifier()

voting = VotingClassifier(
    estimators=[('lr', clf1), ('dt', clf2), ('knn', clf3)],
    voting='hard',  # majority vote over the predicted class labels
)

voting.fit(X_train, y_train)
print("Voting Accuracy:", voting.score(X_test, y_test))

Voting Accuracy: 1.0


In [21]:
from sklearn.datasets import load_breast_cancer
from imblearn.over_sampling import SMOTE
import sklearn
print(sklearn.__version__)  # record the scikit-learn version used for this run

# NOTE: this rebinds X and y from the iris data to the breast-cancer data.
# X_train / X_test / y_train / y_test are not reassigned in this cell.
X, y = load_breast_cancer(return_X_y=True)

# SMOTE generates synthetic samples using random draws; fix the seed so the
# resampled rows are identical on every run (the resulting shape is unaffected).
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
print("Original:", X.shape, "Resampled:", X_res.shape)

1.7.0
Original: (569, 30) Resampled: (714, 30)


In [22]:
from sklearn.metrics import make_scorer, f1_score

# Re-run the grid search over the same pipeline and parameter grid, this time
# ranking candidates by weighted-average F1 instead of the default score.
# NOTE(review): X_train / y_train are not reassigned by the breast-cancer cell,
# so this presumably still trains on the earlier iris split -- confirm intent.
f1 = make_scorer(f1_score, average='weighted')
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, scoring=f1, cv=3)
grid.fit(X_train, y_train)
print("Best F1 Score:", grid.best_score_)

Best F1 Score: 0.9580911025968497
