In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

df = pd.read_csv("email_spam.csv")
X = df[['word_free', 'word_offer', 'word_click', 'num_links', 'num_caps', 'sender_reputation']]
y = df['is_spam']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = LogisticRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
y_prob = model.predict_proba(X_test_scaled)[:, 1] 

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-score:", f1_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.475
Precision: 0.2
Recall: 0.05555555555555555
F1-score: 0.08695652173913043
ROC-AUC: 0.31313131313131315
Confusion Matrix:
 [[18  4]
 [17  1]]


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

df = pd.read_csv("customer_churn.csv")
X = df[["tenure_months","monthly_charges","support_tickets","is_premium","avg_usage_hours"]]
y = df["churn"]

X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:,1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_prob))

Accuracy: 0.525
Precision: 0.5416666666666666
Recall: 0.6190476190476191
F1 Score: 0.5777777777777777
ROC AUC: 0.4761904761904762


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

df = pd.read_csv("flowers.csv")
X = df[["sepal_length","sepal_width","petal_length","petal_width"]]
y = df["species"]
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
k_values = range(1, 26, 2)
cv_scores = [cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5).mean() for k in k_values]
best_k = k_values[int(np.argmax(cv_scores))]
best_cv_score = max(cv_scores)
knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, y_train)
y_pred = knn.predict(X_test)


print("Best k:", best_k)
print("CV Score:", best_cv_score)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Best k: 21
CV Score: 0.3125
Test Accuracy: 0.475
Confusion Matrix:
 [[9 1 4]
 [7 1 5]
 [3 1 9]]


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

df = pd.read_csv("airbnb.csv")

X = df[["size_m2","distance_center_km","rating","num_reviews"]]
y = df["price"]

X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

k_vals = range(1, 26, 2)
cv_rmse = [ -cross_val_score(KNeighborsRegressor(n_neighbors=k), X_train, y_train,
                             cv=5, scoring="neg_root_mean_squared_error").mean() for k in k_vals]

best_k = k_vals[np.argmin(cv_rmse)]
model = KNeighborsRegressor(n_neighbors=best_k).fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Best k:", best_k)
print("CV RMSE:", min(cv_rmse))
print("Test RMSE:", mean_squared_error(y_test, y_pred) ** 0.5)
print("Test R²:", r2_score(y_test, y_pred))

Best k: 25
CV RMSE: 143.49769761619137
Test RMSE: 112.97143422452243
Test R²: -0.158177130420893
