In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Load the raw spreadsheet into `df`; the path is relative to the notebook's
# working directory, so the file must sit next to the notebook when run.
data_path = "Odunc_Guncel_Kategorili_Liste.xlsx"
df = pd.read_excel(data_path)
# Preview the first rows to sanity-check the load.
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'Odunc_Guncel_Kategorili_Liste.xlsx'

In [None]:
# Separate the label column from the feature columns.
target = "LC_Kategori"
X = df.drop(columns=[target])
y = df[target]

# Impute missing values per dtype.
# A blanket fillna("Unknown") would write a string into numeric columns,
# turning them into object dtype; the encoding step that follows would then
# treat those numbers as text categories. So numeric columns get their median
# first (fillna with a Series fills column-by-column, matched on column name),
# and only what is still missing afterwards receives the "Unknown" placeholder.
X = X.fillna(X.median(numeric_only=True)).fillna("Unknown")

In [None]:
# Replace every object-dtype feature column with integer codes.
# Values are cast to str first so a column holding mixed Python types is
# encoded as a single set of text categories. A fresh LabelEncoder is fitted
# per column; the fitted encoders are not kept.
label_cols = X.select_dtypes(include=['object']).columns
encoded_columns = {
    name: LabelEncoder().fit_transform(X[name].astype(str))
    for name in label_cols
}
for name, codes in encoded_columns.items():
    X[name] = codes
X.head()

In [None]:
# Degree-2 polynomial expansion of the numeric feature columns
# (include_bias=False leaves out the constant column).
numeric_X = X.select_dtypes(include=[np.number])
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(numeric_X)

# Keep the generated feature names and the original row index so the expanded
# frame is readable and stays row-aligned with X; a bare pd.DataFrame(array)
# would label the columns 0..n-1 and reset the index.
poly_df = pd.DataFrame(
    poly_features,
    columns=poly.get_feature_names_out(),
    index=numeric_X.index,
)
poly_df.head()

In [None]:
# Interaction feature: one category per (Bolum, Fakulte) combination.
# The combined key is a string such as "3_5"; storing it as-is would put a
# text column into X, and X is later passed directly to the tree models,
# which require numeric input. Encode the key to integer codes instead.
bolum_fakulte_key = X['Bolum'].astype(str) + "_" + X['Fakulte'].astype(str)
X['Bolum_Fakulte_Cross'] = LabelEncoder().fit_transform(bolum_fakulte_key)

In [None]:
# Univariate feature selection with the chi-squared score.
# chi2 rejects negative inputs, hence the absolute value.
# NOTE(review): abs() folds negative values onto positive ones; confirm that
# is acceptable for any column that can legitimately be negative.
numeric_features = X.select_dtypes(include=[np.number]).abs()

# Never request more features than exist: a fixed k=10 is invalid when the
# frame has fewer than 10 numeric columns.
k_best = min(10, numeric_features.shape[1])
selector = SelectKBest(chi2, k=k_best)
selected = selector.fit_transform(numeric_features, y)

# Names of the columns that survived selection (`selected` itself is a bare array).
selected_columns = numeric_features.columns[selector.get_support()]
selected[:5]

In [None]:
# Scale the selected feature matrix with a RobustScaler (default settings).
# The scaler is fitted on every row of `selected`, i.e. before any
# train/test split is made.
scaler = RobustScaler().fit(selected)
scaled = scaler.transform(selected)

In [None]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline: a single decision tree with default hyper-parameters.
# The tree is seeded with the same value as the split so the baseline scores
# are reproducible from run to run; an unseeded tree can differ between runs.
base_model = DecisionTreeClassifier(random_state=42)
base_model.fit(X_train, y_train)
pred_base = base_model.predict(X_test)

# Weighted F1 averages the per-class F1 scores weighted by class support.
print("Base Accuracy:", accuracy_score(y_test, pred_base))
print("Base F1:", f1_score(y_test, pred_base, average='weighted'))

In [None]:
# Random forest: 200 trees, depth capped at 12, seeded for repeatability.
# Trained and evaluated on the same split as the baseline tree.
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=12,
    random_state=42,
).fit(X_train, y_train)
pred = model.predict(X_test)

# Per-class precision / recall / F1 on the held-out rows.
report = classification_report(y_test, pred)
print(report)

In [None]:
# Confusion matrix of the random-forest predictions on the held-out rows.
# Fix the label order explicitly (sorted union of true and predicted labels)
# so the same sequence can be used for both the matrix and the tick labels.
class_labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(pred)]))
cm = confusion_matrix(y_test, pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
# Rows are the true classes, columns the predicted classes.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion Matrix")
plt.show()