In [4]:
# notebooks/02_model.ipynb içinde çalıştır

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load the training data; "activity" is the target, every other column is a feature
train = pd.read_csv("../data/train.csv")
y = train["activity"]
X = train.drop(columns=["activity"])

# 2. Hold out 20% of the rows for validation, stratified on the target so both
#    splits keep the same class proportions; the seed makes the split repeatable
split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_tr, X_val, y_tr, y_val = train_test_split(X, y, **split_options)

# 3. Candidate classifiers, keyed by the short name that is later used in the
#    printed reports, the results dictionary and the confusion-matrix file names
models = dict(
    logreg=LogisticRegression(max_iter=300),
    rf=RandomForestClassifier(n_estimators=200, random_state=42),
    svm=SVC(),
)

# 4. Empty dictionary that the training loop fills with one entry per model
results = dict()

# 5. Model loop: fit each candidate, report validation metrics, save a confusion matrix
from pathlib import Path

# plt.savefig does not create missing directories, so make sure the target exists
# before the first figure is written.
Path("../figures").mkdir(parents=True, exist_ok=True)

for name, model in models.items():
    print("="*50)
    print(f"Model: {name}")

    # Pipeline: StandardScaler + model. Fitting the pipeline on the training split
    # means the scaler statistics come from X_tr only and are reused for X_val.
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("clf", model)
    ])

    # Train on the training split, predict on the held-out validation split
    pipe.fit(X_tr, y_tr)
    y_pred = pipe.predict(X_val)

    # Measure performance: dict form is stored for later comparison,
    # text form is printed for the reader
    report = classification_report(y_val, y_pred, output_dict=True)
    print(classification_report(y_val, y_pred))

    # Store the fitted pipeline, its metrics and its validation predictions
    results[name] = {
        "pipeline": pipe,
        "report": report,
        "y_pred": y_pred
    }

    # Draw and save the confusion matrix. The class order is pinned to the
    # classes the pipeline was trained on, and the same labels are put on both
    # axes so the plot shows activity names instead of integer positions.
    labels = pipe.classes_
    cm = confusion_matrix(y_val, y_pred, labels=labels)
    plt.figure(figsize=(6,6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.title(f"Confusion Matrix: {name.upper()}")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(f"../figures/conf_matrix_{name}.png")
    plt.close()

# 6. Select the best model and save it
# The winner is the entry in `results` with the highest validation macro-F1
# (ties go to the model that was trained first), rather than a hard-coded name.
from pathlib import Path

best_name = max(
    results,
    key=lambda candidate: results[candidate]["report"]["macro avg"]["f1-score"],
)
best_model = results[best_name]["pipeline"]

# joblib.dump does not create missing directories.
Path("../models").mkdir(parents=True, exist_ok=True)
joblib.dump(best_model, "../models/best_model.joblib")
print(f"Seçilen model: {best_name}")
print("✅ En iyi model kaydedildi: models/best_model.joblib")

# 7. Print the performance table: one row per model with its validation
#    macro-averaged F1 and accuracy (both rounded to 4 decimals), best F1 first
performance = [
    {
        "Model": name,
        "F1-macro": round(data["report"]["macro avg"]["f1-score"], 4),
        "Accuracy": round(data["report"]["accuracy"], 4),
    }
    for name, data in results.items()
]

df_perf = pd.DataFrame(performance).sort_values("F1-macro", ascending=False)
print("\n📊 Model Karşılaştırma:")
print(df_perf)

import pandas as pd
import joblib

# Read the test data
test = pd.read_csv("../data/test.csv")

# Drop the 'activity' column if it happens to be present (precaution);
# errors="ignore" makes this a no-op when the column is absent
test = test.drop(columns=["activity"], errors="ignore")

# Load the trained model.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model = joblib.load("../models/best_model.joblib")

# Predict
preds = model.predict(test)

# Build submission.csv: "Id" is the row index of the test frame,
# "PredictedActivity" is the predicted label for that row
submission = pd.DataFrame({
    "Id": test.index,
    "PredictedActivity": preds
})
submission.to_csv("../submission.csv", index=False)

print("✅ submission.csv başarıyla oluşturuldu!")
# Last expression of the cell: re-read the written file and show its first rows
pd.read_csv("../submission.csv").head()


Model: logreg
                    precision    recall  f1-score   support

            LAYING       0.99      1.00      1.00       136
           SITTING       0.95      0.87      0.91       125
          STANDING       0.90      0.96      0.92       134
           WALKING       0.98      1.00      0.99       121
WALKING_DOWNSTAIRS       0.99      0.98      0.98        98
  WALKING_UPSTAIRS       0.99      0.97      0.98       108

          accuracy                           0.96       722
         macro avg       0.97      0.96      0.96       722
      weighted avg       0.96      0.96      0.96       722

Model: rf
                    precision    recall  f1-score   support

            LAYING       0.99      1.00      1.00       136
           SITTING       0.95      0.94      0.95       125
          STANDING       0.96      0.96      0.96       134
           WALKING       0.97      0.98      0.98       121
WALKING_DOWNSTAIRS       0.94      0.94      0.94        98
  WALKING_UP

Unnamed: 0,Id,PredictedActivity
0,0,STANDING
1,1,STANDING
2,2,STANDING
3,3,STANDING
4,4,STANDING
