In [29]:
import os

# Show the directory the kernel is currently running in.
working_dir = os.getcwd()
print(working_dir)


c:\Users\merve\OneDrive\Masaüstü\e_commerce_analysis_pipline_project


In [30]:
# Switch to the project root so that the relative paths used by the pipeline
# (data/raw, data/processed, models, params.yaml) resolve correctly.
# The location can be overridden with the ECOMMERCE_PROJECT_DIR environment
# variable; when it is unset the original hard-coded path is used, so the
# behaviour on the author's machine is unchanged.
PROJECT_DIR = os.environ.get(
    "ECOMMERCE_PROJECT_DIR",
    "/Users/merve/OneDrive/Masaüstü/e_commerce_analysis_pipline_project",
)
os.chdir(PROJECT_DIR)
print(os.getcwd())  # Confirm that the switch to the intended directory worked


c:\Users\merve\OneDrive\Masaüstü\e_commerce_analysis_pipline_project


In [31]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib
import yaml

def preprocess_data(raw_path="data/raw/sales_data.csv",
                    output_dir="data/processed",
                    test_size=0.2,
                    random_state=42):
    """Clean, encode, split and scale the raw sales data, then write both splits to CSV.

    Steps:
      1. Read ``raw_path`` and drop every row that contains a missing value.
      2. Replace each object-dtype column with integer codes from a fresh
         ``LabelEncoder`` (the encoders are not persisted).
      3. Split the rows into train and test sets.
      4. Fit a ``StandardScaler`` on the training split only and apply it to
         both splits, so that no test-set statistics leak into training.
      5. Write ``train_data.csv`` and ``test_data.csv`` into ``output_dir``,
         creating the directory if needed.

    Note: every int64/float64 column is standardised, which includes a numeric
    target column; downstream metrics are therefore in standardised units.

    Args:
        raw_path: CSV file holding the raw data.
        output_dir: Directory that receives the two processed CSV files.
        test_size: Fraction of rows assigned to the test split.
        random_state: Seed passed to ``train_test_split``.

    Raises:
        ValueError: If no rows remain after dropping missing values.
    """
    # Load the data (e-commerce dataset pulled from PostgreSQL)
    df = pd.read_csv(raw_path)

    # Remove rows with missing values
    df = df.dropna()
    if df.empty:
        raise ValueError(f"No rows left in {raw_path} after dropping missing values")

    # Encode categorical variables. The encoders are fitted on the full frame so
    # that both splits share one label -> code mapping.
    categorical_columns = df.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Columns to standardise, selected after encoding exactly as before
    numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns

    # Train-test split happens BEFORE scaling; the rows in each split are the
    # same as when splitting afterwards because the split depends only on the
    # row count and the seed.
    train_data, test_data = train_test_split(df, test_size=test_size, random_state=random_state)
    # Work on copies so the column assignments below do not target views of df
    train_data = train_data.copy()
    test_data = test_data.copy()

    # Feature scaling: statistics come from the training rows only
    if len(numeric_columns) > 0:
        scaler = StandardScaler().fit(train_data[numeric_columns])
        train_data[numeric_columns] = scaler.transform(train_data[numeric_columns])
        test_data[numeric_columns] = scaler.transform(test_data[numeric_columns])

    # Save (make sure the target directory exists first)
    os.makedirs(output_dir, exist_ok=True)
    train_data.to_csv(os.path.join(output_dir, "train_data.csv"), index=False)
    test_data.to_csv(os.path.join(output_dir, "test_data.csv"), index=False)

    print("Preprocessing tamamlandi!")
def train_model():
    """Train three regressors on the processed data and record them with MLflow.

    Reads the ``train`` section of ``params.yaml`` for hyper-parameters, loads
    ``data/processed/train_data.csv`` and ``data/processed/test_data.csv``, and
    uses the ``quantity`` column as the target. For each of RandomForest,
    XGBoost and LinearRegression it:

      * fits the model on the training split inside its own MLflow run,
      * logs ``mae`` / ``rmse`` measured on the held-out test split, plus
        ``train_mae`` / ``train_rmse`` measured on the training split,
      * logs the model to MLflow and dumps it to ``models/<name>.pkl``.

    Raises:
        FileNotFoundError: If ``params.yaml`` or a processed CSV is missing.
        KeyError: If an expected key is absent from ``params.yaml`` or the
            ``quantity`` column is absent from the data.
    """
    # Select the MLflow experiment
    mlflow.set_experiment("Ecommerce Sales Prediction")

    # Read hyper-parameters
    with open("params.yaml", "r", encoding="utf-8") as f:
        params = yaml.safe_load(f)["train"]

    # Load both splits; the test split is what the reported metrics are based on
    train_data = pd.read_csv("data/processed/train_data.csv")
    test_data = pd.read_csv("data/processed/test_data.csv")
    X_train = train_data.drop(columns=["quantity"])  # Features
    y_train = train_data["quantity"]  # Target variable
    X_test = test_data.drop(columns=["quantity"])
    y_test = test_data["quantity"]

    models = {
        "RandomForest": RandomForestRegressor(n_estimators=params["rf_n_estimators"], max_depth=params["rf_max_depth"], random_state=42),
        "XGBoost": XGBRegressor(learning_rate=params["xgboost_learning_rate"], n_estimators=params["xgboost_n_estimators"], max_depth=params["xgboost_max_depth"], random_state=42),
        "LinearRegression": LinearRegression()
    }

    # joblib.dump does not create missing directories
    os.makedirs("models", exist_ok=True)

    for model_name, model in models.items():
        # The context manager ends the run on exit, so no explicit end_run() is needed
        with mlflow.start_run(run_name=model_name):
            print(f"Model Eğitiliyor: {model_name}")

            model.fit(X_train, y_train)

            # Predict on both splits
            train_pred = model.predict(X_train)
            test_pred = model.predict(X_test)

            # Evaluation metrics: "mae"/"rmse" come from unseen data; the
            # train_* values are kept to make over-fitting visible.
            metrics = {
                "mae": mean_absolute_error(y_test, test_pred),
                "rmse": np.sqrt(mean_squared_error(y_test, test_pred)),
                "train_mae": mean_absolute_error(y_train, train_pred),
                "train_rmse": np.sqrt(mean_squared_error(y_train, train_pred)),
            }

            # MLflow logging
            mlflow.log_metrics(metrics)
            mlflow.sklearn.log_model(model, model_name)
            joblib.dump(model, f"models/{model_name}.pkl")

            print(f"{model_name} modeli MLflow'a kaydedildi!")

if __name__ == "__main__":
    # Run the pipeline stages in order: preprocessing first, then training.
    for pipeline_step in (preprocess_data, train_model):
        pipeline_step()

Preprocessing tamamlandi!
Model Eğitiliyor: RandomForest




RandomForest modeli MLflow'a kaydedildi!
Model Eğitiliyor: XGBoost




XGBoost modeli MLflow'a kaydedildi!
Model Eğitiliyor: LinearRegression




LinearRegression modeli MLflow'a kaydedildi!
