In [12]:
import joblib
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor


In [13]:
# Load the dataset from the CSV file (path is relative to the working directory)
DATA_PATH = 'veri.csv'
data = pd.read_csv(DATA_PATH)


In [14]:
# Show the column index of the loaded frame to check which fields are available
column_index = data.columns
print(column_index)


Index(['otel_yildizi', 'sehir', 'oda_sayisi', 'denize_yakinlik', 'mevsim',
       'ortalama_puan', 'fiyat'],
      dtype='object')


In [15]:
# Split the input columns by type: categorical ones are one-hot encoded and
# numerical ones are scaled by the preprocessing step.
categorical_features = [
    'sehir',
    'mevsim',
]
numerical_features = [
    'otel_yildizi',
    'oda_sayisi',
    'denize_yakinlik',
    'ortalama_puan',
]


In [16]:
# Preprocessing: standardize the numeric columns and one-hot encode the
# categorical columns.
# handle_unknown='ignore' makes the encoder output an all-zero row for a
# category it did not see during fit (e.g. a city that only appears in the
# test split or in a later prediction request) instead of raising at
# transform time.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# Each transformer is applied only to its own list of columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

In [17]:
# Separate the target (dependent variable) from the feature columns
TARGET_COLUMN = 'fiyat'
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])


In [8]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
# NOTE(review): in the saved notebook this cell's execution count is lower than
# the cells that load `data` and build X / y — Restart & Run All to confirm the
# split reflects the current data.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


In [18]:
# Candidate regressors to compare, keyed by display name.
# Estimators whose fitting involves randomness get a fixed random_state so that
# repeated runs report the same RMSE values and pick the same best model.
RANDOM_STATE = 42  # same seed value the train/test split uses

models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "Support Vector Machine": SVR(),
    "K-Nearest Neighbors": KNeighborsRegressor(),
    "Gaussian Process": GaussianProcessRegressor(),
    "Neural Network": MLPRegressor(max_iter=500, random_state=RANDOM_STATE),
}


In [22]:
# Fit every candidate inside its own preprocessing + model pipeline and record
# its RMSE on the held-out test split.
# `results` maps model name -> (fitted pipeline, test RMSE).
results = {}
for name, model in models.items():
    # Pipeline does not copy its steps, so passing `preprocessor` directly would
    # make every stored pipeline share (and repeatedly refit) a single
    # transformer object. A fresh clone gives each pipeline its own fitted
    # preprocessor, so the pipeline that is kept stays independent of the rest.
    pipeline = Pipeline(steps=[('preprocessor', clone(preprocessor)), ('model', model)])
    pipeline.fit(X_train, y_train)
    predictions = pipeline.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    results[name] = (pipeline, rmse)
    print(f"{name}: RMSE = {rmse:.2f}")


Linear Regression: RMSE = 468.59
Ridge Regression: RMSE = 468.60
Lasso Regression: RMSE = 468.94
Decision Tree: RMSE = 643.81
Random Forest: RMSE = 481.27
Gradient Boosting: RMSE = 473.59
Support Vector Machine: RMSE = 473.43
K-Nearest Neighbors: RMSE = 533.47
Gaussian Process: RMSE = 777.27
Neural Network: RMSE = 473.07




In [23]:
# Pick the entry with the lowest RMSE; each value in `results` is a
# (pipeline, rmse) tuple, so index 1 of the value is the score.
# NOTE(review): the RMSE comes from the same test split used for this
# selection, so the reported score of the winner is likely optimistic.
best_model_name, (best_model, best_rmse) = min(
    results.items(),
    key=lambda entry: entry[1][1],
)

print(f"\nEn iyi model: {best_model_name} (RMSE = {best_rmse:.2f})")


En iyi model: Linear Regression (RMSE = 468.59)


In [24]:
# Persist the selected pipeline (preprocessing + model) to disk with joblib
joblib.dump(value=best_model, filename='eniyi.joblib')
print("En iyi model 'eniyi.joblib' olarak kaydedildi.")


En iyi model 'eniyi.joblib' olarak kaydedildi.
