In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Gather the full path of every file found under the Kaggle input directory,
# then echo them one per line.
input_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/academy2025/sample_submission.csv
/kaggle/input/academy2025/train.csv
/kaggle/input/academy2025/testFeatures.csv


In [2]:
# Load the three competition CSVs in one pass: the labelled training rows,
# the unlabelled test features, and the sample submission file.
df_train, df_test, df_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/academy2025/train.csv",
        "/kaggle/input/academy2025/testFeatures.csv",
        "/kaggle/input/academy2025/sample_submission.csv",
    )
)

## tarih sütununu modelin anlayacağı şekilde parçalama

In [3]:
# New column name -> datetime accessor attribute it is filled from.
date_parts = {'tarih_yıl': 'year', 'tarih_ay': 'month', 'tarih_gün': 'day'}

for frame in (df_train, df_test):
    # Parse the raw 'tarih' column into datetimes, then expose its calendar
    # components as separate numeric columns.
    frame['tarih'] = pd.to_datetime(frame['tarih'])
    for column, part in date_parts.items():
        frame[column] = getattr(frame['tarih'].dt, part)



## label encoding

In [4]:
from sklearn.preprocessing import LabelEncoder

def encode_categories(train_df, test_df, columns):
    """Label-encode categorical columns consistently across train and test.

    For every column a fresh ``LabelEncoder`` is fitted on the concatenated
    train + test values, so both frames share a single label -> code mapping
    and a label that appears only in the test frame can still be transformed.

    Args:
        train_df: Training DataFrame; must contain every name in ``columns``.
        test_df: Test DataFrame; must contain every name in ``columns``.
        columns: Iterable of column names to encode.

    Returns:
        tuple: ``(train_encoded, test_encoded)`` -- copies of the two inputs
        with the listed columns replaced by the encoder's output. The frames
        passed in are left unmodified, so calling this again on the same raw
        inputs yields the same result.

    Raises:
        KeyError: If a name in ``columns`` is missing from either frame.
    """
    # Work on copies: the caller's frames stay intact instead of being
    # silently rewritten as a side effect of the call.
    train_out = train_df.copy()
    test_out = test_df.copy()

    for col in columns:
        # One encoder per column, rather than re-fitting a shared instance.
        encoder = LabelEncoder()
        encoder.fit(pd.concat([train_df[col], test_df[col]], axis=0))
        train_out[col] = encoder.transform(train_df[col])
        test_out[col] = encoder.transform(test_df[col])

    return train_out, test_out

# Columns that are label-encoded before modelling.
categorical_cols = [
    'ürün',
    'ürün kategorisi',
    'ürün üretim yeri',
    'market',
    'şehir',
]

# Rebind both frames to the encoded versions returned by the helper.
df_train, df_test = encode_categories(
    train_df=df_train,
    test_df=df_test,
    columns=categorical_cols,
)


## girdi ve hedef tanımlama

In [5]:
# Training features: everything except the target ('ürün fiyatı') and the raw
# 'tarih' timestamp, whose year/month/day parts already exist as columns.
X = df_train.drop(['ürün fiyatı', 'tarih'], axis=1)
y = df_train['ürün fiyatı']

# Test features: exactly the training columns, in training order.
# Previously only 'tarih' was dropped here, which left the test-only 'id'
# column in X_test, so its columns did not match what the model is fitted on.
X_test = df_test[X.columns]



In [6]:
# Show the first rows of the processed train and test frames as plain text.
for frame in (df_train, df_test):
    print(frame.head())


       tarih  ürün  ürün besin değeri  ürün kategorisi  ürün fiyatı  \
0 2019-01-01    65                120                0        32.70   
1 2019-01-01    70                103                0        11.04   
2 2019-01-01    61                160                0        11.20   
3 2019-01-01     1                354                3         7.04   
4 2019-01-01     2                 72                3        13.28   

   ürün üretim yeri  market  şehir  tarih_yıl  tarih_ay  tarih_gün  
0                 1       1      6       2019         1          1  
1                 1       1      6       2019         1          1  
2                 1       1      6       2019         1          1  
3                 1       1      6       2019         1          1  
4                 1       1      6       2019         1          1  
   id      tarih  ürün  ürün besin değeri  ürün kategorisi  ürün üretim yeri  \
0   0 2024-01-01    65                120                0                 1   

## eğitim doğrulama ayrımı

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; the fixed seed keeps the
# partition reproducible between runs.
# NOTE(review): this is a shuffled split over dated rows. If the test period
# lies after the training period, a time-ordered hold-out would likely give a
# more realistic error estimate -- confirm against the competition setup.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)






## model oluşturma ve eğitme

In [8]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with library-default hyperparameters; seeded for
# reproducibility, with n_jobs=-1 to use all available cores.
rf_params = {"random_state": 42, "n_jobs": -1}
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)



## tahmin ve başarı metriği

In [9]:
from sklearn.metrics import mean_squared_error

# Score the held-out validation rows and report the root of the mean squared
# error between true and predicted prices.
y_pred = model.predict(X_val)
mse = mean_squared_error(y_true=y_val, y_pred=y_pred)
rmse = pow(mse, 0.5)

print(f"Doğrulama RMSE: {rmse:.2f}")


Doğrulama RMSE: 1.32


## tahmin yapma

In [10]:
# Predict on exactly the columns the model was fitted on, in the same order.
# Selecting by X_train.columns (rather than dropping 'id' and 'tarih' by hand)
# keeps the test matrix aligned with training even if the test file orders its
# columns differently or gains an extra column.
test_preds = model.predict(df_test[X_train.columns])


## submission

In [11]:
# Pair each test row's 'id' with its predicted price and write the result,
# without the index column, to submission.csv.
submission = df_test[['id']].assign(**{'ürün fiyatı': test_preds})
submission.to_csv('submission.csv', index=False)