In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Read the training set from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='./train.csv')

# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291142 entries, 0 to 291141
Data columns (total 8 columns):
 #   Column                Non-Null Count   Dtype 
---  ------                --------------   ----- 
 0   merchant_id           291142 non-null  object
 1   month_id              291142 non-null  int64 
 2   merchant_source_name  291142 non-null  object
 3   settlement_period     291142 non-null  object
 4   working_type          291142 non-null  object
 5   mcc_id                291142 non-null  object
 6   merchant_segment      291142 non-null  object
 7   net_payment_count     291142 non-null  int64 
dtypes: int64(2), object(6)
memory usage: 17.8+ MB


In [3]:
# Parse the YYYYMM month identifier once, then reuse the parsed Series
# for the derived calendar columns.
month_start = pd.to_datetime(df['month_id'], format='%Y%m')

df['month_id'] = month_start
df['year'] = month_start.dt.year
df['month'] = month_start.dt.month

# Display the frame with the two new columns appended.
df

Unnamed: 0,merchant_id,month_id,merchant_source_name,settlement_period,working_type,mcc_id,merchant_segment,net_payment_count,year,month
0,merchant_43992,2023-07-01,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,15106,2023,7
1,merchant_43992,2023-01-01,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16918,2023,1
2,merchant_43992,2023-05-01,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,13452,2023,5
3,merchant_43992,2023-08-01,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,16787,2023,8
4,merchant_43992,2023-02-01,Merchant Source - 3,Settlement Period - 3,Working Type - 2,mcc_197,Segment - 2,12428,2023,2
...,...,...,...,...,...,...,...,...,...,...
291137,merchant_67083,2023-09-01,Merchant Source - 1,Settlement Period - 1,Working Type - 6,mcc_169,Segment - 4,4,2023,9
291138,merchant_4263,2023-09-01,Merchant Source - 2,Settlement Period - 1,Working Type - 6,mcc_73,Segment - 4,4,2023,9
291139,merchant_11346,2023-09-01,Merchant Source - 1,Settlement Period - 1,Working Type - 6,mcc_169,Segment - 4,3,2023,9
291140,merchant_21397,2023-09-01,Merchant Source - 1,Settlement Period - 1,Working Type - 6,mcc_168,Segment - 4,3,2023,9


In [4]:
# Predictors: the calendar parts plus the merchant's categorical attributes.
feature_columns = [
    'year', 'month',
    'merchant_source_name', 'settlement_period', 'working_type',
    'mcc_id', 'merchant_segment',
]

# Target: the payment count recorded on each row.
y = df['net_payment_count']

# One-hot encode the categorical predictors; drop_first removes one level
# per categorical column.
X = pd.get_dummies(df[feature_columns], drop_first=True)

# Hold out 20% of the rows for validation, with a fixed seed.
# NOTE(review): this is a random row-level split, so rows from the same month
# can land in both sets -- confirm that is intended for a forecasting task.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Linear regression: fit on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
LRmodel = LinearRegression().fit(X_train, y_train)

# Predict on the validation split and report the mean absolute error.
LR_y_pred = LRmodel.predict(X_val)
mae = mean_absolute_error(y_true=y_val, y_pred=LR_y_pred)

print(f'Linear Regression MAE: {mae}')

Linear Regression MAE: 986.1046179040854


In [None]:
# Gradient boosting with a fixed seed, fitted on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
GBmodel = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predict on the validation split and report the mean absolute error.
GB_y_pred = GBmodel.predict(X_val)
mae = mean_absolute_error(y_true=y_val, y_pred=GB_y_pred)

print(f'Gradient Boosting MAE: {mae}')

In [None]:
from xgboost import XGBRegressor

# XGBoost regressor with a fixed seed, fitted on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
XGmodel = XGBRegressor(random_state=42).fit(X_train, y_train)

# Predict on the validation split and report the mean absolute error.
XG_y_pred = XGmodel.predict(X_val)
mae = mean_absolute_error(y_true=y_val, y_pred=XG_y_pred)

print(f'XGBoost MAE: {mae}')

In [None]:
# RandomForestRegressor
# n_jobs=-1 builds (and later queries) the trees on all available cores.
# The seed is unchanged, so the fitted forest stays reproducible; only the
# wall-clock time of this otherwise very slow cell changes.
RFmodel = RandomForestRegressor(random_state=42, n_jobs=-1)
RFmodel.fit(X_train, y_train)

# Predict on the validation set.
RF_y_pred = RFmodel.predict(X_val)

# Score the predictions against the held-out targets.
mae = mean_absolute_error(y_val, RF_y_pred)
print(f'Random Forest MAE: {mae}')

In [None]:
from prophet import Prophet

# Reload the raw CSV so this cell works on its own copy of the data.
dfProphet = pd.read_csv('./train.csv')

# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
dfProphet['month_id'] = pd.to_datetime(dfProphet['month_id'], format='%Y%m')
dfProphet = dfProphet.rename(columns={'month_id': 'ds', 'net_payment_count': 'y'})

# The file is not stored in date order, so sort chronologically first.
# Otherwise the positional 80/20 cut below is not a temporal hold-out.
dfProphet = dfProphet.sort_values('ds', kind='mergesort').reset_index(drop=True)

train_size = int(len(dfProphet) * 0.8)
train_data, val_data = dfProphet[:train_size], dfProphet[train_size:]

# The observations are month-start values, so there is no intra-week signal:
# weekly seasonality is switched off, yearly seasonality is kept.
prophet_model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
prophet_model.fit(train_data[['ds', 'y']])

# Forecast exactly the dates present in the validation rows.
# make_future_dataframe(periods=len(val_data)) would instead generate one
# *daily* step per validation row, i.e. dates far beyond the data that do not
# correspond to any validation observation.
future = val_data[['ds']].drop_duplicates().sort_values('ds').reset_index(drop=True)

forecast = prophet_model.predict(future)

# Join forecasts to actuals on the date instead of relying on row position,
# so every validation row is scored against the prediction for its own month.
val_scored = val_data[['ds', 'y']].merge(forecast[['ds', 'yhat']], on='ds', how='left')

mae = mean_absolute_error(val_scored['y'], val_scored['yhat'])
print(f'MAE: {mae}')
