In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from torch.nn.utils.rnn import pad_sequence
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
# Load the raw order history.
df = pd.read_csv('train_transformer.csv')

# Missing ratings are filled with 0 (not with the mean). The result is assigned
# back to the column instead of using the chained in-place form
# `df['rating'].fillna(..., inplace=True)`, which newer pandas versions warn
# about because it may operate on a temporary copy.
df['rating'] = df['rating'].fillna(0)

# Parse 'orderdate' into datetime64 so the `.dt` accessors can be used on it.
df['orderdate'] = pd.to_datetime(df['orderdate'])

# Dataset overview. `DataFrame.info()` prints its own report and returns None,
# so it is not wrapped in print() (that would emit a stray "None").
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 444819 entries, 0 to 444818
Data columns (total 35 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   client_id                 444819 non-null  object        
 1   order_id                  444819 non-null  object        
 2   orderdate                 444819 non-null  datetime64[ns]
 3   city_id                   444819 non-null  object        
 4   distance                  444819 non-null  float64       
 5   aandm                     444819 non-null  float64       
 6   basketvalue               444819 non-null  float64       
 7   chargedamount             444819 non-null  float64       
 8   deliveryfee               444819 non-null  float64       
 9   discountamount            444819 non-null  float64       
 10  supliersupport            444819 non-null  float64       
 11  rating                    444819 non-null  float64       
 12  wa

In [2]:
def dynamic_scaling(df, column, apply_threshold, max_threshold):
    """Linearly compress the values of `column` that exceed `apply_threshold`.

    Values strictly greater than `apply_threshold` are rescaled so that the
    largest of them becomes `max_threshold`; all other rows are left as they
    are. The frame is modified in place and also returned.

    Args:
        df: DataFrame holding `column`.
        column: Name of the column to rescale.
        apply_threshold: Values above this bound are rescaled.
        max_threshold: Value the current maximum is mapped to.

    Returns:
        The same DataFrame object, with `column` updated.
    """
    above = df.loc[df[column] > apply_threshold, column]
    peak = above.max()
    # Position of each value inside (apply_threshold, peak], as a 0..1 fraction.
    fraction = (above - apply_threshold) / (peak - apply_threshold)
    rescaled = apply_threshold + fraction * (max_threshold - apply_threshold)
    df.loc[above.index, column] = rescaled
    return df

# Compress the long tails of both targets: values above the first bound are
# rescaled linearly so that the current maximum lands on the second bound.
df = dynamic_scaling(df, 'next_order_price', 80, 100)
df = dynamic_scaling(df, 'days_to_next_order', 120, 140)

# Calendar features derived from the order timestamp.
df['day'] = df['orderdate'].dt.day
df['dayofweek'] = df['orderdate'].dt.dayofweek

# Days elapsed since the same client's previous row (0 for a client's first row).
# NOTE(review): diff() follows the current row order, so this assumes rows are
# already chronological within each client - confirm, or sort beforehand.
df['days_since_last_order'] = df.groupby('client_id')['orderdate'].diff().dt.days.fillna(0)

# (No feature normalisation is performed in this cell.)

# Label-encode every remaining string column except the identifier columns.
categorical_features = df.select_dtypes(include=['object']).drop(['client_id', 'order_id', ],axis=1).columns
label_encoders = {}
for column in categorical_features:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])


# Rows without a known next order are set aside; the rest form the modelling set.
last_orders = df[df.days_to_next_order.isnull()]
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the original frame (SettingWithCopyWarning).
df = df[~df.days_to_next_order.isnull()].copy()


# Fill the missing 'using_frequency' values with the imputer.
# NOTE(review): only this one column is passed in, so no other features are
# available for neighbour distances - presumably this degenerates to a mean
# fill; confirm against the KNNImputer documentation.
imputer = KNNImputer(n_neighbors=5)
# fit_transform returns an (n_rows, 1) array; flatten it to a 1-D column.
df['using_frequency'] = imputer.fit_transform(df[['using_frequency']]).ravel()

# Reorder the columns so that the two targets, 'days_to_next_order' and
# 'next_order_price', are the last two columns.
df = df[['client_id', 'orderdate', 'city_id', 'distance', 'aandm', 'chargedamount', 'deliveryfee', 'discountamount', 'supliersupport',
       'rating', 'warehouse_id', 'prev_order_count', 'delivery_time','decision_time', 'discount_rate', 'regain_potential',
       'regain_amount_cumsum', 'order_frequency', 'hex_delivery_time', 'hex_regain_pot', 'hex_charged_amount_avg',
       'using_frequency', 'total_piece', 'mean_price', 'mastercategory_mode', 'brand_mode', 'subcategory_mode', 'product_mode', 'promo_id',
       'responsibledepartment_id', 'promoobjective', 'day', 'dayofweek','days_since_last_order', 'basketvalue', 
       'days_to_next_order', 'next_order_price']]
df.head()

Unnamed: 0,client_id,orderdate,city_id,distance,aandm,chargedamount,deliveryfee,discountamount,supliersupport,rating,...,product_mode,promo_id,responsibledepartment_id,promoobjective,day,dayofweek,days_since_last_order,basketvalue,days_to_next_order,next_order_price
0,6538ff945bbb65f457ca697d,2020-06-30 09:46:00,0,1373.0,6.405,16.4394,0.0,10.0006,0.0,0.0,...,332,611,0,1.0,30,1,0.0,26.44,15.0,4.85
1,6538ff945bbb65f457ca697d,2020-07-14 17:48:00,0,1373.0,5.4699,4.85,1.79,8.0,0.0,0.0,...,195,280,0,2.0,14,1,14.0,11.06,33.0,16.171
3,6538ff945bbb65f457ca697e,2020-06-30 10:11:00,0,820.0,9.9455,5.21,0.0,15.52,0.0,0.0,...,970,832,0,1.0,30,1,0.0,20.73,24.0,5.5105
4,6538ff945bbb65f457ca697e,2020-07-23 15:21:00,0,820.0,5.0219,5.5105,1.79,8.2596,0.1465,0.0,...,221,280,0,2.0,23,3,23.0,11.9801,9.0,5.6601
8,6538ff945bbb65f457ca6981,2020-06-30 14:07:00,2,1360.0,6.8144,9.7898,0.0,10.1502,0.0,0.0,...,978,611,0,1.0,30,1,0.0,19.94,1.0,16.5003


In [3]:
# Group the orders by client.
grouped = df.groupby('client_id')

# Per-client order history as a 2-D array (one row per order, in frame order).
sequences = {client: group.drop(columns=['client_id', 'orderdate']).values for client, group in grouped}

# Client-level 90/10 train/test split. A seeded generator keeps the split
# reproducible across kernel restarts.
SPLIT_SEED = 42
client_ids = df['client_id'].unique()
np.random.default_rng(SPLIT_SEED).shuffle(client_ids)
split_at = int(len(client_ids) * 0.9)
train_ids = client_ids[:split_at]
test_ids = client_ids[split_at:]
print(f"Training set contains {len(train_ids)} clients")
print(f"Test set contains {len(test_ids)} clients")

# Collect the per-client sequences for each split.
train_sequences = [sequences[client_id] for client_id in train_ids if client_id in sequences]
test_sequences = [sequences[client_id] for client_id in test_ids if client_id in sequences]
print(f"Training set shape: {train_sequences[0].shape}")
print(f"Test set shape: {test_sequences[0].shape}")

# Maximum number of orders kept per client.
MAX_SEQUENCE_LENGTH = 20

# Drop the last two columns (the targets 'days_to_next_order' and
# 'next_order_price') and keep only the MOST RECENT MAX_SEQUENCE_LENGTH orders.
# The labels are taken from each client's last row, so the inputs must end at
# that same row; slicing the padded tensor to its first 20 steps instead would
# pair early orders with a label from a much later order.
train_seq_dropped = [np.delete(sequence, [-2,-1], axis=1)[-MAX_SEQUENCE_LENGTH:] for sequence in train_sequences]
test_seq_dropped = [np.delete(sequence, [-2,-1], axis=1)[-MAX_SEQUENCE_LENGTH:] for sequence in test_sequences]

# Convert each client's order sequence to a float32 tensor.
train_tensors = [torch.tensor(sequence, dtype=torch.float32) for sequence in train_seq_dropped]
test_tensors = [torch.tensor(sequence, dtype=torch.float32) for sequence in test_seq_dropped]
print(f"Training set shape: {train_tensors[0].shape}")
print(f"Test set shape: {test_tensors[0].shape}")

# Right-pad with zeros to the longest sequence of each split. Sequences were
# truncated above, so the padded length never exceeds MAX_SEQUENCE_LENGTH.
train_padded = pad_sequence(train_tensors, batch_first=True, padding_value=0.0)
test_padded = pad_sequence(test_tensors, batch_first=True, padding_value=0.0)
print(f"Training set shape: {train_padded.shape}")
print(f"Test set shape: {test_padded.shape}")
# Build the target tensor: one row per sequence holding that sequence's two targets.
def prepare_labels(sequences, default_value=(0, 0)):
    """Extract the targets (last two columns of the last row) of each sequence.

    Args:
        sequences: Iterable of 2-D arrays, one per client; the last two columns
            of each array are read as the targets.
        default_value: Targets used for a sequence that has no rows. An
            immutable tuple is used as the default so it cannot be shared and
            mutated between calls.

    Returns:
        A float32 tensor of shape (len(sequences), len(default_value)). An
        empty `sequences` yields a (0, len(default_value)) tensor rather than a
        1-D empty tensor, so the result always has two dimensions.
    """
    labels = [
        np.asarray(seq[-1, -2:] if len(seq) > 0 else default_value, dtype=np.float32)
        for seq in sequences
    ]
    if not labels:
        return torch.zeros((0, len(default_value)), dtype=torch.float32)
    # Stack into one numpy array first, then convert to a tensor.
    return torch.tensor(np.stack(labels), dtype=torch.float32)

# Target tensors for both splits.
train_labels_tensor = prepare_labels(sequences=train_sequences)
test_labels_tensor = prepare_labels(sequences=test_sequences)

# Pair the padded inputs with their targets.
train_dataset = TensorDataset(train_padded, train_labels_tensor)
test_dataset = TensorDataset(test_padded, test_labels_tensor)

# Mini-batch loaders: training batches are shuffled, test batches keep their order.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)


Training set contains 68761 clients
Test set contains 7641 clients
Training set shape: (2, 35)
Test set shape: (3, 35)
Training set shape: torch.Size([2, 33])
Test set shape: torch.Size([3, 33])
Training set shape: torch.Size([68761, 20, 33])
Test set shape: torch.Size([7641, 20, 33])


In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position codes to a (batch, seq_len, d_model) input.

    A table of `max_len` positions is precomputed and stored as the
    non-trainable buffer `pe`. Both even and odd `d_model` are supported, and
    inputs longer than `max_len` are handled by computing the extra positions
    on the fly (the stored buffer keeps its original shape).
    """

    def __init__(self, d_model, max_len=20):
        """
        Args:
            d_model: Size of the last (feature) dimension of the inputs.
            max_len: Number of positions precomputed into the `pe` buffer.
        """
        super(PositionalEncoding, self).__init__()
        self.register_buffer('pe', self._sinusoid_table(max_len, d_model))

    @staticmethod
    def _sinusoid_table(length, d_model):
        """Return a (length, d_model) float tensor of sinusoidal position codes."""
        table = torch.zeros(length, d_model)
        position = torch.arange(0, length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        table[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd-indexed column than even-indexed
        # ones, so only the first d_model // 2 frequencies get a cosine column.
        table[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        return table

    def forward(self, x):
        """Return `x` plus the position codes of its first `x.size(1)` positions."""
        seq_len = x.size(1)
        if seq_len <= self.pe.size(0):
            encoding = self.pe[:seq_len]
        else:
            # Longer than the cached table: build the codes for this length.
            encoding = self._sinusoid_table(seq_len, self.pe.size(1)).to(self.pe.device)
        return x + encoding.unsqueeze(0)
    
class CustomTransformerModel(nn.Module):
    """Shared transformer encoder with two regression heads.

    The input is embedded linearly, position-encoded, passed through a stack of
    transformer encoder layers and one extra self-attention layer; the vector
    at each sequence's last non-padded time step is then fed to two separate
    MLP heads (`specific_layers_days` and `specific_layers_amount`).

    A time step whose features are all exactly 0.0 is treated as padding.
    """

    def __init__(self, input_size, shared_hidden_dim, specific_hidden_dim, num_heads, num_layers, output_size):
        """
        Args:
            input_size: Number of features per time step.
            shared_hidden_dim: Model width of the embedding, encoder and attention.
            specific_hidden_dim: Hidden width of each head.
            num_heads: Attention heads in the encoder layers and the extra attention.
            num_layers: Number of transformer encoder layers.
            output_size: Output width of each head.
        """
        super(CustomTransformerModel, self).__init__()
        self.input_embedding = nn.Linear(input_size, shared_hidden_dim)
        self.pos_encoder = PositionalEncoding(shared_hidden_dim)

        # Shared transformer layers.
        encoder_layers = nn.TransformerEncoderLayer(d_model=shared_hidden_dim, nhead=num_heads, dropout=0.1, activation='relu', batch_first=True)
        # The nested-tensor fast path is switched off so that the encoder output
        # always keeps the input's sequence length and therefore matches the
        # padding mask passed to the attention layer below.
        # NOTE(review): some PyTorch releases shorten the output on that path in
        # eval mode when a padding mask is given - confirm for the version in use.
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers, enable_nested_tensor=False)

        # Target-specific heads.
        self.specific_layers_days = nn.Sequential(
            nn.Linear(shared_hidden_dim, specific_hidden_dim),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(specific_hidden_dim, output_size)
        )
        self.specific_layers_amount = nn.Sequential(
            nn.Linear(shared_hidden_dim, specific_hidden_dim),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(specific_hidden_dim, output_size)
        )

        # Attention layer applied on top of the shared encoder output.
        self.attention = nn.MultiheadAttention(shared_hidden_dim, num_heads, batch_first=True)

    def forward(self, src):
        """Run the model on a (batch, seq_len, input_size) tensor.

        Returns:
            Tuple `(output_days, output_amount)`, each of shape
            (batch, output_size).
        """
        # A time step is padding when all of its features are 0.
        mask = (src == 0.0).all(dim=2)

        # Index of the last non-padded time step of every sequence (0 when a
        # sequence is entirely padding). Sequences are padded on the right, so
        # position -1 is a padding step for every sequence shorter than the batch.
        positions = torch.arange(src.size(1), device=src.device)
        last_valid = ((~mask).long() * positions).max(dim=1).values

        src = self.input_embedding(src)
        src = self.pos_encoder(src)
        # Padded steps are excluded as attention keys in the encoder.
        shared_output = self.transformer_encoder(src, src_key_padding_mask=mask)

        # Self-attention over the encoder output, again ignoring padded keys.
        attention_output, _ = self.attention(shared_output, shared_output, shared_output, key_padding_mask=mask)

        # Context vector = attention output at each sequence's last real step.
        batch_index = torch.arange(attention_output.size(0), device=attention_output.device)
        context = attention_output[batch_index, last_valid]

        output_days = self.specific_layers_days(context)
        output_amount = self.specific_layers_amount(context)

        return output_days, output_amount


    def get_specific_parameters_days(self):
        """Return the parameters of the `specific_layers_days` head as a list."""
        return list(self.specific_layers_days.parameters())

    def get_specific_parameters_amount(self):
        """Return the parameters of the `specific_layers_amount` head as a list."""
        return list(self.specific_layers_amount.parameters())

class TransformerTrainer:
    """Trains and evaluates a two-headed model with one optimizer per part.

    The model is expected to return `(output_days, output_amount)` from its
    forward pass and to expose `get_specific_parameters_days()` and
    `get_specific_parameters_amount()`. Targets are read as column 0 (days)
    and column 1 (amount) of each batch's target tensor.
    """

    def __init__(self, model, train_loader, test_loader, learning_rate=1e-4):
        """
        Args:
            model: The model to train.
            train_loader: Iterable of `(data, targets)` training batches.
            test_loader: Iterable of `(data, targets)` batches used for evaluation.
            learning_rate: Initial learning rate of all three optimizers.
        """
        self.model = model
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.criterion_days = nn.MSELoss()
        self.criterion_amount = nn.MSELoss()

        # Each parameter belongs to exactly one optimizer. Handing every model
        # parameter to the shared optimizer as well would step the head
        # parameters twice per batch.
        days_params = model.get_specific_parameters_days()
        amount_params = model.get_specific_parameters_amount()
        head_ids = {id(p) for p in days_params} | {id(p) for p in amount_params}
        shared_params = [p for p in model.parameters() if id(p) not in head_ids]
        if not shared_params:
            # A model made only of head parameters: fall back to all parameters
            # so the shared optimizer can still be constructed.
            shared_params = list(model.parameters())

        self.optimizer_shared = optim.AdamW(shared_params, lr=learning_rate, weight_decay=1e-3)
        self.optimizer_days = optim.Adam(days_params, lr=learning_rate)
        self.optimizer_amount = optim.Adam(amount_params, lr=learning_rate)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer_shared, 'min', min_lr=1e-7)

    def train(self, epochs):
        """Train for up to `epochs` epochs, stopping early after 6 epochs without improvement.

        After every epoch the model is evaluated on `test_loader`; the summed
        average loss drives both the learning-rate scheduler and early stopping.
        """
        best_val_loss = float('inf')
        patience_counter = 0
        patience = 6
        for epoch in range(epochs):
            self.model.train()
            total_loss_days = 0
            total_loss_amount = 0
            for data, targets in self.train_loader:
                self.optimizer_shared.zero_grad()
                self.optimizer_days.zero_grad()
                self.optimizer_amount.zero_grad()

                output_days, output_amount = self.model(data)

                # Drop only the trailing output dimension; a bare squeeze()
                # would also remove the batch dimension of a single-sample batch.
                output_days = output_days.squeeze(-1)
                output_amount = output_amount.squeeze(-1)

                loss_days = self.criterion_days(output_days, targets[:, 0])
                loss_amount = self.criterion_amount(output_amount, targets[:, 1])

                # One backward pass over the summed loss. Back-propagating each
                # loss and then their sum would accumulate every gradient twice.
                total_loss = loss_days + loss_amount
                total_loss.backward()

                # Clip after the gradients exist and before they are applied.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1)

                self.optimizer_shared.step()
                self.optimizer_amount.step()
                self.optimizer_days.step()


                total_loss_days += loss_days.item()
                total_loss_amount += loss_amount.item()

            print(f"Epoch {epoch+1}/{epochs}, Days Loss : {total_loss_days/len(self.train_loader):.6f}, Amount Loss : {total_loss_amount/len(self.train_loader):.6f}")
            # Evaluate the model.
            # NOTE(review): the same loader is used for early stopping and for
            # the reported metrics - confirm a separate validation split is not needed.
            val_loss_days, val_loss_amount, val_mse_days, val_mse_amount, val_mae_days, val_mae_amount, val_r2_days, val_r2_amount = self.evaluate()
            eval_loss = val_loss_days + val_loss_amount
            eval_loss /= len(self.test_loader)
            self.scheduler.step(eval_loss)
            if eval_loss < best_val_loss:
                best_val_loss = eval_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

            print(f"Epoch {epoch+1} Evaluation - Days Loss: {val_loss_days/len(self.test_loader):.4f}, Amount Loss: {val_loss_amount/len(self.test_loader):.4f}, "
                f"\nDays MSE: {val_mse_days:.4f}, Amount MSE: {val_mse_amount:.4f}, "
                f"\nDays MAE: {val_mae_days:.4f}, Amount MAE: {val_mae_amount:.4f}, "
                f"\nDays R2: {val_r2_days:.4f}, Amount R2: {val_r2_amount:.4f}")

    def evaluate(self):
        """Evaluate on `test_loader` without gradient tracking.

        Returns:
            Tuple `(total_loss_days, total_loss_amount, mse_days, mse_amount,
            mae_days, mae_amount, r2_days, r2_amount)`. The two losses are sums
            of per-batch MSE values; the other metrics are computed over all
            collected predictions.
        """
        self.model.eval()
        total_loss_days = 0
        total_loss_amount = 0
        predictions_days = []
        predictions_amount = []
        actuals_days = []
        actuals_amount = []
        with torch.no_grad():
            for data, targets in self.test_loader:
                output_days, output_amount = self.model(data)
                # Drop only the trailing output dimension so a single-sample
                # batch stays 1-D and can still be appended with extend().
                output_days = output_days.squeeze(-1)
                output_amount = output_amount.squeeze(-1)
                # Accumulate the per-batch losses.
                total_loss_days += self.criterion_days(output_days, targets[:, 0]).item()
                total_loss_amount += self.criterion_amount(output_amount, targets[:, 1]).item()

                # Keep predictions and ground truth for the metrics below.
                predictions_days.extend(output_days.cpu().numpy())
                predictions_amount.extend(output_amount.cpu().numpy())
                actuals_days.extend(targets[:, 0].cpu().numpy())
                actuals_amount.extend(targets[:, 1].cpu().numpy())

        # Compute the metrics.
        mse_days = mean_squared_error(actuals_days, predictions_days)
        mse_amount = mean_squared_error(actuals_amount, predictions_amount)
        mae_days = mean_absolute_error(actuals_days, predictions_days)
        mae_amount = mean_absolute_error(actuals_amount, predictions_amount)
        r2_days = r2_score(actuals_days, predictions_days)
        r2_amount = r2_score(actuals_amount, predictions_amount)

        return total_loss_days, total_loss_amount, mse_days, mse_amount, mae_days, mae_amount, r2_days, r2_amount

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the randomness used during training are reproducible.
MODEL_SEED = 42
torch.manual_seed(MODEL_SEED)

input_size = train_padded.shape[2]  # input width (last dimension of the padded tensor)
shared_hidden_dim = 256  # hidden size of the shared layers
specific_hidden_dim = 128  # hidden size of the target-specific layers
num_heads = 8  # number of attention heads
num_layers = 4  # number of transformer layers
output_size = 1  # output size per target

# Build the model.
model = CustomTransformerModel(input_size, shared_hidden_dim, specific_hidden_dim, num_heads, num_layers, output_size)

# Build the trainer that drives the training loop.
trainer = TransformerTrainer(model, train_loader, test_loader)
# Train the model.
trainer.train(epochs=10)


In [None]:
# Persist only the trained parameters (the model's state_dict).
torch.save(obj=model.state_dict(), f='transformer_model_OPparam.pth')
print('model saved')

model saved


In [None]:
# Persist the whole model object (pickled), not just its parameters.
# NOTE(review): loading this file later needs the model's class definitions to
# be available - confirm this is intended alongside the state_dict file.
torch.save(obj=model, f='transformer_model_OP.pth')
print('model saved')

model saved


## Modelin değerlendirilmesi ve analizi

In [None]:
def create_predictions_dataframe(model, test_loader):
    """Collect the model's predictions and the true targets into a DataFrame.

    The model is switched to eval mode and run over every batch of
    `test_loader` without gradient tracking. Column 0 of each target batch is
    stored under the "Days to Next Order" names and column 1 under the
    "Charged Amount" names.

    Args:
        model: Callable module returning `(output_days, output_amount)`.
        test_loader: Iterable of `(data, targets)` batches.

    Returns:
        DataFrame with one row per sample and four columns (actual and
        predicted value for each of the two targets).
    """
    collected = {
        'Actual Days to Next Order': [],
        'Predicted Days to Next Order': [],
        'Actual Charged Amount': [],
        'Predicted Charged Amount': [],
    }
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            pred_days, pred_amount = model(batch_inputs)
            # Model outputs are flattened to 1-D before being appended.
            collected['Predicted Days to Next Order'].extend(pred_days.cpu().numpy().flatten())
            collected['Predicted Charged Amount'].extend(pred_amount.cpu().numpy().flatten())
            collected['Actual Days to Next Order'].extend(batch_targets[:, 0].cpu().numpy())
            collected['Actual Charged Amount'].extend(batch_targets[:, 1].cpu().numpy())

    return pd.DataFrame(collected)

# Build the predictions DataFrame from the trained model and the test loader.
predictions_df = create_predictions_dataframe(model=model, test_loader=test_loader)


In [None]:
def plot_predictions_vs_actuals(df, title, actual_col, predicted_col):
    """Scatter predicted against actual values with a dashed identity line.

    Args:
        df: DataFrame containing both columns.
        title: Prefix used in the figure title.
        actual_col: Column with the true values (x axis).
        predicted_col: Column with the predicted values (y axis).
    """
    actual_values = df[actual_col]
    lowest, highest = actual_values.min(), actual_values.max()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(actual_values, df[predicted_col], alpha=0.6)
    ax.set_title(f'{title}: Predictions vs Actuals')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    # Ideal line: a prediction equal to the actual value falls on it.
    ax.plot([lowest, highest], [lowest, highest], 'k--')
    plt.show()

# Scatter plot of predictions vs. actual values for 'Days to Next Order'.
plot_predictions_vs_actuals(
    df=predictions_df,
    title='Days to Next Order',
    actual_col='Actual Days to Next Order',
    predicted_col='Predicted Days to Next Order',
)

# Scatter plot of predictions vs. actual values for 'Charged Amount'.
plot_predictions_vs_actuals(
    df=predictions_df,
    title='Charged Amount',
    actual_col='Actual Charged Amount',
    predicted_col='Predicted Charged Amount',
)

In [None]:
def plot_error_distribution(df, title, actual_col, predicted_col):
    """Plot a 50-bin histogram of the prediction errors (predicted - actual).

    Args:
        df: DataFrame containing both columns.
        title: Prefix used in the figure title.
        actual_col: Column with the true values.
        predicted_col: Column with the predicted values.
    """
    residuals = df[predicted_col] - df[actual_col]

    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(residuals, bins=50, alpha=0.7)
    ax.set_title(f'{title}: Error Distribution')
    ax.set_xlabel('Prediction Error')
    ax.set_ylabel('Frequency')
    plt.show()

# Error distribution for 'Days to Next Order'.
plot_error_distribution(
    df=predictions_df,
    title='Days to Next Order',
    actual_col='Actual Days to Next Order',
    predicted_col='Predicted Days to Next Order',
)

# Error distribution for 'Charged Amount'.
plot_error_distribution(
    df=predictions_df,
    title='Charged Amount',
    actual_col='Actual Charged Amount',
    predicted_col='Predicted Charged Amount',
)
