In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load dataset
df = pd.read_csv('E:\ML_Models\ML_Pharamachain\data\LSTM_data\data_pre.csv')
print(f"--- Data Loaded --- \nRows: {df.shape[0]}, Columns: {df.shape[1]}")

# 1. Temporal Sorting (Critical for LSTM)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.sort_values(['Item ID', 'Timestamp'])
print("Sorted data by Item ID and Timestamp to create chronological scan sequences.")

# 2. Encode Categorical Data
le = LabelEncoder()
df['Location_Enc'] = le.fit_transform(df['Location'].astype(str))
df['Payment_Enc'] = le.fit_transform(df['Payment Status'].astype(str))
print(f"Encoded 'Location' and 'Payment Status' into numeric values.")

# 3. Feature Selection and Scaling
num_cols = ['Temperature', 'Order Amount', 'Quantity Shipped', 'Time to Delivery']
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])
print(f"Normalized numerical columns: {num_cols}")

features = ['Location_Enc', 'Payment_Enc'] + num_cols
print(f"Final feature set for LSTM: {features}")

--- Data Loaded --- 
Rows: 10348, Columns: 11
Sorted data by Item ID and Timestamp to create chronological scan sequences.
Encoded 'Location' and 'Payment Status' into numeric values.
Normalized numerical columns: ['Temperature', 'Order Amount', 'Quantity Shipped', 'Time to Delivery']
Final feature set for LSTM: ['Location_Enc', 'Payment_Enc', 'Temperature', 'Order Amount', 'Quantity Shipped', 'Time to Delivery']


In [2]:
def create_sequences(df, seq_length=3):
    X, y = [], []
    for item_id, group in df.groupby('Item ID'):
        data = group[features].values
        # Placeholder: Using a dummy label if 'Fraud' column isn't present
        labels = group['Fraud_Indicator'].values if 'Fraud_Indicator' in group else np.zeros(len(group))
        
        if len(data) >= seq_length:
            for i in range(len(data) - seq_length + 1):
                X.append(data[i:i + seq_length])
                y.append(labels[i + seq_length - 1])
    return np.array(X), np.array(y)

X_seq, y_seq = create_sequences(df)
print(f"--- Sequence Creation ---")
print(f"Input Shape (Samples, TimeSteps, Features): {X_seq.shape}")
print(f"Label Shape: {y_seq.shape}")

--- Sequence Creation ---
Input Shape (Samples, TimeSteps, Features): (8348, 3, 6)
Label Shape: (8348,)


In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Convert to Tensors
X_train = torch.FloatTensor(X_seq)
y_train = torch.FloatTensor(y_seq).view(-1, 1)
loader = DataLoader(TensorDataset(X_train, y_train), batch_size=16, shuffle=True)

class PharmaLSTM(nn.Module):
    def __init__(self, input_dim):
        super(PharmaLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, 64, num_layers=2, batch_first=True)
        self.fc = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.sigmoid(self.fc(out[:, -1, :]))

model = PharmaLSTM(len(features))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

print(f"\n--- Starting Training ---")
for epoch in range(10):
    total_loss = 0
    for batch_X, batch_y in loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    avg_loss = total_loss / len(loader)
    print(f"Epoch {epoch+1}/10 | Average Loss: {avg_loss:.4f}")

print("\nModel Training Complete. It is now ready to detect anomalies in scan patterns.")


--- Starting Training ---
Epoch 1/10 | Average Loss: 0.0290
Epoch 2/10 | Average Loss: 0.0002
Epoch 3/10 | Average Loss: 0.0001
Epoch 4/10 | Average Loss: 0.0000
Epoch 5/10 | Average Loss: 0.0000
Epoch 6/10 | Average Loss: 0.0000
Epoch 7/10 | Average Loss: 0.0000
Epoch 8/10 | Average Loss: 0.0000
Epoch 9/10 | Average Loss: 0.0000
Epoch 10/10 | Average Loss: 0.0000

Model Training Complete. It is now ready to detect anomalies in scan patterns.
