<a href="https://colab.research.google.com/github/rezPirayesh/Customer-prediction/blob/main/Customer_Prediction_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
print("Hello, Colab works!")
import torch
print(torch.__version__)
print("GPU available:", torch.cuda.is_available())

Hello, Colab works!
2.6.0+cu124
GPU available: True


In [3]:
import pandas as pd
df = pd.read_csv('data.csv', encoding='ISO-8859-1')
print(df.head())  # First 5 rows
print(df.info())  # Columns, types
print(df['CustomerID'].notnull().value_counts())  # Purchase indicator

  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

      InvoiceDate  UnitPrice  CustomerID         Country  
0  12/1/2010 8:26       2.55     17850.0  United Kingdom  
1  12/1/2010 8:26       3.39     17850.0  United Kingdom  
2  12/1/2010 8:26       2.75     17850.0  United Kingdom  
3  12/1/2010 8:26       3.39     17850.0  United Kingdom  
4  12/1/2010 8:26       3.39     17850.0  United Kingdom  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   InvoiceNo   

In [4]:
import torch
import torch.nn as nn
model = nn.Sequential(
    nn.Linear(2, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    nn.Sigmoid()
)
print("Model created:", model)

Model created: Sequential(
  (0): Linear(in_features=2, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=1, bias=True)
  (3): Sigmoid()
)


In [5]:
import pandas as pd
df = pd.read_csv('data.csv', encoding='ISO-8859-1')
df = df.dropna(subset=['CustomerID'])
df = df[df['Quantity'] > 0]
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
print("Cleaned dataset:")
print(df.info())
print(df.head())

Cleaned dataset:
<class 'pandas.core.frame.DataFrame'>
Index: 397924 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    397924 non-null  object        
 1   StockCode    397924 non-null  object        
 2   Description  397924 non-null  object        
 3   Quantity     397924 non-null  int64         
 4   InvoiceDate  397924 non-null  datetime64[ns]
 5   UnitPrice    397924 non-null  float64       
 6   CustomerID   397924 non-null  float64       
 7   Country      397924 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 27.3+ MB
None
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3  

In [6]:
df['TotalPrice'] = df['Quantity'] * df['UnitPrice']
customer_data = df.groupby('CustomerID').agg({
    'TotalPrice': 'sum',
    'InvoiceNo': 'nunique',
    'Quantity': 'sum'
}).reset_index()
customer_data.columns = ['CustomerID', 'TotalSpent', 'PurchaseFrequency', 'TotalItems']
customer_data['Purchased'] = 1
print("Customer features:")
print(customer_data.head())
print(customer_data.info())

Customer features:
   CustomerID  TotalSpent  PurchaseFrequency  TotalItems  Purchased
0     12346.0    77183.60                  1       74215          1
1     12347.0     4310.00                  7        2458          1
2     12348.0     1797.24                  4        2341          1
3     12349.0     1757.55                  1         631          1
4     12350.0      334.40                  1         197          1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4339 entries, 0 to 4338
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CustomerID         4339 non-null   float64
 1   TotalSpent         4339 non-null   float64
 2   PurchaseFrequency  4339 non-null   int64  
 3   TotalItems         4339 non-null   int64  
 4   Purchased          4339 non-null   int64  
dtypes: float64(2), int64(3)
memory usage: 169.6 KB
None


In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch

# Features and target
X = customer_data[['TotalSpent', 'PurchaseFrequency', 'TotalItems']].values
y = customer_data['Purchased'].values

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.FloatTensor(X_train).to('cuda')
X_test = torch.FloatTensor(X_test).to('cuda')
y_train = torch.FloatTensor(y_train).to('cuda').reshape(-1, 1)
y_test = torch.FloatTensor(y_test).to('cuda').reshape(-1, 1)

print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Training data shape: torch.Size([3471, 3])
Test data shape: torch.Size([868, 3])


In [8]:
print("Customer data size:", customer_data.shape)
print(customer_data.head())

Customer data size: (4339, 5)
   CustomerID  TotalSpent  PurchaseFrequency  TotalItems  Purchased
0     12346.0    77183.60                  1       74215          1
1     12347.0     4310.00                  7        2458          1
2     12348.0     1797.24                  4        2341          1
3     12349.0     1757.55                  1         631          1
4     12350.0      334.40                  1         197          1


In [9]:
import torch
import torch.nn as nn

class CustomerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(3, 16),  # 3 features
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.layers(x)

model = CustomerNet().to('cuda')
print(model)

CustomerNet(
  (layers): Sequential(
    (0): Linear(in_features=3, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=1, bias=True)
    (5): Sigmoid()
  )
)


In [10]:
import torch.optim as optim

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 10, Loss: 0.5564
Epoch 20, Loss: 0.5309
Epoch 30, Loss: 0.5052
Epoch 40, Loss: 0.4789
Epoch 50, Loss: 0.4506
Epoch 60, Loss: 0.4183
Epoch 70, Loss: 0.3833
Epoch 80, Loss: 0.3459
Epoch 90, Loss: 0.3076
Epoch 100, Loss: 0.2691


In [11]:
import pandas as pd
df = pd.read_csv('data.csv', encoding='ISO-8859-1')
df = df.dropna(subset=['CustomerID'])
df = df[df['Quantity'] > 0]
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
df['TotalPrice'] = df['Quantity'] * df['UnitPrice']
customer_data = df.groupby('CustomerID').agg({
    'TotalPrice': 'sum',
    'InvoiceNo': 'nunique',
    'Quantity': 'sum'
}).reset_index()
customer_data.columns = ['CustomerID', 'TotalSpent', 'PurchaseFrequency', 'TotalItems']
customer_data['Purchased'] = 1
print("Customer data size:", customer_data.shape)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
X = customer_data[['TotalSpent', 'PurchaseFrequency', 'TotalItems']].values
y = customer_data['Purchased'].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
X_train = torch.FloatTensor(X_train).to('cuda')
X_test = torch.FloatTensor(X_test).to('cuda')
y_train = torch.FloatTensor(y_train).to('cuda').reshape(-1, 1)
y_test = torch.FloatTensor(y_test).to('cuda').reshape(-1, 1)
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Customer data size: (4339, 5)
Training data shape: torch.Size([3471, 3])
Test data shape: torch.Size([868, 3])


In [12]:
import torch
import torch.nn as nn
class CustomerNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(3, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.layers(x)
model = CustomerNet().to('cuda')
print(model)

import torch.optim as optim
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

CustomerNet(
  (layers): Sequential(
    (0): Linear(in_features=3, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=1, bias=True)
    (5): Sigmoid()
  )
)
Epoch 10, Loss: 0.4979
Epoch 20, Loss: 0.4829
Epoch 30, Loss: 0.4669
Epoch 40, Loss: 0.4490
Epoch 50, Loss: 0.4270
Epoch 60, Loss: 0.4022
Epoch 70, Loss: 0.3762
Epoch 80, Loss: 0.3485
Epoch 90, Loss: 0.3132
Epoch 100, Loss: 0.2757


In [13]:
with torch.no_grad():
    model.eval()
    predictions = model(X_test)
    predictions = (predictions > 0.5).float()
    accuracy = (predictions == y_test).float().mean()
    print(f"Test Accuracy: {accuracy.item():.4f}")

Test Accuracy: 1.0000
