In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [6]:
# Load the three raw tables; the relative paths expect the CSVs one level up,
# inside the `data/` folder.
customers, products, transactions = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/Customers.csv',
        '../data/Products.csv',
        '../data/Transactions.csv',
    )
]

In [2]:
# Read the merged transaction-level table from the `data/` folder.
# NOTE(review): presumably this file is the join of the customer, product and
# transaction tables -- confirm against whatever produced it.
merged_data = pd.read_csv(filepath_or_buffer='../data/Merged_Data.csv')

In [3]:
# Feature Engineering
# Build one row per customer with four features: total spend, total quantity,
# number of distinct products, and recency (days between the customer's last
# transaction and the latest transaction in the whole dataset).

# Parse the dates once, up front. Taking the max on parsed timestamps (rather
# than on the raw column) keeps the reference date chronologically correct even
# when the stored strings do not sort lexicographically, and avoids recomputing
# the global max for every customer group.
transaction_dates = pd.to_datetime(merged_data['TransactionDate'])
reference_date = transaction_dates.max()

customer_data = (
    merged_data
    .assign(TransactionDate=transaction_dates)  # works on a copy; merged_data is untouched
    .groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',        # Total revenue from the customer
        'Quantity': 'sum',          # Total quantity purchased
        'ProductID': 'nunique',     # Number of unique products
        'TransactionDate': 'max',   # Most recent transaction per customer
    })
    .reset_index()
)

# Turn "last transaction date" into "days since last transaction". The column
# keeps the name 'TransactionDate' here; it is renamed to 'Recency' downstream.
customer_data['TransactionDate'] = (
    reference_date - customer_data['TransactionDate']
).dt.days

In [4]:
# The aggregated 'TransactionDate' column holds days since the last purchase,
# so give it the name 'Recency'.
customer_data = customer_data.rename(columns={'TransactionDate': 'Recency'})

# Standardize the four per-customer features so that no single feature
# dominates the similarity purely because of its scale.
feature_columns = ['TotalValue', 'Quantity', 'ProductID', 'Recency']
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_data[feature_columns])

# Pairwise cosine similarity between customers: entry [i, j] compares the
# standardized feature vectors of rows i and j of `customer_data`.
similarity_matrix = cosine_similarity(normalized_features)

In [8]:
# Generate lookalike recommendations for the first 20 customers
customer_ids = customer_data['CustomerID'].tolist()
lookalike_results = {}

for i, customer_id in enumerate(customer_ids[:20]):  # First 20 customers
    # Rank every customer by descending similarity, then remove the customer's
    # own index explicitly. Simply slicing off position 0 is only correct when
    # the customer ranks first against itself; an exact tie with another
    # customer, floating-point rounding, or an all-zero feature row can break
    # that and would list the customer as its own lookalike.
    ranked_indices = similarity_matrix[i].argsort()[::-1]
    similar_indices = [j for j in ranked_indices if j != i][:3]  # Top 3 similar customers
    # Store plain Python floats so the text written to the compact CSV does not
    # depend on how the installed numpy version prints its scalar types.
    lookalike_results[customer_id] = [
        (customer_ids[j], float(similarity_matrix[i, j])) for j in similar_indices
    ]

# Save compact format: one row per customer, lookalikes kept as a list of
# (customer_id, score) tuples.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results.keys()),
    'Lookalikes': list(lookalike_results.values()),
})
lookalike_df.to_csv('Samson_Sabu_Lookalike.csv', index=False)

# Expand Lookalikes into separate columns (Lookalike1/Score1, Lookalike2/Score2, ...).
# Columns are generated from however many lookalikes each customer has, so a
# dataset with fewer than four customers yields missing values instead of an
# IndexError.
expanded_rows = []
for source_customer, lookalikes in lookalike_results.items():
    record = {'CustomerID': source_customer}
    for rank, (lookalike_id, score) in enumerate(lookalikes, start=1):
        record[f'Lookalike{rank}'] = lookalike_id
        record[f'Score{rank}'] = score
    expanded_rows.append(record)
expanded_lookalike_df = pd.DataFrame(expanded_rows)

# Save expanded format
expanded_lookalike_df.to_csv('Samson_Sabu_Lookalike_Expanded.csv', index=False)

# Display Results
print("\nCompact Lookalike Recommendations:")
print(lookalike_df.head())

print("\nExpanded Lookalike Recommendations:")
print(expanded_lookalike_df.head())

# Function for dynamic recommendations
def get_recommendations(customer_id, n=3):
    """Return up to `n` customers most similar to `customer_id`.

    The customer is looked up in the module-level `customer_ids` list, and the
    matching row of the module-level `similarity_matrix` is ranked in
    descending order of similarity.

    Args:
        customer_id: Identifier to look up in `customer_ids`.
        n: Maximum number of lookalikes to return (default 3). Zero or a
            negative value yields an empty list.

    Returns:
        A list of `(customer_id, similarity)` tuples, best match first, with
        similarities as plain Python floats. If `customer_id` is not present
        in `customer_ids`, the string "Customer not found." is returned
        instead.
    """
    try:
        index = customer_ids.index(customer_id)  # single scan instead of `in` + `.index`
    except ValueError:
        return "Customer not found."

    ranked_indices = similarity_matrix[index].argsort()[::-1]
    # Exclude the customer's own index explicitly rather than assuming it is
    # ranked first: ties, rounding, or an all-zero feature row can put another
    # customer ahead of it.
    similar_indices = [j for j in ranked_indices if j != index][:max(n, 0)]
    return [(customer_ids[j], float(similarity_matrix[index, j])) for j in similar_indices]

# Example Usage: print a header line followed by the five closest matches
# returned for customer 'C0001'.
print(
    "\nDynamic Recommendations for C0001:",
    get_recommendations('C0001', n=5),
    sep="\n",
)


Compact Lookalike Recommendations:
  CustomerID                                         Lookalikes
0      C0001  [(C0191, 0.9864538255341586), (C0069, 0.981347...
1      C0002  [(C0031, 0.9979033139477226), (C0029, 0.985410...
2      C0003  [(C0176, 0.9549430669779934), (C0073, 0.893420...
3      C0004  [(C0068, 0.9909049685559322), (C0113, 0.989168...
4      C0005  [(C0123, 0.9997155252676422), (C0078, 0.994219...

Expanded Lookalike Recommendations:
  CustomerID Lookalike1    Score1 Lookalike2    Score2 Lookalike3    Score3
0      C0001      C0191  0.986454      C0069  0.981347      C0137  0.973648
1      C0002      C0031  0.997903      C0029  0.985410      C0197  0.960246
2      C0003      C0176  0.954943      C0073  0.893420      C0144  0.891651
3      C0004      C0068  0.990905      C0113  0.989169      C0017  0.988949
4      C0005      C0123  0.999716      C0078  0.994220      C0036  0.993105

Dynamic Recommendations for C0001:
[('C0191', 0.9864538255341586), ('C0069', 0.9813471