# Import necessary libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

  from pandas.core import (


# Load datasets

In [2]:
# Read the raw transaction log from the working directory; later cells rely on
# its TransactionID, CustomerID, ProductID, Quantity and TotalValue columns.
transactions = pd.read_csv(filepath_or_buffer='transactions.csv')

In [3]:
# Inline product lookup table, stored column-wise (one list per column, all
# lists aligned by position).
products_data = dict(
    ProductID=['P001', 'P002', 'P003', 'P004', 'P005'],
    ProductName=[
        'ActiveWear Biography',
        'ActiveWear Smartwatch',
        'ComfortLiving Biography',
        'BookWorld Rug',
        'TechPro T-Shirt',
    ],
    Category=['Books', 'Electronics', 'Books', 'Home Decor', 'Clothing'],
    Price=[169.3, 346.3, 44.12, 95.69, 429.31],
)

In [4]:
# Materialize the column-wise product dict as a DataFrame (one row per product).
products = pd.DataFrame(data=products_data)

In [5]:
# Inline customer lookup table, stored column-wise (one list per column, all
# lists aligned by position). SignupDate values are ISO-formatted strings.
customers_data = dict(
    CustomerID=['C0001', 'C0002', 'C0003', 'C0004', 'C0005'],
    CustomerName=[
        'Lawrence Carroll',
        'Elizabeth Lutz',
        'Michael Rivera',
        'Kathleen Rodriguez',
        'Laura Weber',
    ],
    Region=['South America', 'Asia', 'South America', 'South America', 'Asia'],
    SignupDate=['2022-07-10', '2022-02-13', '2024-03-07', '2022-10-09', '2022-08-15'],
)

In [6]:
# Materialize the column-wise customer dict as a DataFrame (one row per customer).
customers = pd.DataFrame(data=customers_data)

# Merge datasets

In [7]:
# Enrich each transaction with its product attributes, then with its customer
# attributes. Both joins are inner joins (the pandas default), so a transaction
# whose ProductID or CustomerID is missing from the lookup tables is dropped.
merged_data = pd.merge(
    pd.merge(transactions, products, how='inner', on='ProductID'),
    customers,
    how='inner',
    on='CustomerID',
)

# Feature engineering: Create customer-level aggregated data

In [8]:
# Collapse the merged transactions to one row per customer:
#   total_transactions    - number of TransactionID entries
#   total_quantity        - sum of Quantity
#   total_revenue         - sum of TotalValue
#   avg_transaction_value - mean of TotalValue
# reset_index() turns CustomerID back into an ordinary column.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        total_transactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
        total_quantity=pd.NamedAgg(column='Quantity', aggfunc='sum'),
        total_revenue=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        avg_transaction_value=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
    )
    .reset_index()
)

# One-hot encode customer regions

In [9]:
# One indicator column per distinct Region, indexed by CustomerID.
region_dummies = (
    customers
    .set_index('CustomerID')['Region']
    .pipe(pd.get_dummies)
)
# Move CustomerID into the index and attach the region indicators (left join on
# the index). NOTE: this rebinds customer_features, so re-running the cell on
# its own raises KeyError because CustomerID is no longer a column.
customer_features = (
    customer_features
    .set_index('CustomerID')
    .join(other=region_dummies)
)

# Normalize the feature data

In [10]:
# Standardize every customer feature (zero mean, unit variance per column) so
# that the transaction aggregates and the region indicators contribute on a
# comparable scale to the cosine similarity computed next.
#
# The transaction aggregates (total_transactions, total_quantity,
# total_revenue, avg_transaction_value) must stay in the matrix: dropping them
# leaves only the region indicators, which makes all customers of one region
# identical and turns the top-3 ranking into an arbitrary tie-break.
#
# astype(float) gives the scaler a single numeric dtype; get_dummies may emit
# boolean indicator columns.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_features.astype(float))

# Compute similarity scores

In [11]:
# Pairwise cosine similarity between all customer rows; entry [i, j] compares
# row i with row j of the normalized feature matrix.
similarity_matrix = cosine_similarity(X=normalized_features)

# Build the lookalike model

In [14]:
# Map every customer to its three most similar other customers.
# Row order of similarity_matrix follows customer_features' index order, so
# position k in a row belongs to customer_ids[k].
customer_ids = customer_features.index.tolist()
lookalike_map = {}
for row_pos, customer_id in enumerate(customer_ids):
    # Pair each score in this customer's row with the ID it refers to,
    # leaving out the customer's own entry.
    candidates = [
        (other_id, score)
        for other_id, score in zip(customer_ids, similarity_matrix[row_pos])
        if other_id != customer_id
    ]
    # Highest similarity first; the sort is stable, so ties keep index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    # Keep at most the three best matches.
    lookalike_map[customer_id] = candidates[:3]

In [15]:
# Create Lookalike.csv
# One row per customer. The Lookalikes cell holds that customer's ranked
# neighbours as a list of {"CustomerID", "Score"} dicts, which to_csv writes
# out using the list's text representation.
#
# Scores are cast to built-in float: the similarity matrix yields NumPy
# scalars, and under NumPy >= 2 their repr is "np.float64(...)", which would
# otherwise end up verbatim inside the CSV.
#
# columns= is given explicitly so that an empty lookalike_map still produces a
# file with the expected header instead of an empty file.
lookalike_df = pd.DataFrame(
    [
        {
            'CustomerID': customer_id,
            'Lookalikes': [{"CustomerID": sim[0], "Score": float(sim[1])} for sim in lookalikes]
        }
        for customer_id, lookalikes in lookalike_map.items()
    ],
    columns=['CustomerID', 'Lookalikes'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)

# Display a preview of the lookalike map


In [16]:
# Plain-text preview of the first five rows of the lookalike table.
print(lookalike_df.head(n=5))

  CustomerID                                         Lookalikes
0      C0002  [{'CustomerID': 'C0003', 'Score': -0.999999999...
1      C0003  [{'CustomerID': 'C0002', 'Score': -0.999999999...


In [17]:
# Evaluate the model
## Verify similarity scores range
# Cosine similarity is bounded by -1 and 1 (not 0 and 1): the features are
# standardized around zero, so opposite-pointing customers legitimately score
# negative.
print("\nVerifying similarity scores range (-1 to 1):")
# Flatten every (customer_id, score) pair in the map down to its score.
similarity_values = [sim[1] for sims in lookalike_map.values() for sim in sims]
if similarity_values:
    print(f"Minimum similarity: {min(similarity_values):.4f}")
    print(f"Maximum similarity: {max(similarity_values):.4f}")
else:
    # min()/max() raise ValueError on an empty sequence, which happens when
    # there are no customers or only one (nobody to compare against).
    print("No similarity scores available to evaluate.")


Verifying similarity scores range (0 to 1):
Minimum similarity: -1.0000
Maximum similarity: -1.0000


In [19]:
## Diversity check
# Compare how many distinct customers appear as a recommendation against the
# total number of recommendation slots filled across all customers.
print("\nDiversity check in recommendations:")
recommended_customers = [
    neighbour_id
    for neighbours in lookalike_map.values()
    for neighbour_id, _score in neighbours
]
unique_recommendations = len(set(recommended_customers))
total_recommendations = len(recommended_customers)
print(f"Total unique customers recommended: {unique_recommendations}")
print(f"Total recommendations made: {total_recommendations}")


Diversity check in recommendations:
Total unique customers recommended: 2
Total recommendations made: 2
