In [None]:
import pandas as pd

# Load the three raw datasets from CSV files in the working directory
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Display basic info.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() only adds a stray "None" line.
customers.info()
products.info()
transactions.info()

# Check for missing values (per-column count of nulls)
print(customers.isnull().sum())
print(products.isnull().sum())
print(transactions.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   CustomerID    200 non-null    object
 1   CustomerName  200 non-null    object
 2   Region        200 non-null    object
 3   SignupDate    200 non-null    object
dtypes: object(4)
memory usage: 6.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ProductID    100 non-null    object 
 1   ProductName  100 non-null    object 
 2   Category     100 non-null    object 
 3   Price        100 non-null    float64
dtypes: float64(1), object(3)
memory usage: 3.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------

### *Task 2: Lookalike Model*

#### *Step 1: Preprocess Data*
Combine customer, transaction, and product data into a single customer-level table for feature engineering.

In [None]:
# Merge customer and transaction data (one row per transaction, with the
# customer's profile columns attached)
customer_transactions = transactions.merge(customers, on='CustomerID', how='left')

# Merge product data to get the 'Category' column.
# This is a left merge, so a transaction whose ProductID is missing from
# `products` keeps its row but gets a null Category.
customer_transactions = customer_transactions.merge(products[['ProductID', 'Category']], on='ProductID', how='left')


def _most_frequent(values):
    """Return the most frequent non-null entry of ``values``.

    ``Series.mode()`` ignores nulls, so it returns an empty Series when every
    entry is null; indexing that with ``[0]`` would raise. NaN is returned in
    that case. When several values tie, the first one reported by ``mode()``
    is used.
    """
    modes = values.mode()
    if modes.empty:
        return float('nan')
    return modes.iloc[0]


# Aggregate customer-level features
customer_features = customer_transactions.groupby('CustomerID').agg(
    total_spent=('TotalValue', 'sum'),
    avg_transaction_value=('TotalValue', 'mean'),
    total_transactions=('TransactionID', 'nunique'),
    favorite_category=('Category', _most_frequent)
).reset_index()

# Merge with customer profile data (the groupby above keeps only CustomerID
# and the aggregates, so the profile columns are re-attached here)
customer_features = customer_features.merge(customers, on='CustomerID', how='left')

#### *Step 2: Build Lookalike Model*
Use a similarity metric (e.g., cosine similarity) to find similar customers.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Numeric behaviour features that drive the similarity comparison
feature_columns = ['total_spent', 'avg_transaction_value', 'total_transactions']

# Standardise the features with StandardScaler so they are on a comparable scale
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features[feature_columns])

# Pairwise cosine similarity: entry [i, j] compares row i of
# customer_features with row j
similarity_matrix = cosine_similarity(scaled_features)

# Function to get the top-N most similar customers (default: top 3)
def get_top_similar_customers(customer_id, similarity_matrix, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id :
        Value to look up in the ``CustomerID`` column of the module-level
        ``customer_features`` frame.
    similarity_matrix :
        Square array whose row/column order matches the row order of
        ``customer_features``.
    top_n : int, default 3
        Number of lookalikes to return.

    Returns
    -------
    list of (CustomerID, score) tuples, highest similarity first. The
    queried customer is never included in the result.

    Raises
    ------
    IndexError
        If ``customer_id`` does not appear in ``customer_features``.
    """
    # Use the row *position* rather than the index label, since the matrix is
    # positional and `.iloc` is used for the reverse lookup below.
    positions = (customer_features['CustomerID'] == customer_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in customer_features")
    customer_index = positions[0]

    similarities = similarity_matrix[customer_index]

    # Rank every customer from most to least similar, then drop the queried
    # customer explicitly. Relying on "self is always the last element of
    # argsort" breaks when another customer ties with (or, through rounding,
    # exceeds) the self-similarity score.
    ranked = similarities.argsort()[::-1]
    top_indices = ranked[ranked != customer_index][:top_n]

    top_customers = customer_features.iloc[top_indices]['CustomerID'].tolist()
    top_scores = similarities[top_indices].tolist()
    return list(zip(top_customers, top_scores))

# Build the lookalike recommendations for the first 20 rows of
# customer_features, keyed by CustomerID
lookalike_results = {
    cid: get_top_similar_customers(cid, similarity_matrix)
    for cid in customer_features['CustomerID'].head(20)
}

In [None]:
# Save results to CSV
import csv
# Write one CSV row per (customer, lookalike) pair, preceded by a header row
with open('FirstName_LastName_Lookalike.csv', 'w', newline='') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerow(['CustomerID', 'LookalikeCustomerID', 'SimilarityScore'])
    csv_writer.writerows(
        [source_id, match[0], match[1]]
        for source_id, matches in lookalike_results.items()
        for match in matches
    )