# Task 2

## Import necessary libraries

In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

## Load the datasets

In [13]:
# Read the customer and transaction tables from the two CSV files, in that order.
customers, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/Customers.csv", "/content/Transactions.csv")
)

## Merge customer and transaction data


In [14]:
# Preliminary inner join on CustomerID. The same join is repeated in a later cell
# after the date columns are parsed, and that later result replaces this one.
customer_transactions = customers.merge(transactions, on='CustomerID')

## Check for missing values


In [15]:
# Print the per-column null counts of each raw table under its own heading.
print("\nMissing Values in Customers:", customers.isna().sum(), sep="\n")
print("\nMissing Values in Transactions:", transactions.isna().sum(), sep="\n")


Missing Values in Customers:
CustomerID      0
CustomerName    0
Region          0
SignupDate      0
dtype: int64

Missing Values in Transactions:
TransactionID      0
CustomerID         0
ProductID          0
TransactionDate    0
Quantity           0
TotalValue         0
Price              0
dtype: int64


## Convert date columns to datetime


In [16]:
# Parse the date columns in place so they hold datetime values rather than
# the raw values read from the CSV files.
customers['SignupDate'] = customers['SignupDate'].pipe(pd.to_datetime)
transactions['TransactionDate'] = transactions['TransactionDate'].pipe(pd.to_datetime)

## Re-merge customer and transaction data (after date conversion)


In [17]:
# Inner join: one row per transaction, carrying the matching customer's columns.
# Rows whose CustomerID appears in only one of the two tables are dropped.
customer_transactions = customers.merge(
    transactions,
    how='inner',
    on='CustomerID',
)

## Create customer features


In [18]:
# Build one row per customer summarising their purchase behaviour:
#   TotalValue      - total spending (sum of TotalValue)
#   Quantity        - total quantity purchased (sum of Quantity)
#   TransactionDate - number of transactions
#
# The transaction count is taken from the distinct TransactionID values rather
# than the distinct TransactionDate values: two transactions that share the same
# timestamp would otherwise be counted once. The output column keeps the name
# 'TransactionDate' because the normalization cell selects the feature by that name.
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        TransactionDate=('TransactionID', 'nunique'),
    )
    .reset_index()
)

## Normalize features


In [19]:
# Standardize the three numeric features so each contributes on a comparable
# scale to the similarity computation. CustomerID is left out of the matrix.
feature_columns = ['TotalValue', 'Quantity', 'TransactionDate']
scaler = StandardScaler()
scaler.fit(customer_features[feature_columns])
customer_features_scaled = scaler.transform(customer_features[feature_columns])

## Compute similarity matrix


In [20]:
# Pairwise cosine similarity between every pair of rows of the scaled feature
# matrix; entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=customer_features_scaled)

## Function to get top 3 lookalikes for a given customer

In [21]:
def get_lookalikes(customer_id, similarity_matrix, top_n=3, features=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Value to look up in the ``CustomerID`` column of ``features``.
    similarity_matrix
        Square array-like of pairwise similarity scores. Row/column ``i`` is
        assumed to correspond to row ``i`` (by position) of ``features``.
    top_n : int, default 3
        Maximum number of lookalikes to return.
    features : pandas.DataFrame, optional
        Table with a ``CustomerID`` column. Defaults to the notebook-level
        ``customer_features`` table.

    Returns
    -------
    list of (CustomerID, score) tuples
        Ordered from most to least similar. The queried customer is never
        included in the result.

    Raises
    ------
    IndexError
        If ``customer_id`` is not present in ``features``.
    """
    if features is None:
        # Fall back to the feature table built earlier in the notebook.
        features = customer_features

    # Locate the customer by *position*, not by index label, because the
    # similarity matrix is addressed positionally.
    matches = np.flatnonzero((features['CustomerID'] == customer_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in the feature table")
    customer_index = int(matches[0])

    similarities = np.asarray(similarity_matrix[customer_index])

    # Rank every customer from most to least similar. The stable sort keeps
    # tied scores in their original row order so the result is deterministic.
    ranked = np.argsort(-similarities, kind='stable')

    # Drop the customer explicitly instead of assuming it sorts first: ties or
    # floating-point rounding can place another customer at or above the
    # self-similarity score.
    top_indices = ranked[ranked != customer_index][:max(top_n, 0)]

    lookalikes = features['CustomerID'].iloc[top_indices].tolist()
    scores = similarities[top_indices]
    return list(zip(lookalikes, scores))

## Generate lookalikes for the first 20 customers


In [22]:
# Map each of the first 20 customers in the feature table to its list of
# (lookalike CustomerID, similarity score) pairs.
lookalike_results = {
    customer_id: get_lookalikes(customer_id, similarity_matrix)
    for customer_id in customer_features['CustomerID'].head(20)
}

## Save results to a CSV file


In [23]:
# Each cell of the output table is a (lookalike CustomerID, score) tuple that is
# written to the CSV through its string form. Convert the scores to built-in
# floats first: NumPy 2 renders NumPy scalars inside a tuple as
# "np.float64(...)", which would otherwise end up in the file.
lookalike_table = {
    customer_id: [(lookalike_id, float(score)) for lookalike_id, score in matches]
    for customer_id, matches in lookalike_results.items()
}

# Customers become rows; the three ranked lookalikes become columns.
lookalike_df = pd.DataFrame(lookalike_table).T.reset_index()
lookalike_df.columns = ['CustomerID', 'Lookalike1', 'Lookalike2', 'Lookalike3']
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model results saved to Lookalike.csv")

Lookalike model results saved to Lookalike.csv
