In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
# All three CSVs live in the same Drive folder; keep the folder in one place
# so it only has to be changed once when the notebook is run elsewhere.
DATA_DIR = '/content/drive/MyDrive/Data science Assignment Zeotap'

# 'utf-8-sig' decodes exactly like UTF-8 but also consumes a leading byte-order
# mark, so a BOM never ends up glued to the first column name.
customers = pd.read_csv(f'{DATA_DIR}/Customers.csv', sep=',', encoding='utf-8-sig')
products = pd.read_csv(f'{DATA_DIR}/Products.csv', sep=',', encoding='utf-8-sig')
transactions = pd.read_csv(f'{DATA_DIR}/Transactions.csv', sep=',', encoding='utf-8-sig')

# Defensive cleanup of column names: strip a stray BOM code point as well as
# the 'ï»¿' mojibake that appears when a BOM was mis-decoded as Latin-1 before
# the file was saved. Literal (non-regex) replacement in both cases.
for frame in (customers, products, transactions):
    frame.columns = (
        frame.columns
        .str.replace('\ufeff', '', regex=False)
        .str.replace('ï»¿', '', regex=False)
    )

# Convert date columns to datetime
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])

# Merge datasets into one DataFrame: every transaction row is enriched with
# its customer's and product's attributes. Inner joins (the pandas default)
# drop transactions whose CustomerID / ProductID has no match.
data = pd.merge(transactions, customers, on='CustomerID', how='inner')
data = pd.merge(data, products, on='ProductID', how='inner')


In [3]:
# Customer x product spend matrix: one row per CustomerID, one column per
# ProductName, each cell holding the summed TotalValue. Customer/product
# pairs with no purchases come out as NaN from the pivot and are set to 0.
customer_product_matrix = pd.pivot_table(
    data,
    values='TotalValue',
    index='CustomerID',
    columns='ProductName',
    aggfunc='sum',
).fillna(0)

# Standardise every product column, then wrap the resulting array back into
# a DataFrame that carries the original row and column labels.
scaler = StandardScaler()
customer_product_matrix_scaled = pd.DataFrame(
    scaler.fit_transform(customer_product_matrix),
    index=customer_product_matrix.index,
    columns=customer_product_matrix.columns,
)

# Pairwise cosine similarity between customer rows of the scaled matrix.
similarity_matrix = cosine_similarity(customer_product_matrix_scaled)

# Label both axes of the similarity scores with the customer IDs.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Show a small sample of the similarity scores.
print(similarity_df.head())
# Function to get top N similar customers
def get_top_n_similar(customers, similarity_df, n=3):
    """Return the ``n`` most similar other customers for each requested customer.

    Parameters
    ----------
    customers : iterable
        Customer IDs to produce lookalikes for. Each must be a column label
        of ``similarity_df``.
    similarity_df : pandas.DataFrame
        Similarity scores; the column for a customer is read as that
        customer's score against every row label.
    n : int, default 3
        Maximum number of lookalikes per customer. Values below 1 yield an
        empty list.

    Returns
    -------
    dict
        Maps each customer ID to a list of ``(other_customer_id, score)``
        tuples ordered from most to least similar.

    Raises
    ------
    KeyError
        If a requested customer ID is not a column of ``similarity_df``.
    """
    limit = max(n, 0)
    recommendations = {}
    for customer_id in customers:
        # Remove the customer's own entry by label rather than assuming it
        # sorts first: another customer can tie with the self-similarity
        # score, in which case positional slicing could return the customer
        # as its own lookalike and drop a genuine match.
        scores = similarity_df[customer_id].drop(labels=customer_id, errors='ignore')
        # Stable sort keeps tied scores in their original label order, so the
        # result is deterministic.
        top_matches = scores.sort_values(ascending=False, kind='stable').iloc[:limit]
        recommendations[customer_id] = list(zip(top_matches.index, top_matches.values))
    return recommendations

# Take the first 20 customer IDs from the customers table and look up the
# three most similar customers for each of them.
first_20_customers = customers['CustomerID'].head(20)
recommendations = get_top_n_similar(first_20_customers, similarity_df, n=3)

# One record per customer: the ID plus its list of (lookalike, score) pairs.
lookalike_records = [
    dict(CustomerID=cust_id, Lookalikes=matches)
    for cust_id, matches in recommendations.items()
]
lookalike_df = pd.DataFrame(lookalike_records)

CustomerID     C0001     C0002     C0003     C0004     C0005     C0006  \
CustomerID                                                               
C0001       1.000000 -0.054594  0.054619 -0.022810 -0.063402 -0.091476   
C0002      -0.054594  1.000000 -0.039473 -0.065389 -0.022230 -0.042617   
C0003       0.054619 -0.039473  1.000000  0.083942  0.114454 -0.067773   
C0004      -0.022810 -0.065389  0.083942  1.000000 -0.000474 -0.099499   
C0005      -0.063402 -0.022230  0.114454 -0.000474  1.000000 -0.047611   

CustomerID     C0007     C0008     C0009     C0010  ...     C0191     C0192  \
CustomerID                                          ...                       
C0001       0.092261 -0.054069  0.062400 -0.058647  ...  0.003818 -0.063134   
C0002      -0.037948  0.299206  0.002358 -0.018067  ... -0.041409 -0.018110   
C0003      -0.008329  0.392142  0.127793 -0.040201  ... -0.004324 -0.042806   
C0004       0.213756 -0.060083 -0.060029 -0.051889  ... -0.060177 -0.093141   
C0005  

In [5]:
# Name of the CSV that is written and then offered for download.
LOOKALIKE_CSV = 'FirstName_LastName_Lookalike.csv'

# Rebuild the export table from the recommendations mapping: one row per
# customer with its list of (lookalike, score) pairs.
export_rows = []
for cust_id, matches in recommendations.items():
    export_rows.append({'CustomerID': cust_id, 'Lookalikes': matches})
lookalike_df = pd.DataFrame(export_rows)

# Write the table to disk without the positional index column.
lookalike_df.to_csv(LOOKALIKE_CSV, index=False)

print("Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'")

# Colab-only helper; imported here, after the file has been written, so the
# CSV is saved even where google.colab is unavailable.
from google.colab import files

# Trigger a browser download of the CSV that was just written.
files.download(LOOKALIKE_CSV)


Lookalike recommendations saved to 'FirstName_LastName_Lookalike.csv'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>