In [9]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Read the three input tables in one pass; the order of the paths matches the
# order of the names on the left-hand side.
customers, products, transactions = map(
    pd.read_csv,
    (
        '/content/Customers.csv',
        '/content/Products.csv',
        '/content/Transactions.csv',
    ),
)

In [12]:
# Build the customer x product purchase matrix: one row per CustomerID, one
# column per ProductID, each cell holding the quantity bought.
# aggfunc='sum' is given explicitly because pivot_table defaults to 'mean',
# which would average the Quantity over repeated purchases of the same product
# instead of accumulating them. Pairs with no transaction are filled with 0.
customer_product_matrix = pd.pivot_table(
    transactions,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0
)

In [13]:
# Pairwise cosine similarity between the rows (customers) of the purchase
# matrix; the result has one row and one column per customer.
similarity_matrix = cosine_similarity(customer_product_matrix)

In [14]:
# Wrap the raw similarity array in a DataFrame labelled by customer on both
# axes, so scores can be looked up by CustomerID instead of by position.
customer_index = customer_product_matrix.index
similarity_df = pd.DataFrame(
    similarity_matrix, index=customer_index, columns=customer_index
)


In [15]:
def get_top_lookalikes(customer_id, top_n=3, similarity=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up; it is matched against
            the row labels of the similarity table.
        top_n: Maximum number of lookalikes to return.
        similarity: Optional square DataFrame of pairwise similarity scores
            whose rows and columns carry the same customer labels. When
            omitted, the notebook-level ``similarity_df`` is used.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered from the
        highest score to the lowest, never including ``customer_id`` itself.
        The list is empty when ``customer_id`` has no row in the similarity
        table (for example, a customer that never appears in the data the
        table was built from).
    """
    if similarity is None:
        # Resolved at call time so the function picks up the current global.
        similarity = similarity_df

    # A customer missing from the table has no scores to rank; report "no
    # lookalikes" instead of raising KeyError and aborting the caller's loop.
    if customer_id not in similarity.index:
        return []

    # Rank every customer by similarity to the requested one, best first.
    ranked_scores = similarity.loc[customer_id].sort_values(ascending=False)

    # A customer is trivially most similar to itself; leave it out.
    ranked_scores = ranked_scores.drop(customer_id)

    return list(ranked_scores.head(top_n).items())

In [16]:
# Map each of the first 20 customers (in file order) to its top lookalikes.
recommendations = {
    customer_id: get_top_lookalikes(customer_id)
    for customer_id in customers['CustomerID'].iloc[:20]
}

In [17]:
import csv

# Persist the recommendations as a two-column CSV: the customer ID and the
# text form of its list of (lookalike_id, score) tuples.
with open('Lookalike.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['CustomerID', 'Lookalikes'])  # Header row
    for customer_id, lookalikes in recommendations.items():
        # csv.writer stringifies the list, which uses repr() on each element.
        # NumPy 2 scalars repr as "np.float64(0.5477)", so convert the scores
        # to built-in floats to keep the file free of NumPy-specific text.
        plain_lookalikes = [
            (lookalike_id, float(score)) for lookalike_id, score in lookalikes
        ]
        writer.writerow([customer_id, plain_lookalikes])

In [18]:
# Print a small text report per customer: a header line, one line per
# lookalike with its score to four decimals, then a divider.
divider = "-" * 20
for customer_id, lookalikes in recommendations.items():
    report_lines = [f"Customer: {customer_id}"]
    report_lines.extend(
        f"  Lookalike: {lookalike_id}, Similarity: {score:.4f}"
        for lookalike_id, score in lookalikes
    )
    report_lines.append(divider)
    print("\n".join(report_lines))

Customer: C0001
  Lookalike: C0097, Similarity: 0.5477
  Lookalike: C0194, Similarity: 0.4697
  Lookalike: C0199, Similarity: 0.4382
--------------------
Customer: C0002
  Lookalike: C0091, Similarity: 0.3802
  Lookalike: C0030, Similarity: 0.3728
  Lookalike: C0071, Similarity: 0.3299
--------------------
Customer: C0003
  Lookalike: C0134, Similarity: 0.5199
  Lookalike: C0181, Similarity: 0.5176
  Lookalike: C0144, Similarity: 0.4000
--------------------
Customer: C0004
  Lookalike: C0070, Similarity: 0.4989
  Lookalike: C0132, Similarity: 0.3843
  Lookalike: C0063, Similarity: 0.3361
--------------------
Customer: C0005
  Lookalike: C0096, Similarity: 0.6482
  Lookalike: C0055, Similarity: 0.5145
  Lookalike: C0064, Similarity: 0.3329
--------------------
Customer: C0006
  Lookalike: C0058, Similarity: 0.6489
  Lookalike: C0040, Similarity: 0.5804
  Lookalike: C0196, Similarity: 0.4643
--------------------
Customer: C0007
  Lookalike: C0020, Similarity: 0.5883
  Lookalike: C0079, S