In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
class LookalikeModel:
    """Recommend similar ("lookalike") customers from transaction behaviour.

    Three CSV files are loaded on construction. Per-customer features are
    built from the transaction and product tables, standardised, and compared
    with cosine similarity.
    """

    def __init__(self, custFile='Customers.csv', transFile='Transactions.csv',
                 prodFile='Products.csv'):
        """Load the customer, transaction and product tables.

        Args:
            custFile: Path of the customers CSV.
            transFile: Path of the transactions CSV.
            prodFile: Path of the products CSV.
        """
        self.custDf = pd.read_csv(custFile)
        self.transDf = pd.read_csv(transFile)
        self.prodDf = pd.read_csv(prodFile)

    def createCustFeatures(self):
        """Build one feature row per customer that has transactions.

        The features are the transaction count, total/mean spend, total/mean
        quantity, number of distinct products, and one purchase-count column
        per product category.

        Returns:
            The feature DataFrame (also stored on ``self.custFeat``), with a
            ``CustomerID`` column and a fresh 0..n-1 index.
        """
        transFeat = self.transDf.groupby('CustomerID').agg({
            'TransactionID': 'count',
            'TotalValue': ['sum', 'mean'],
            'Quantity': ['sum', 'mean'],
            'ProductID': 'nunique',
        }).reset_index()
        # Flatten the two-level column index produced by the multi-aggregation.
        transFeat.columns = ['CustomerID', 'transCount', 'totSpend',
                             'avgTransVal', 'totQty', 'avgQty', 'uniqProds']

        prodCats = pd.merge(self.transDf,
                            self.prodDf[['ProductID', 'Category']],
                            on='ProductID')
        catPivot = pd.crosstab(prodCats['CustomerID'], prodCats['Category'])

        custFeat = pd.merge(transFeat, catPivot, left_on='CustomerID',
                            right_index=True, how='left')
        # A customer whose products are all missing from the product table has
        # no row in catPivot; the left merge then yields NaN counts, which
        # would propagate into the similarity computation. Treat them as 0.
        catCols = list(catPivot.columns)
        if catCols:
            custFeat[catCols] = custFeat[catCols].fillna(0)

        # Guarantee that index labels equal row positions for callers.
        self.custFeat = custFeat.reset_index(drop=True)
        return self.custFeat

    @staticmethod
    def _topSimilarIdxs(simScores, custIdx, nRecs):
        """Return positions of the ``nRecs`` highest scores, excluding ``custIdx``."""
        scores = np.asarray(simScores, dtype=float)
        # Stable sort on the negated scores: descending order, ties resolved
        # by ascending position so the result is deterministic.
        order = np.argsort(-scores, kind='stable')
        # Drop the customer explicitly instead of assuming it is ranked first;
        # another customer can tie with (or numerically edge past) the
        # self-similarity score.
        order = order[order != custIdx]
        return order[:max(nRecs, 0)]

    def findLookalikes(self, targetCusts, nRecs=3):
        """Find the most similar customers for each target customer.

        Args:
            targetCusts: Iterable of customer ids to get recommendations for.
            nRecs: Maximum number of lookalikes per customer.

        Returns:
            Dict mapping each target id to a list of
            ``{'custId': ..., 'simScore': ...}`` dicts ordered from most to
            least similar. A target id with no transaction history maps to an
            empty list.
        """
        feats = self.createCustFeatures()
        custIds = feats['CustomerID'].to_numpy()
        # Positional lookup so rows line up with the similarity matrix
        # regardless of the DataFrame's index labels.
        posById = {cid: pos for pos, cid in enumerate(custIds)}

        scaler = StandardScaler()
        featsScaled = scaler.fit_transform(feats.drop('CustomerID', axis=1))
        simMat = cosine_similarity(featsScaled)

        recs = {}
        for custId in targetCusts:
            custIdx = posById.get(custId)
            if custIdx is None:
                # No transactions means no feature row and nothing to compare.
                recs[custId] = []
                continue
            simScores = simMat[custIdx]
            recs[custId] = [
                {
                    'custId': custIds[idx],
                    'simScore': float(simScores[idx]),
                }
                for idx in self._topSimilarIdxs(simScores, custIdx, nRecs)
            ]

        return recs

    def saveRecs(self, recs, outFile='Lookalike.csv'):
        """Write recommendations to CSV, one row per target customer.

        Args:
            recs: Mapping as returned by ``findLookalikes``.
            outFile: Destination path or writable text buffer.

        The ``lookalikes`` column holds comma-separated ``custId:score``
        pairs with the score rounded to three decimals.
        """
        outData = [
            {
                'customerId': custId,
                'lookalikes': ','.join(
                    f"{r['custId']}:{r['simScore']:.3f}" for r in rec
                ),
            }
            for custId, rec in recs.items()
        ]
        # Explicit columns keep the header row even when recs is empty.
        pd.DataFrame(outData, columns=['customerId', 'lookalikes']).to_csv(
            outFile, index=False)
def main():
    """Generate lookalikes for customers C0001-C0020, save them, and print a sample.

    The recommendations are written with the model's default output file, and
    the first five customers' results are echoed to stdout.
    """
    sampleSize = 5
    lookalikeModel = LookalikeModel()

    wantedIds = []
    for num in range(1, 21):
        wantedIds.append(f'C{str(num).zfill(4)}')

    recs = lookalikeModel.findLookalikes(wantedIds)
    lookalikeModel.saveRecs(recs)

    print("\nSample Lookalike Recommendations:")
    for shown, (custId, rec) in enumerate(recs.items()):
        # Only the leading entries are printed; the rest live in the CSV.
        if shown >= sampleSize:
            break
        print(f"\nCustomer {custId}:")
        for r in rec:
            print(f"  Similar customer: {r['custId']}, "
                  f"Similarity score: {r['simScore']:.3f}")

# Run the lookalike pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



Sample Lookalike Recommendations:

Customer C0001:
  Similar customer: C0069, Similarity score: 0.925
  Similar customer: C0127, Similarity score: 0.777
  Similar customer: C0035, Similarity score: 0.763

Customer C0002:
  Similar customer: C0031, Similarity score: 0.855
  Similar customer: C0062, Similarity score: 0.853
  Similar customer: C0133, Similarity score: 0.850

Customer C0003:
  Similar customer: C0144, Similarity score: 0.827
  Similar customer: C0166, Similarity score: 0.784
  Similar customer: C0129, Similarity score: 0.724

Customer C0004:
  Similar customer: C0113, Similarity score: 0.934
  Similar customer: C0017, Similarity score: 0.930
  Similar customer: C0065, Similarity score: 0.914

Customer C0005:
  Similar customer: C0095, Similarity score: 0.916
  Similar customer: C0007, Similarity score: 0.915
  Similar customer: C0035, Similarity score: 0.897
