In [16]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

def _most_frequent(values):
    """Return the most frequent non-null entry of *values*, or None if there is none."""
    modes = values.mode()
    # Series.mode() drops nulls by default, so a group whose values are all
    # null yields an empty result; indexing it with [0] would raise.
    return modes.iloc[0] if not modes.empty else None


def build_lookalike_model(merged_data, customers):
    """Build per-customer profiles and a scaled feature matrix for lookalike search.

    Args:
        merged_data: Transaction-level DataFrame. Must provide the columns
            'CustomerID', 'TotalValue', 'Category' and 'TransactionID'.
        customers: Customer-level DataFrame. Must provide 'CustomerID' and
            'Region'; it is left-joined onto the aggregated profiles.

    Returns:
        A tuple ``(customer_profiles, scaled_features)``:
        ``customer_profiles`` is the aggregated profile DataFrame (one row per
        row of the left join) and ``scaled_features`` is the standardized
        feature array whose rows are in the same order as ``customer_profiles``.

    Note:
        'FavoriteCategory' is kept in the returned profiles but is not part of
        the feature matrix; only the three numeric aggregates and the one-hot
        encoded 'Region' are used.
    """

    # Feature engineering: one row of spend/frequency aggregates per customer.
    customer_profiles = merged_data.groupby('CustomerID').agg(
        TotalSpend=('TotalValue', 'sum'),
        AvgTransactionValue=('TotalValue', 'mean'),
        FavoriteCategory=('Category', _most_frequent),
        TransactionCount=('TransactionID', 'count')
    ).reset_index()

    # Merge with customer demographics
    customer_profiles = pd.merge(customer_profiles, customers, on="CustomerID", how="left")

    # One-hot encode categorical variables (Region)
    encoder = OneHotEncoder()
    encoded_region = encoder.fit_transform(customer_profiles[['Region']])
    encoded_region_df = pd.DataFrame(
        encoded_region.toarray(),
        columns=encoder.get_feature_names_out(['Region']),
        # Share the profiles' index so the column-wise concat below aligns
        # row-for-row instead of relying on two matching default indexes.
        index=customer_profiles.index,
    )

    # Combine numeric aggregates with the encoded region columns.
    numeric_columns = ['TotalSpend', 'AvgTransactionValue', 'TransactionCount']
    features = pd.concat([customer_profiles[numeric_columns], encoded_region_df], axis=1)

    # Normalize features
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    return customer_profiles, scaled_features


In [17]:
def recommend_lookalikes(customer_profiles, scaled_features, *, n_customers=20,
                         top_n=3, output_path="Lookalike.csv"):
    """Find the most similar customers using cosine similarity and save them to CSV.

    Args:
        customer_profiles: DataFrame with a 'CustomerID' column; row ``i`` must
            correspond to row ``i`` of ``scaled_features``.
        scaled_features: 2-D feature matrix passed to ``cosine_similarity``.
        n_customers: How many leading customers to produce recommendations
            for. Clamped to the number of available rows; ``None`` means all.
        top_n: Number of lookalikes to keep per customer.
        output_path: Destination of the CSV file.

    Returns:
        The DataFrame that was written, with columns 'CustomerID' and
        'Lookalikes' (a list of ``(customer_id, score)`` tuples, best first).
    """
    similarity_matrix = cosine_similarity(scaled_features)

    # Native Python values keep the CSV free of NumPy scalar reprs.
    customer_ids = customer_profiles['CustomerID'].tolist()

    # Clamp so inputs with fewer rows than requested do not raise IndexError.
    total = len(customer_ids)
    n_targets = total if n_customers is None else min(n_customers, total)

    lookalike_results = {}
    for i in range(n_targets):
        similarity_scores = similarity_matrix[i]
        ranked = similarity_scores.argsort()[::-1]
        # Exclude the customer's own row by position. Relying on it being the
        # single highest score breaks when another customer ties with it
        # (identical features) or when the row is all zeros.
        neighbours = ranked[ranked != i][:top_n]
        neighbour_ids = [customer_ids[j] for j in neighbours.tolist()]
        neighbour_scores = similarity_scores[neighbours].tolist()
        lookalike_results[customer_ids[i]] = list(zip(neighbour_ids, neighbour_scores))

    # Save results to CSV
    lookalike_df = pd.DataFrame(list(lookalike_results.items()), columns=['CustomerID', 'Lookalikes'])
    lookalike_df.to_csv(output_path, index=False)
    print(f"Lookalike model results saved to '{output_path}'.")
    return lookalike_df
