In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
customers = pd.read_csv("/content/Customers.csv")
products = pd.read_csv("/content/Products.csv")
transactions = pd.read_csv("/content/Transactions.csv")

# Merge datasets to create a comprehensive dataset
data = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")

# Feature engineering: Create customer-product interaction matrix
customer_product_matrix = data.pivot_table(
    index="CustomerID",
    columns="ProductID",
    values="Quantity",
    fill_value=0
)

# Normalize the data to handle differences in scale
scaler = StandardScaler()
customer_product_matrix_normalized = scaler.fit_transform(customer_product_matrix)

# Compute the cosine similarity between customers
similarity_matrix = cosine_similarity(customer_product_matrix_normalized)

# Convert similarity matrix to a DataFrame
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index
)

# Function to get top 3 similar customers for a given customer
def get_top_similar_customers(customer_id, similarity_df, n=3):
    similar_customers = similarity_df.loc[customer_id].sort_values(ascending=False).iloc[1:n+1]
    return list(zip(similar_customers.index, similar_customers.values))

# Generate lookalike recommendations for the first 20 customers (C0001 - C0020)
lookalike_results = {}
for customer_id in customers["CustomerID"][:20]:
    if customer_id in similarity_df.index:
        lookalike_results[customer_id] = get_top_similar_customers(customer_id, similarity_df)

# Convert lookalike results to a DataFrame for CSV export
lookalike_list = [
    {"CustomerID": key, "SimilarCustomers": value}
    for key, values in lookalike_results.items()
    for value in values
]
lookalike_df = pd.DataFrame(lookalike_list, columns=["CustomerID", "SimilarCustomerID", "SimilarityScore"])

# Save to CSV
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)
print("Lookalike recommendations saved to CSV.")


Lookalike recommendations saved to CSV.


In [13]:
import pandas as pd
from sklearn.metrics import pairwise_distances
from sklearn.metrics import mean_squared_error

# Evaluation Function
def evaluate_lookalike_model(customer_product_matrix, similarity_df, lookalike_df):
    """
    Evaluate the Lookalike Model based on accuracy, logic, and quality of recommendations.

    Parameters:
    - customer_product_matrix (pd.DataFrame): Customer-Product interaction matrix.
    - similarity_df (pd.DataFrame): DataFrame containing the similarity scores.
    - lookalike_df (pd.DataFrame): DataFrame containing lookalike recommendations.

    Returns:
    - evaluation_metrics (dict): Dictionary containing evaluation scores.
    """
    # Drop rows with NaN values in 'CustomerID' or 'SimilarCustomerID'
    lookalike_df = lookalike_df.dropna(subset=['CustomerID', 'SimilarCustomerID'])

    # Metric 1: Similarity Score Consistency
    consistent_recommendations = 0
    total_recommendations = len(lookalike_df)
    if total_recommendations == 0:
        similarity_score_consistency = 0  # or any other appropriate value
    else:
        for _, row in lookalike_df.iterrows():
            customer_id = row['CustomerID']
            similar_customer_id = row['SimilarCustomerID']
            similarity_score = row['SimilarityScore']

        # Check if the stored similarity score matches the actual similarity
            actual_similarity = similarity_df.loc[str(customer_id),str(similar_customer_id)]
            if abs(similarity_score - actual_similarity) < 0.01:  # Threshold for minor differences
                consistent_recommendations += 1

        similarity_score_consistency = consistent_recommendations / total_recommendations

    # Metric 2: Accuracy of Recommendations (MSE between stored and actual similarities)
    if total_recommendations > 0:  # Check if there are any recommendations before calculating MSE
        mse_similarity = mean_squared_error(
            lookalike_df['SimilarityScore'],
            lookalike_df.apply(lambda row: similarity_df.loc[row['CustomerID'], row['SimilarCustomerID']], axis=1)
        )
    else:
        mse_similarity = 0 # or any other appropriate value if there are no recommendations


    # Metric 3: Coverage of Recommendations
    coverage = len(lookalike_df['SimilarCustomerID'].unique()) / len(customer_product_matrix.index)

    # Metric 4: Diversity in Recommendations
    diversity = 1 - pairwise_distances(customer_product_matrix.values, metric='cosine').mean()

    # Compile evaluation metrics
    evaluation_metrics = {
        "Similarity Score Consistency": similarity_score_consistency,
        "MSE of Similarity Scores": mse_similarity,
        "Coverage of Recommendations": coverage,
        "Diversity of Recommendations": diversity
    }

    return evaluation_metrics


# Load lookalike results (assuming lookalike_df is already created)
lookalike_df = pd.read_csv("/content/FirstName_LastName_Lookalike.csv")

# customer-product matrix (recomputed for evaluation context)
customer_product_matrix = data.pivot_table(
    index="CustomerID",
    columns="ProductID",
    values="Quantity",
    fill_value=0
)

# similarity DataFrame (recomputed for evaluation context)
similarity_matrix = cosine_similarity(customer_product_matrix)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_product_matrix.index, columns=customer_product_matrix.index)

# Evaluate the model
evaluation_results = evaluate_lookalike_model(customer_product_matrix,
                                              similarity_df,
                                              lookalike_df)
print("Evaluation Results:")
print(evaluation_results,end="\n")


Evaluation Results:
{'Similarity Score Consistency\n': 0, 'MSE of Similarity Scores\n': 0, 'Coverage of Recommendations\n': 0.0, 'Diversity of Recommendations': 0.04504754422876078}
