# Task 2: Lookalike Model

In [15]:
# Import Required Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the two input tables (customers, then transactions) from CSV.
# NOTE: both paths are absolute and machine-specific; point them at your own
# copies of the files before running this elsewhere.
customers, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        r"C:/Users/ADMIN/Downloads/Customers.csv",
        r"C:/Users/ADMIN/Downloads/Transactions.csv",
    )
)

# Collapse the transaction log to one row per customer: total spend
# ('TotalValue') and total units bought ('Quantity'). `as_index=False` keeps
# 'CustomerID' as an ordinary column so it can serve as the merge key below.
customer_features = transactions.groupby('CustomerID', as_index=False).agg(
    {'TotalValue': 'sum', 'Quantity': 'sum'}
)

# Attach the aggregated features to the customer table. The inner join keeps
# only customers that have at least one transaction.
customer_profiles = customers.merge(customer_features, on='CustomerID', how='inner')

# Standardize the two numeric features so that neither dominates the
# similarity computation purely because of its scale.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(
    customer_profiles.loc[:, ['TotalValue', 'Quantity']]
)

# Pairwise cosine similarity between every pair of customers (rows of
# `scaled_data`); the result is a square customers-by-customers matrix.
similarity_matrix = cosine_similarity(scaled_data, scaled_data)

# Label both axes with CustomerID so similarities can be looked up by ID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_profiles['CustomerID'],
    columns=customer_profiles['CustomerID'],
)

# Identify Top 3 Lookalike Customers for Each Customer
#
# For every customer, rank all *other* customers by cosine similarity and keep
# the three most similar IDs (most similar first).
#
# The customer's own entry is removed explicitly instead of being skipped with
# `.iloc[1:4]` after sorting: several customers can tie with the self-similarity
# score (floating-point values around 1.0), and the default sort is not stable,
# so the customer is not guaranteed to land in position 0. Relying on that
# position could list a customer as their own lookalike and drop a real one.
TOP_N = 3  # must match the number of 'LookalikeN' output columns

lookalike_results = {}
for customer_id in customer_profiles['CustomerID']:
    # `nlargest` returns values in descending order and breaks ties by
    # original position, which makes the output deterministic.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .nlargest(TOP_N)
    )
    lookalike_ids = similar_customers.index.tolist()
    # Pad with None when fewer than TOP_N other customers exist, so every row
    # has the same length when the results are turned into a table.
    lookalike_ids += [None] * (TOP_N - len(lookalike_ids))
    lookalike_results[customer_id] = lookalike_ids

# Turn the {customer_id: [id1, id2, id3]} mapping into a table with one row
# per customer (the dict keys become the index) and one column per rank.
lookalike_df = pd.DataFrame.from_dict(
    data=lookalike_results,
    orient='index',
    columns=['Lookalike1', 'Lookalike2', 'Lookalike3'],
)

# Write the table to the working directory; the index (the customer the row
# belongs to) is written under the header 'CustomerID'.
lookalike_df.to_csv(path_or_buf="Lookalike.csv", index_label='CustomerID')
print("Lookalike results successfully saved to 'Lookalike.csv'.")


Lookalike results successfully saved to 'Lookalike.csv'.
