In [None]:
import pandas as pd
import io
import requests

# Google Drive file IDs of the three input CSVs; each one is passed to
# read_csv_from_google_drive() further down in this cell.
transactions_file_id = '1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF'
products_file_id = '1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0'
customers_file_id = '1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE'

def read_csv_from_google_drive(file_id, timeout=30):
    """Download a file from Google Drive by its file ID and parse it as CSV.

    Parameters
    ----------
    file_id : str
        The Drive file ID, interpolated into the ``uc?export=download`` URL.
    timeout : float or tuple, optional
        Timeout in seconds forwarded to ``requests.get``. Defaults to 30 so a
        stalled connection raises instead of blocking the kernel forever.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status (via ``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    url = f'https://docs.google.com/uc?export=download&id={file_id}'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Give pandas the raw bytes rather than ``response.text``: ``.text`` is
    # decoded with whatever charset requests infers from the HTTP headers,
    # which can mangle non-ASCII characters when no charset is declared.
    # ``read_csv`` decodes the bytes itself (UTF-8 by default).
    return pd.read_csv(io.BytesIO(response.content))

# Fetch the three source tables in one pass; the generator is consumed in
# order, so the downloads happen customers -> products -> transactions.
customers, products, transactions = (
    read_csv_from_google_drive(drive_id)
    for drive_id in (customers_file_id, products_file_id, transactions_file_id)
)

In [None]:
# OneHotEncoder is only imported in a later cell of this notebook, so on a
# fresh kernel (Restart & Run All) this cell used to fail with NameError.
# Importing it here makes the cell runnable in top-to-bottom order.
from sklearn.preprocessing import OneHotEncoder

# One-hot encode each customer's Region into one dense indicator column per
# category, labelled with the category value and indexed by CustomerID so the
# result can later be joined onto per-customer features.
encoder = OneHotEncoder(sparse_output=False)
region_encoded = encoder.fit_transform(customers[['Region']])
region_df = pd.DataFrame(region_encoded, columns=encoder.categories_[0], index=customers['CustomerID'])

In [None]:
# Enrich every transaction row with its customer attributes and then its
# product attributes. Both joins are left joins, so all transaction rows are
# kept. Columns that clash in the product join get '_trans' / '_prod' suffixes.
transactions_with_customers = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left", suffixes=('_trans', '_prod'))
)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
from collections import defaultdict




# How many lookalikes to keep per customer, and how many customers (taken from
# the top of the `customers` table) to produce recommendations for.
TOP_N_LOOKALIKES = 3
N_TARGET_CUSTOMERS = 20

# Per-customer behavioural features: distinct products bought, total quantity,
# total spend and mean price. String aggregator names are used instead of
# np.sum / np.mean, which make pandas emit a FutureWarning in groupby.agg.
# NOTE(review): assumes a plain 'Price' column survives the earlier merge
# (i.e. it was not suffixed) - confirm against the merged frame's columns.
customer_transaction_history = (
    transactions_with_customers
    .groupby('CustomerID')
    .agg({
        'ProductID': 'nunique',
        'Quantity': 'sum',
        'TotalValue': 'sum',
        'Price': 'mean',
    })
    .reset_index()
)

# Attach the one-hot region columns. This is an inner join, so customers
# without any transaction do not get a profile row.
customer_profiles = pd.merge(
    customer_transaction_history, region_df, left_on='CustomerID', right_index=True
).reset_index(drop=True)

features = customer_profiles.drop(columns='CustomerID')
features_matrix = features.values

# NOTE(review): the features are on very different scales and are not
# standardised here, so the largest-magnitude columns dominate the cosine
# similarity - consider scaling before comparing.
cosine_sim = cosine_similarity(features_matrix)

# Row i of `cosine_sim` describes customer_profiles row i, which is NOT
# necessarily row i of `customers` (different ordering, and customers without
# transactions are absent). Translate IDs <-> matrix rows explicitly.
profile_ids = customer_profiles['CustomerID'].to_numpy()
row_of_customer = {cust_id: row for row, cust_id in enumerate(profile_ids)}

lookalike_map = defaultdict(list)

for cust_id in customers['CustomerID'][:N_TARGET_CUSTOMERS]:
    row = row_of_customer.get(cust_id)
    if row is None:
        # No transaction history -> no feature vector to compare.
        continue
    similarity_scores = cosine_sim[row]
    # Rank every profile from most to least similar, then drop the customer's
    # own row explicitly rather than assuming it is always ranked first
    # (ties / floating-point round-off can break that assumption).
    ranked_rows = np.argsort(-similarity_scores, kind='stable')
    best_rows = [idx for idx in ranked_rows if idx != row][:TOP_N_LOOKALIKES]
    # Lookalikes are stored best match first.
    for idx in best_rows:
        lookalike_map[cust_id].append([profile_ids[idx], similarity_scores[idx]])

# Flatten {customer: [[lookalike, score], ...]} into one row per pair.
lookalike_data = [
    [cust_id, similar_cust_id, score]
    for cust_id, similar_customers in lookalike_map.items()
    for similar_cust_id, score in similar_customers
]

lookalike_df = pd.DataFrame(lookalike_data, columns=['CustomerID', 'LookalikeCustomerID', 'SimilarityScore'])
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike Model Generated and Saved as Lookalike.csv")


Lookalike Model Generated and Saved as Lookalike.csv


  customer_transaction_history = transactions_with_customers.groupby('CustomerID').agg({
  customer_transaction_history = transactions_with_customers.groupby('CustomerID').agg({
