In [33]:
# Attach Google Drive to the Colab runtime so files under the mount point
# can be read and written like ordinary local paths.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [35]:
# Single source of truth for the input directory: change it here once
# instead of editing every path literal.
DATA_DIR = "/content/drive/My Drive/data"

# Locations of the three input CSV files.
customers_path = f"{DATA_DIR}/Customers.csv"
products_path = f"{DATA_DIR}/Products.csv"
transactions_path = f"{DATA_DIR}/Transactions.csv"

In [36]:
# Read the three input tables, in the same order as the path variables,
# into one DataFrame each.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (customers_path, products_path, transactions_path)
)

In [37]:
# Sanity-check the loads by printing the leading rows of each table.
for frame in (customers, products, transactions):
    print(frame.head())


  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3       

In [38]:
# Merge transactions with customer and product data.
# The result is bound to a NEW name instead of back onto `transactions`, so
# this cell is idempotent: re-running it never merges already-merged columns
# again (which would create `_x` / `_y` suffixed duplicates and can
# eventually fail on a suffix collision).
transactions_enriched = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

# Create a customer-product matrix: one row per CustomerID, one column per
# ProductID, each cell holding the summed Quantity (0 where the customer has
# no transaction for that product).
customer_product_matrix = transactions_enriched.pivot_table(
    index="CustomerID",
    columns="ProductID",
    values="Quantity",
    aggfunc="sum",
    fill_value=0,
)

# Standardize the matrix. StandardScaler operates per column, i.e. each
# product's quantities are centred and scaled across customers.
scaler = StandardScaler()
customer_product_matrix_scaled = scaler.fit_transform(customer_product_matrix)

# --- Lookalike Model ---
# Compute cosine similarity between every pair of customer rows, then label
# both axes with CustomerID so scores can be looked up by customer.
similarity_matrix = cosine_similarity(customer_product_matrix_scaled)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

In [39]:
# --- Generate Recommendations ---
N_CUSTOMERS = 20  # report lookalikes for the first 20 customers in the index
TOP_K = 3         # number of lookalikes kept per customer

lookalike_results = {}
for customer_id in similarity_df.index[:N_CUSTOMERS]:
    # Exclude the customer's own entry by label rather than by position:
    # skipping "the first row after sorting" silently returns the customer
    # itself whenever its self-similarity is not the strict maximum
    # (e.g. a tie with another customer, or an all-zero scaled row).
    similar_customers = (
        similarity_df[customer_id]
        .drop(customer_id)
        .nlargest(TOP_K)
    )
    # Store plain Python floats; NumPy scalars inside a list are written to
    # the CSV as "np.float64(...)" under NumPy >= 2.
    lookalike_results[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

# Save results to Lookalike.csv: one row per customer, with the list of
# (lookalike CustomerID, similarity score) pairs in a single column.
lookalike_df = pd.DataFrame(
    [
        {"CustomerID": customer_id, "Lookalike_Customers": lookalikes}
        for customer_id, lookalikes in lookalike_results.items()
    ]
)
lookalike_df.to_csv("/content/drive/My Drive/outputs/Sameer_Ahmed_Lookalike.csv", index=False)

print("Lookalike Model completed and results saved.")


Lookalike Model completed and results saved.
