In [147]:
import pandas as pd

In [148]:
# Transaction records; later cells read the customer_id and product_id columns.
df_transaction = pd.read_csv(filepath_or_buffer='../csv_files/transactions.csv')

In [149]:
# Product catalogue; later cells read the product_id, name and category columns.
df_product = pd.read_csv(filepath_or_buffer='../csv_files/products.csv')

In [150]:
# Customer records; later cells read the customer_id and location columns.
df_customer = pd.read_csv(filepath_or_buffer='../csv_files/customers.csv')

In [151]:
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Location-based recommendation

In [152]:
# Turn each customer's location text into a TF-IDF row vector.
# fit_transform learns the vocabulary and encodes the same column in one pass;
# row i of location_matrix corresponds to row i (by position) of df_customer.
location_vectorizer = TfidfVectorizer()
location_matrix = location_vectorizer.fit_transform(df_customer['location'])

In [153]:
# Brute-force nearest-neighbour index over the location vectors, using cosine distance.
KNN_location = NearestNeighbors(algorithm='brute', metric='cosine')
KNN_location.fit(X=location_matrix)

In [154]:
def recommend_by_location(customer_id, top_n=5, n_neighbors=5):
    """Recommend the products bought most often by customers with similar locations.

    Parameters
    ----------
    customer_id
        Value looked up in ``df_customer['customer_id']``.
    top_n : int, default 5
        Maximum number of products to return.
    n_neighbors : int, default 5
        Size of the neighbourhood requested from ``KNN_location``. The queried
        customer counts towards it, so at most ``n_neighbors - 1`` other
        customers contribute purchases. It is capped at the number of fitted
        customers.

    Returns
    -------
    pandas.Series or str
        Product names ordered from most- to least-purchased within the
        neighbourhood (index labels are those of ``df_product``), or an error
        message string when ``customer_id`` is unknown.
    """
    is_customer = df_customer['customer_id'].eq(customer_id).to_numpy()
    if not is_customer.any():
        return f"❌ Customer ID {customer_id} not found."

    # Use the row *position*, not the index label: location_matrix rows follow
    # df_customer's row order, which only equals the labels for a default index.
    customer_pos = int(is_customer.nonzero()[0][0])

    # kneighbors raises ValueError when asked for more neighbours than fitted rows.
    k = min(n_neighbors, location_matrix.shape[0])
    _, indices = KNN_location.kneighbors(location_matrix[customer_pos], n_neighbors=k)

    # Customers with identical locations tie at distance 0, so the queried
    # customer is not guaranteed to come back first; drop them explicitly.
    neighbour_pos = [pos for pos in indices[0] if pos != customer_pos][:max(k - 1, 0)]
    local_customer = df_customer.iloc[neighbour_pos]['customer_id'].tolist()

    local_transaction = df_transaction[df_transaction['customer_id'].isin(local_customer)]
    top_products = local_transaction['product_id'].value_counts().head(top_n).index.tolist()

    # isin() alone returns rows in df_product order; re-sort them so the
    # most-purchased product comes first.
    rank = {pid: position for position, pid in enumerate(top_products)}
    rows = df_product[df_product['product_id'].isin(top_products)]
    order = rows['product_id'].map(rank).to_numpy().argsort(kind='stable')
    return rows.iloc[order]['name']

In [155]:
# Example: recommendations for customer 4. Left as the cell's last expression
# (no print) so the notebook displays it like the other demo cells.
recommend_by_location(customer_id=4, top_n=5)

6      Electric Toothbrush 142
78            Portable SSD 172
129             Smartwatch 535
152      Bluetooth Speaker 969
198             VR Headset 615
Name: name, dtype: object


# Similar-product recommendation

In [156]:
# Build one text field per product (name followed by category) and store it on
# df_product, then encode it as TF-IDF vectors.
# Row i of `features` corresponds to row i (by position) of df_product.
df_product['combined_features'] = (
    df_product['name'] + ' ' + df_product['category']
)
product_vectorizer = TfidfVectorizer()
features = product_vectorizer.fit_transform(df_product['combined_features'])

In [157]:
def recommended_similar_product(product_name, top_n=10):
    """Return the products whose TF-IDF features are closest to a named product.

    The query product is the first row of ``df_product`` whose ``name``
    contains ``product_name`` (case-insensitive, literal substring match).

    Parameters
    ----------
    product_name : str
        Substring to look for in product names.
    top_n : int, default 10
        Maximum number of similar products to return.

    Returns
    -------
    pandas.Series or str
        Names of the most similar products, most similar first, with a fresh
        0..n-1 index; or an error message string when no name matches.
    """
    # na=False: rows with a missing name count as "no match" rather than
    # putting NaN into the boolean mask.
    is_match = df_product['name'].str.lower().str.contains(
        product_name.lower(), regex=False, na=False
    )
    match_positions = is_match.to_numpy(dtype=bool).nonzero()[0]

    if match_positions.size == 0:
        return f"❌ Product '{product_name}' not found in any product name."

    # Row *position* (not index label), because `features` is indexed by position.
    product_pos = int(match_positions[0])

    # Only the query row is needed; the full n x n similarity matrix is not
    # computed on every call.
    scores = cosine_similarity(features[product_pos], features)[0]

    # Exclude the query product explicitly: another product with identical
    # features ties at the top score and may sort ahead of it.
    candidates = (pos for pos in range(len(scores)) if pos != product_pos)
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)
    top_positions = ranked[:max(top_n, 0)]

    return df_product.iloc[top_positions]['name'].reset_index(drop=True)


In [158]:
# Example: five products most similar to the first one whose name contains "smartwatch".
recommended_similar_product(product_name="smartwatch", top_n=5)

0    Smartwatch 796
1    Smartwatch 812
2    Smartwatch 290
3    Smartwatch 240
4    Smartwatch 947
Name: name, dtype: object

# Past-purchase-based recommendation

In [159]:
# Customer x product count table: each cell is the number of transaction rows
# for that (customer_id, product_id) pair, with 0 where there are none.
user_item_matrix = pd.pivot_table(
    df_transaction,
    index='customer_id',
    columns='product_id',
    aggfunc='size',
    fill_value=0,
)

In [160]:
# Item-based index: the matrix is transposed so each fitted sample is a product
# described by its per-customer purchase counts; neighbours use cosine distance.
knn = NearestNeighbors(algorithm='brute', metric='cosine')
knn.fit(X=user_item_matrix.transpose())

In [161]:
def recommend_based_on_purchases(product_id, top_n=5):
    """Recommend products whose purchase pattern across customers is closest.

    Parameters
    ----------
    product_id
        A column label of ``user_item_matrix``.
    top_n : int, default 5
        Maximum number of products to return.

    Returns
    -------
    pandas.Series or str
        Product names ordered from nearest to farthest neighbour (index labels
        are those of ``df_product``, duplicate names removed), or an error
        message string when ``product_id`` has no purchase data.
    """
    if product_id not in user_item_matrix.columns:
        return f"❌ Product ID {product_id} not found in purchase data."

    item_user_matrix = user_item_matrix.T
    product_vector = item_user_matrix.loc[[product_id]]
    query_pos = user_item_matrix.columns.get_loc(product_id)

    # One extra neighbour leaves room for the query product itself. The request
    # is capped because kneighbors raises ValueError when asked for more
    # neighbours than fitted samples.
    n_query = min(top_n + 1, item_user_matrix.shape[0])
    _, indices = knn.kneighbors(product_vector, n_neighbors=n_query)

    # Products with identical purchase vectors tie at distance 0, so the query
    # is not guaranteed to come back first; drop it by position instead.
    neighbour_pos = [pos for pos in indices[0] if pos != query_pos][:max(top_n, 0)]
    recommended_ids = user_item_matrix.columns[neighbour_pos]

    # isin() alone returns rows in df_product order; re-sort them so the
    # nearest neighbour comes first.
    rank = {pid: position for position, pid in enumerate(recommended_ids)}
    rows = df_product[df_product['product_id'].isin(recommended_ids)]
    order = rows['product_id'].map(rank).to_numpy().argsort(kind='stable')
    return rows.iloc[order]['name'].drop_duplicates()

In [162]:
# Example: three products with the purchase pattern closest to product 2.
recommend_based_on_purchases(product_id=2, top_n=3)

27     Mechanical Keyboard 248
91                  Laptop 971
176    Electric Toothbrush 246
Name: name, dtype: object

# Save the models and data to a pickle file

In [164]:
import pickle

# The product-to-product similarity matrix was only ever a local variable inside
# recommended_similar_product, so it must be built here before it can be saved;
# otherwise this cell raises NameError on a fresh kernel.
# This is a dense n_products x n_products array.
similarity_matrix = cosine_similarity(features)

# Bundle the fitted models together with the data they were fitted on.
# Pickle files can execute arbitrary code when loaded, so only load this file
# from a trusted source.
with open('recommendation_model.pkl', 'wb') as f:
    pickle.dump({
        'knn_location': KNN_location,
        'knn_purchase': knn,
        'similarity_matrix': similarity_matrix,
        'location_vectorizer': location_vectorizer,
        'product_vectorizer': product_vectorizer,
        'user_item_matrix': user_item_matrix,
        'df_product': df_product,
        'df_customer': df_customer,
        'df_transaction': df_transaction,
    }, f)