In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
import lightgbm as lgb

# 1️⃣ Load the Datasets
# Read the three source tables in a fixed order: accounts, products, pipeline.
accounts, products, sales_pipeline = (
    pd.read_csv(csv_path)
    for csv_path in ('accounts.csv', 'products.csv', 'sales_pipeline.csv')
)

# 2️⃣ Enrich every pipeline row with its account and product attributes
# (left joins keep pipeline rows that have no match in the lookup tables).
df = pd.merge(sales_pipeline, accounts, on="account", how="left")
df = pd.merge(df, products, on="product", how="left")

# 3️⃣ Replace the categorical columns with integer codes, keeping one fitted
# encoder per column so the codes can be mapped back to labels later.
label_encoders = {
    name: LabelEncoder()
    for name in ['sector', 'office_location', 'product', 'series']
}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# 4️⃣ Account x product matrix of summed close_value; combinations that never
# occur are filled with 0. Columns are the encoded product codes.
pivot_table = pd.pivot_table(
    df,
    values="close_value",
    index="account",
    columns="product",
    aggfunc="sum",
)
pivot_table = pivot_table.fillna(0)

# 5️⃣ Brute-force cosine nearest-neighbour index over the account rows
model_knn = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='cosine').fit(pivot_table)

# 6️⃣ Function to Recommend Products Using Collaborative Filtering
def recommend_collaborative(account_name, n_recommendations=5):
    """Recommend products based on what the most similar accounts bought.

    The account's nearest neighbours (cosine distance over the rows of
    ``pivot_table``) are looked up with ``model_knn``. Every product is then
    scored by the neighbours' summed ``close_value`` for it, weighted by each
    neighbour's similarity to the account. Products the account already
    bought are not filtered out.

    Args:
        account_name: Account label as it appears in ``pivot_table.index``.
        n_recommendations: Maximum number of product names to return.

    Returns:
        A list of product names, best first (possibly shorter than requested
        or empty), or the string ``"Account not found in historical sales
        data."`` when the account has no row in ``pivot_table``.
    """
    if account_name not in pivot_table.index:
        return "Account not found in historical sales data."
    if n_recommendations <= 0:
        return []

    spend = pivot_table.to_numpy(dtype=float)
    account_pos = pivot_table.index.get_loc(account_name)

    # Ask for one extra neighbour because the query account is itself part of
    # the fitted data; never ask for more rows than the model was fitted on.
    k = min(model_knn.n_neighbors + 1, spend.shape[0])
    distances, indices = model_knn.kneighbors(
        spend[account_pos].reshape(1, -1), n_neighbors=k
    )

    # kneighbors returns ROW positions (accounts), not product columns.
    # Drop the query account by position rather than assuming it comes first:
    # an account with an identical purchase vector can tie with it.
    is_other_account = indices[0] != account_pos
    neighbor_rows = indices[0][is_other_account]
    if neighbor_rows.size == 0:
        return []
    similarities = 1.0 - distances[0][is_other_account]

    # Similarity-weighted spend of the neighbours, one score per product.
    scores = similarities @ spend[neighbor_rows]
    ranked = np.argsort(-scores, kind="stable")
    top_columns = [int(pos) for pos in ranked if scores[pos] > 0][:n_recommendations]
    if not top_columns:
        return []

    # pivot_table columns are the label-encoded product codes, so map them
    # back to product names with the encoder fitted on the 'product' column.
    encoded_products = pivot_table.columns[top_columns]
    return label_encoders['product'].inverse_transform(encoded_products).tolist()

# 7️⃣ Content-Based Filtering Using TF-IDF for Product Descriptions
# Describe each product by its name and series. Missing values become empty
# strings (and everything is cast to str) because a NaN description cannot be
# processed by TfidfVectorizer.
product_descriptions = (
    products['product'].fillna("").astype(str)
    + " "
    + products['series'].fillna("").astype(str)
)
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(product_descriptions)

# 8️⃣ Compute Similarity Between Products
# Row/column i of cosine_sim corresponds to row position i of `products`.
cosine_sim = cosine_similarity(tfidf_matrix)

# 9️⃣ Function to Recommend Products Using Content-Based Filtering
def recommend_content_based(product_name, n_recommendations=5):
    """Recommend the products whose TF-IDF description is closest to a product.

    Args:
        product_name: Product label as it appears in ``products['product']``.
        n_recommendations: Maximum number of product names to return.

    Returns:
        A list of product names ordered by decreasing cosine similarity (ties
        keep the row order of ``products``), never containing the queried
        product's own row, or the string ``"Product not found."`` when the
        product is unknown.
    """
    # Work with row POSITIONS: cosine_sim is indexed by position, which only
    # coincides with the index labels when `products` has a default index.
    matches = np.flatnonzero((products['product'] == product_name).to_numpy())
    if matches.size == 0:
        return "Product not found."
    product_pos = matches[0]

    similarity_scores = np.asarray(cosine_sim[product_pos])
    ranked = np.argsort(-similarity_scores, kind="stable")

    # Exclude the queried product explicitly. Dropping "the first entry" is
    # wrong when another product ties with it or when its TF-IDF row is empty.
    ranked = ranked[ranked != product_pos][:max(n_recommendations, 0)]

    return products['product'].iloc[ranked].tolist()

# 🔟 Matrix Factorization (SVD) for Recommendation
def recommend_svd(account_name, n_recommendations=5):
    """Recommend products from a truncated-SVD reconstruction of ``pivot_table``.

    The account x product matrix is factorised with up to 10 latent factors
    and the account's reconstructed row is used as the product scores.

    Args:
        account_name: Account label as it appears in ``pivot_table.index``.
        n_recommendations: Maximum number of product names to return.

    Returns:
        A list of product names ordered by decreasing reconstructed score, or
        the string ``"Account not found in historical sales data."`` when the
        account has no row in ``pivot_table``.
    """
    if account_name not in pivot_table.index:
        return "Account not found in historical sales data."

    # svds needs a floating-point matrix.
    ratings_matrix = pivot_table.to_numpy(dtype=float)
    account_idx = pivot_table.index.get_loc(account_name)

    # svds requires 1 <= k < min(matrix shape); cap the 10 latent factors so a
    # small account or product catalogue does not raise.
    max_factors = min(ratings_matrix.shape) - 1
    if max_factors >= 1:
        u, s, vt = svds(ratings_matrix, k=min(10, max_factors))
        # Only this account's row of U @ diag(s) @ Vt is needed.
        product_scores = (u[account_idx] * s) @ vt
    else:
        # Matrix too small to factorise: rank by the observed values instead.
        product_scores = ratings_matrix[account_idx]

    top_columns = np.argsort(product_scores)[::-1][:max(n_recommendations, 0)]
    if top_columns.size == 0:
        return []

    # Scores are aligned with pivot_table's columns (label-encoded product
    # codes), not with the row order of `products`; decode them to names.
    encoded_products = pivot_table.columns[top_columns]
    return label_encoders['product'].inverse_transform(encoded_products).tolist()

# 1️⃣1️⃣ LightGBM Model for Personalized Recommendations
# Columns fed to LightGBM for both training and prediction. The target
# ('close_value') is deliberately NOT among them: using it as a feature lets
# the model simply copy the answer.
_LGBM_FEATURE_COLUMNS = ['sector', 'office_location', 'product', 'series']


def train_lightgbm():
    """Train a LightGBM regressor that predicts ``close_value`` for a deal.

    Uses the label-encoded account and product attributes in ``df`` as
    features. Rows without a ``close_value`` are skipped because they carry
    no label to learn from.

    Returns:
        The trained ``lightgbm.Booster``.
    """
    training_rows = df.dropna(subset=['close_value'])
    features = training_rows[_LGBM_FEATURE_COLUMNS]
    target = training_rows['close_value']

    train_data = lgb.Dataset(features, label=target)
    params = {'objective': 'regression', 'metric': 'rmse'}

    model = lgb.train(params, train_data, num_boost_round=100)
    return model


lgb_model = train_lightgbm()


def recommend_lightgbm(account_name, n_recommendations=5):
    """Recommend the products with the highest predicted close value.

    Builds one candidate row per distinct product seen in ``df``, combined
    with the account's own ``sector`` and ``office_location``, and ranks the
    candidates by the close value ``lgb_model`` predicts for them.

    Args:
        account_name: Account label as it appears in ``df['account']``.
        n_recommendations: Maximum number of product names to return.

    Returns:
        A list of product names ordered by decreasing predicted close value,
        or the string ``"Account not found in historical sales data."`` when
        the account has no rows in ``df``.
    """
    account_rows = df[df['account'] == account_name]
    if account_rows.empty:
        return "Account not found in historical sales data."
    if n_recommendations <= 0:
        return []

    # Account attributes come from the accounts merge, so they are presumably
    # identical on every row of the account; the first row is used.
    profile = account_rows.iloc[0]

    # One candidate per product (with its series), scored for this account.
    candidates = (
        df[['product', 'series']]
        .drop_duplicates(subset='product')
        .reset_index(drop=True)
        .assign(
            sector=profile['sector'],
            office_location=profile['office_location'],
        )
    )

    predictions = lgb_model.predict(candidates[_LGBM_FEATURE_COLUMNS])
    top_rows = np.argsort(predictions)[::-1][:n_recommendations]

    # Candidate rows hold label-encoded product codes; decode them to names.
    encoded_products = candidates['product'].to_numpy()[top_rows]
    return label_encoders['product'].inverse_transform(encoded_products).tolist()

# 1️⃣2️⃣ Hybrid Recommendation System (Combining All Models)
def hybrid_recommend(account_name, current_product, n_recommendations=5):
    """Merge the four recommenders into one deterministic, vote-ranked list.

    Each recommender contributes at most one vote per product. Products are
    ordered by number of votes; ties keep the order in which the products
    were first suggested (collaborative, content-based, SVD, LightGBM).

    Args:
        account_name: Account to recommend for.
        current_product: Product used as the seed for content-based filtering.
        n_recommendations: Maximum number of product names to return.

    Returns:
        A list of at most ``n_recommendations`` distinct product names.
    """
    recommendation_lists = [
        recommend_collaborative(account_name, n_recommendations),
        recommend_content_based(current_product, n_recommendations),
        recommend_svd(account_name, n_recommendations),
        recommend_lightgbm(account_name, n_recommendations),
    ]

    votes = {}
    for recommendations in recommendation_lists:
        # A string result is a "not found" message, not a recommendation list.
        if isinstance(recommendations, str):
            continue
        # dict.fromkeys de-duplicates within one list while keeping its order.
        for product in dict.fromkeys(recommendations):
            votes[product] = votes.get(product, 0) + 1

    # A set would discard the ranking and give a run-dependent order. sorted()
    # is stable, so equally voted products keep their first-seen order.
    ranked = sorted(votes, key=votes.get, reverse=True)
    return ranked[:max(n_recommendations, 0)]

# 🏆 Example Usage
# Run every recommender once for a sample account / product and print the
# result of each, in the same order as the recommenders are defined above.
print(
    "Collaborative Filtering Recommendations:",
    recommend_collaborative("ABC Corp"),
)
print(
    "Content-Based Filtering Recommendations:",
    recommend_content_based("Product X"),
)
print(
    "SVD Recommendations:",
    recommend_svd("ABC Corp"),
)
print(
    "LightGBM Recommendations:",
    recommend_lightgbm("ABC Corp"),
)
print(
    "Hybrid Recommendations:",
    hybrid_recommend("ABC Corp", "Product X"),
)
