In [3]:
# === InfoCrucible Recommendation & Locality-Aware System ===
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
from geopy.distance import geodesic
import numpy as np

# === Sample User & Article Data ===
# Toy users: one record per user. `location` is a (lat, lon) tuple and
# `history` is a space-separated bag of topic keywords.
users = pd.DataFrame([
    {'user_id': 1, 'location': (28.6139, 77.2090), 'history': "elections economy reform"},
    {'user_id': 2, 'location': (19.0760, 72.8777), 'history': "youth protests democracy"},
    {'user_id': 3, 'location': (13.0827, 80.2707), 'history': "fake news media"},
])

# Toy articles: one record per article, each with its own (lat, lon) tuple.
articles = pd.DataFrame([
    {'article_id': 101, 'title': "Reform Bill Passed",
     'text': "The new economic reform bill...", 'location': (28.6, 77.2)},
    {'article_id': 102, 'title': "Youth Demand Action",
     'text': "Young activists protest...", 'location': (19.0, 72.8)},
    {'article_id': 103, 'title': "Fake News Campaign",
     'text': "Several fake stories...", 'location': (13.0, 80.2)},
])

# === Load BERT Embedding Model ===
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Load the encoder and put it in inference mode in one step
# (nn.Module.eval() returns the module itself).
model = AutoModel.from_pretrained(MODEL_NAME).eval()
# Matching tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def encode(texts):
    """Embed a batch of strings as L2-normalised sentence vectors.

    Args:
        texts: list of strings to embed. They are tokenised together with
            padding and truncation enabled.

    Returns:
        A NumPy array with one unit-length row per input text.
    """
    with torch.no_grad():
        inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
        outputs = model(**inputs)
        token_states = outputs.last_hidden_state
        # Average only over real tokens. A plain mean over dim=1 also averages
        # the padding positions, which makes a text's embedding depend on how
        # long the other texts in the batch are.
        mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
        summed = (token_states * mask).sum(dim=1)
        # clamp guards against division by zero for an all-padding row.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings = summed / token_counts
        return F.normalize(embeddings, p=2, dim=1).numpy()

# === User Embedding ===
# One embedding row per user, computed from the user's history string.
user_embeddings = encode(list(users['history']))

# === Article Embedding ===
# One embedding row per article, computed from the article body text.
article_embeddings = encode(list(articles['text']))

# === Content-Based Recommendation ===
def recommend_articles(user_index, top_k=2):
    """Return the articles whose embeddings are closest to one user's embedding.

    Args:
        user_index: positional row of the user in ``user_embeddings``.
        top_k: maximum number of articles to return.

    Returns:
        Rows of ``articles`` ordered from most to least similar.
    """
    query = user_embeddings[user_index][np.newaxis, :]
    scores = cosine_similarity(query, article_embeddings).ravel()
    # Ascending argsort reversed gives best-first ordering.
    ranked = np.argsort(scores)[::-1]
    return articles.iloc[ranked[:top_k]]

# === User Similarity Recommendation ===
def recommend_users(user_index, top_k=2):
    """Return the users whose history embeddings are most similar to one user's.

    Args:
        user_index: positional row of the query user in ``user_embeddings``;
            negative values count from the end.
        top_k: maximum number of other users to return.

    Returns:
        Rows of ``users`` ordered from most to least similar, never including
        the query user.
    """
    sims = cosine_similarity([user_embeddings[user_index]], user_embeddings)[0]
    ranked = np.argsort(sims)[::-1]
    # Remove the query user by position rather than assuming it sorts first:
    # when another user has an identical embedding the tie can put the query
    # user at rank 1, which would return them and drop a real neighbour.
    self_pos = user_index if user_index >= 0 else user_index + len(sims)
    others = ranked[ranked != self_pos]
    # max(..., 0) keeps non-positive top_k returning an empty frame, as before.
    return users.iloc[others[:max(top_k, 0)]]

# === Locality-Based Filter ===
def filter_by_locality(user_index, radius_km=1000):
    """Return the articles located strictly within ``radius_km`` of a user.

    Args:
        user_index: positional row of the user in ``users``, matching how the
            other recommendation functions interpret the index.
        radius_km: distance threshold in kilometres (exclusive).

    Returns:
        The subset of ``articles`` whose geodesic distance to the user's
        location is less than ``radius_km``.
    """
    # Positional lookup: `.loc` would treat user_index as an index *label* and
    # disagree with user_embeddings[user_index] if `users` were re-indexed.
    user_loc = users['location'].iloc[user_index]
    within_radius = articles['location'].apply(
        lambda loc: geodesic(user_loc, loc).km < radius_km
    )
    # Force a boolean mask; on an empty frame `apply` can return an
    # object-dtype Series, which is not treated as a row mask.
    return articles[within_radius.astype(bool)]

# === Deep Learning Extension: Autoencoder for Clustering Users ===
class UserAutoencoder(nn.Module):
    """Two-layer MLP autoencoder that compresses embeddings to a latent vector.

    Args:
        input_dim: size of each input vector (and of the reconstruction).
        hidden_dim: width of the hidden layer in both encoder and decoder.
            Defaults to 64.
        latent_dim: size of the latent code produced by the encoder.
            Defaults to 32.
    """

    def __init__(self, input_dim, hidden_dim=64, latent_dim=32):
        super().__init__()
        # Encoder: input_dim -> hidden_dim -> latent_dim
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim)
        )
        # Decoder mirrors the encoder: latent_dim -> hidden_dim -> input_dim
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A tuple ``(x_recon, z)``: the reconstruction of ``x`` and the
            latent code it was decoded from.
        """
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon, z

# === Example Usage ===
if __name__ == "__main__":
    # Demo: run each recommender for the first user, then fit the autoencoder.
    user_index = 0
    print("Recommended Articles:")
    print(recommend_articles(user_index))

    print("\nRecommended Similar Users:")
    print(recommend_users(user_index))

    print("\nLocality-Based Articles:")
    print(filter_by_locality(user_index))

    # Train autoencoder (optional deep learning feature)
    # Seed first so weight initialisation, and therefore the printed latents,
    # are reproducible across runs.
    torch.manual_seed(0)
    emb_tensor = torch.tensor(user_embeddings, dtype=torch.float32)
    autoencoder = UserAutoencoder(input_dim=emb_tensor.shape[1])
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=0.01)

    # Training mode only needs to be set once, not on every iteration.
    autoencoder.train()
    for epoch in range(100):
        recon, z = autoencoder(emb_tensor)
        loss = F.mse_loss(recon, emb_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Recompute the latents with the final weights: the `z` left over from the
    # loop was produced before the last optimizer step.
    autoencoder.eval()
    with torch.no_grad():
        _, z = autoencoder(emb_tensor)
    print("\nAutoencoder training complete. Latent user features:")
    print(z.numpy())


Recommended Articles:
   article_id                title                             text  \
0         101   Reform Bill Passed  The new economic reform bill...   
1         102  Youth Demand Action       Young activists protest...   

       location  
0  (28.6, 77.2)  
1  (19.0, 72.8)  

Recommended Similar Users:
   user_id            location                   history
1        2   (19.076, 72.8777)  youth protests democracy
2        3  (13.0827, 80.2707)           fake news media

Locality-Based Articles:
   article_id               title                             text  \
0         101  Reform Bill Passed  The new economic reform bill...   

       location  
0  (28.6, 77.2)  

Autoencoder training complete. Latent user features:
[[ 0.05152391  0.0887581   0.04389022  0.08141999 -0.04915437  0.02544513
   0.02593962  0.01657471  0.00070634 -0.08335958 -0.03115486 -0.10428935
  -0.1039738   0.20515452 -0.00461113  0.07440617 -0.0622243  -0.19510049
   0.11190814  0.0611937  -0.007

In [2]:
!pip install geopy


Defaulting to user installation because normal site-packages is not writeable
Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 KB[0m [31m655.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 KB[0m [31m204.7 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1
