<a href="https://colab.research.google.com/github/rajatyadav1998/graph-recommender-pro/blob/main/Graph_recommeder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
# Step 1: Install All Libraries
# ==============================================================================
print("⏳ Step 1/6: Installing libraries...")
!pip install -q flask pyngrok networkx pandas scikit-learn python-louvain
print("✅ Libraries installed.")

# ==============================================================================
# Step 2: Configure ngrok
# ==============================================================================
print("\n⏳ Step 2/6: Configuring ngrok...")
# ❗️ PASTE YOUR NGROK AUTHTOKEN HERE
AUTHTOKEN = "30M4sFq07MnnJN7ao11As3FeOoo_64mq5pD6GSnPzk62CY2N"

from pyngrok import ngrok
import os
!killall ngrok
os.system(f"ngrok config add-authtoken {AUTHTOKEN}")
print("✅ ngrok configured.")

# ==============================================================================
# Step 3: Imports and Data Loading
# ==============================================================================
print("\n⏳ Step 3/6: Loading data...")
import networkx as nx
import pandas as pd
import random
from flask import Flask, request, render_template_string
from sklearn.linear_model import LogisticRegression
from networkx.algorithms import community as nx_community
from tqdm.notebook import tqdm

# Download and decompress the edge list only when no local copy exists, so
# re-running the cell does not fetch the file again.
if not os.path.exists('facebook_combined.txt'):
    !wget -q "http://snap.stanford.edu/data/facebook_combined.txt.gz"
    !gzip -d facebook_combined.txt.gz
# Build an undirected graph; node labels in the file are parsed as ints.
G = nx.read_edgelist('facebook_combined.txt', create_using=nx.Graph(), nodetype=int)
print(f"✅ Graph loaded with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")

# ==============================================================================
# Step 4: Pre-compute All Models
# ==============================================================================
print("\n⏳ Step 4/6: Pre-computing all models (this will take several minutes)...")
# Imported here so this step stays self-contained; scikit-learn is already a
# dependency of the notebook.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

print("   - Calculating PageRank...")
pagerank_scores = nx.pagerank(G)

print("   - Detecting communities...")
communities_generator = nx_community.greedy_modularity_communities(G)
# Map every node to the index of the community that contains it.
user_to_community = {}
for i, comm in enumerate(communities_generator):
    for user in comm:
        user_to_community[user] = i

print("   - Preparing dataset for ML model...")
# Positive class: a random subset of existing edges (at most 40000).
positive_samples = list(G.edges())
negative_samples = []
all_nodes = list(G.nodes())
num_samples = min(len(positive_samples), 40000)
positive_samples = random.sample(positive_samples, num_samples)
# Negative class: the same number of distinct, unordered, non-adjacent pairs.
# Already-drawn pairs are tracked in a set so each duplicate check is O(1)
# instead of a scan over the growing list.
seen_negative_pairs = set()
with tqdm(total=num_samples, desc="   - Generating negative samples") as pbar:
    while len(negative_samples) < num_samples:
        # random.sample draws without replacement, so u and v always differ.
        u, v = random.sample(all_nodes, 2)
        pair_key = frozenset((u, v))
        if pair_key in seen_negative_pairs or G.has_edge(u, v):
            continue
        seen_negative_pairs.add(pair_key)
        negative_samples.append((u, v))
        pbar.update(1)
all_samples = positive_samples + negative_samples
labels = [1] * len(positive_samples) + [0] * len(negative_samples)
df = pd.DataFrame(all_samples, columns=['user1', 'user2'])
df['label'] = labels

print("   - Engineering features for ML model...")
# Each networkx scorer yields (u, v, score) triples in the order of the pairs
# it was given, so the scores line up with the rows of df.
df['jaccard'] = [p[2] for p in nx.jaccard_coefficient(G, all_samples)]
df['adamic_adar'] = [p[2] for p in nx.adamic_adar_index(G, all_samples)]
df['pref_attachment'] = [p[2] for p in nx.preferential_attachment(G, all_samples)]

print("   - Training Logistic Regression model...")
X = df[['jaccard', 'adamic_adar', 'pref_attachment']]
y = df['label']
# The three features live on very different scales (preferential attachment
# is a product of degrees), which made the unscaled fit stop at the iteration
# limit. Standardising inside a pipeline fixes the conditioning and applies
# the same transform automatically whenever ml_model.predict_proba is called.
ml_model = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=42, max_iter=1000),
)
ml_model.fit(X, y)
print("✅ All models are ready!")

# ==============================================================================
# Step 5: Define Helper Functions & Flask App
# ==============================================================================
print("\n⏳ Step 5/6: Defining application functions...")
def recommend_friends_jaccard(graph, user, top_n=10):
    """Rank nodes not yet adjacent to ``user`` by Jaccard coefficient.

    Returns ``(None, None)`` when ``user`` is not in ``graph`` and
    ``([], {})`` when there is no non-adjacent node to suggest. Otherwise
    returns ``(top_recs, mutuals)``: ``top_recs`` is a list of at most
    ``top_n`` ``(node, score)`` tuples, highest score first, with scores
    rounded to 4 decimals; ``mutuals`` maps each listed node to the number
    of neighbours it shares with ``user``.
    """
    if user not in graph:
        return None, None

    # Candidate pairs: every other node that is not already a neighbour.
    candidates = []
    for other in graph.nodes():
        if other == user or graph.has_edge(user, other):
            continue
        candidates.append((user, other))
    if not candidates:
        return [], {}

    ranked = list(nx.jaccard_coefficient(graph, candidates))
    ranked.sort(key=lambda triple: triple[2], reverse=True)

    top_recs = []
    mutuals = {}
    for _, candidate, score in ranked[:top_n]:
        top_recs.append((candidate, round(score, 4)))
        mutuals[candidate] = len(list(nx.common_neighbors(graph, user, candidate)))
    return top_recs, mutuals
def get_top_influencers_pagerank(scores_dict, top_n=10):
    """Return the ``top_n`` best-scoring users as ``(user, score)`` tuples.

    ``scores_dict`` maps user -> score. The result is ordered from highest
    to lowest score, users with equal scores keep their dictionary order,
    and each score is rounded to 6 decimal places.
    """
    ranked_users = sorted(scores_dict, key=scores_dict.get, reverse=True)
    return [(user, round(scores_dict[user], 6)) for user in ranked_users[:top_n]]
def recommend_from_community(graph, user, user_comm_map, top_n=10):
    """Sample up to ``top_n`` same-community users not yet linked to ``user``.

    ``user_comm_map`` maps user -> community id. Returns ``None`` when
    ``user`` is not in ``graph``; otherwise a random sample (possibly empty)
    of users that share ``user``'s community id, are not ``user`` itself and
    have no edge to ``user``.
    """
    if user not in graph:
        return None

    target_comm = user_comm_map.get(user)
    members = []
    for member, comm_id in user_comm_map.items():
        if comm_id != target_comm or member == user:
            continue
        if graph.has_edge(user, member):
            continue
        members.append(member)

    sample_size = min(len(members), top_n)
    return random.sample(members, sample_size)
def predict_link_probability(model, graph, user1, user2):
    """Return the model's positive-class probability for ``(user1, user2)``.

    Builds a one-row feature frame with the columns ``jaccard``,
    ``adamic_adar`` and ``pref_attachment`` (in that order) from the
    corresponding networkx link-prediction scores, then returns element
    ``[0, 1]`` of ``model.predict_proba`` for that row.
    """
    candidate = [(user1, user2)]
    scorers = (
        ('jaccard', nx.jaccard_coefficient),
        ('adamic_adar', nx.adamic_adar_index),
        ('pref_attachment', nx.preferential_attachment),
    )
    # Every scorer yields (u, v, score) triples; only the score is kept.
    columns = {
        name: [score for _, _, score in scorer(graph, candidate)]
        for name, scorer in scorers
    }
    features = pd.DataFrame(columns)
    return model.predict_proba(features)[0, 1]

# Flask application object; the single route is registered on it below.
app = Flask(__name__)

# Jinja template for the whole single-page UI, rendered by the route handler
# with render_template_string. Context variables it reads:
#   tab                   - selects the highlighted tab link and which form is shown
#   user_id, user1, user2 - echoed back into the form inputs
#   error                 - optional message shown in the red error box
#   title, headers, results - optional results table (rendered only when
#                             `results` is truthy)
#   probability           - optional number, displayed as a percentage with
#                           two decimals
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Graph Recommender Pro</title>
    <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap" rel="stylesheet">
    <style>
        :root { --primary-blue: #1877f2; --hover-blue: #166fe5; --bg-gray: #f0f2f5; --card-bg: #ffffff; --text-primary: #050505; --text-secondary: #65676b; --border-color: #ced0d4; }
        body { font-family: 'Roboto', sans-serif; margin: 0; background-color: var(--bg-gray); color: var(--text-primary); }
        .header { background-color: var(--primary-blue); color: white; padding: 12px 20px; text-align: center; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
        .header h1 { margin: 0; font-size: 24px; }
        .container { max-width: 800px; margin: 20px auto; padding: 20px; }
        .tabs { display: flex; border-bottom: 1px solid var(--border-color); }
        .tab-link { padding: 15px 20px; text-decoration: none; cursor: pointer; background-color: transparent; border: none; font-size: 16px; font-weight: 500; color: var(--text-secondary); position: relative; }
        .tab-link.active { color: var(--primary-blue); }
        .tab-link.active::after { content: ''; position: absolute; bottom: -1px; left: 0; right: 0; height: 3px; background-color: var(--primary-blue); }
        .card { background-color: var(--card-bg); border-radius: 8px; box-shadow: 0 1px 2px rgba(0,0,0,0.2); padding: 20px; margin-top: 20px; }
        .form-section { display: flex; flex-wrap: wrap; align-items: center; gap: 15px; }
        input[type="text"] { flex-grow: 1; padding: 12px; border: 1px solid var(--border-color); border-radius: 6px; font-size: 16px; }
        input[type="submit"] { padding: 12px 25px; border: none; background-color: var(--primary-blue); color: white; border-radius: 6px; font-size: 16px; font-weight: bold; cursor: pointer; transition: background-color 0.2s; }
        .results-section h2 { font-size: 20px; margin-top: 0; color: var(--text-primary); }
        .results-table { width: 100%; border-collapse: collapse; margin-top: 15px; }
        .results-table th, .results-table td { border: 1px solid #ddd; padding: 12px; text-align: left; }
        .results-table th { background-color: #f5f6f7; font-weight: 500; }
        .error { color: #d93025; font-weight: bold; text-align: center; padding: 15px; background-color: #fbe9e7; border-radius: 6px; }
        .probability-result { text-align: center; }
        .probability-value { font-size: 48px; font-weight: 700; color: var(--primary-blue); margin: 10px 0; }
        .probability-text { font-size: 18px; color: var(--text-secondary); }
    </style>
</head>
<body>
    <div class="header"><h1>Graph Recommender Pro</h1></div>
    <div class="container">
        <div class="tabs">
            <a href="/?tab=friends" class="tab-link {{ 'active' if tab == 'friends' }}">🧑‍🤝‍🧑 Friend Finder</a>
            <a href="/?tab=predictor" class="tab-link {{ 'active' if tab == 'predictor' }}">🔮 Link Predictor</a>
            <a href="/?tab=influencers" class="tab-link {{ 'active' if tab == 'influencers' }}">⭐ Influencers</a>
            <a href="/?tab=community" class="tab-link {{ 'active' if tab == 'community' }}">🏘️ Communities</a>
        </div>
        <div class="card">
            {% if tab == 'friends' %}<form method="post" action="/?tab=friends"><div class="form-section"><input type="text" name="user_id" placeholder="Enter a User ID" value="{{ user_id or '' }}"><input type="submit" value="Find Friends"></div></form>
            {% elif tab == 'predictor' %}<form method="post" action="/?tab=predictor"><div class="form-section"><input type="text" name="user1" placeholder="User ID 1" value="{{ user1 or '' }}"><span>&</span><input type="text" name="user2" placeholder="User ID 2" value="{{ user2 or '' }}"><input type="submit" value="Predict Link"></div></form>
            {% elif tab == 'community' %}<form method="post" action="/?tab=community"><div class="form-section"><input type="text" name="user_id" placeholder="Enter a User ID" value="{{ user_id or '' }}"><input type="submit" value="Explore Community"></div></form>
            {% endif %}
            {% if error %}<p class="error">{{ error }}</p>{% endif %}
            {% if results %}<div class="results-section"><h2>{{ title }}</h2><table class="results-table">
                        <tr>{% for header in headers %}<th>{{ header }}</th>{% endfor %}</tr>
                        {% for row in results %}<tr>{% for item in row %}<td>{{ item }}</td>{% endfor %}</tr>{% endfor %}
            </table></div>{% endif %}
            {# --- THIS IS THE CORRECTED LINE --- #}
            {% if probability is defined and probability is not none %}
            <div class="results-section probability-result">
                <h2>{{ title }}</h2>
                <p class="probability-value">{{ "%.2f"|format(probability * 100) }}%</p>
                <p class="probability-text">This is the probability that a link exists between User {{user1}} and User {{user2}}.</p>
            </div>
            {% endif %}
        </div>
    </div>
</body>
</html>
"""

@app.route('/', methods=['GET', 'POST'])
def home():
    """Render the single-page UI for the tab named by the ``tab`` query arg.

    The ``tab`` query parameter (default ``'friends'``) selects the view.
    The influencers tab has no form and always shows the PageRank ranking.
    The friends, predictor and community tabs show their form on GET and
    process the submitted IDs on POST. Invalid, unknown or unproductive
    input is reported through the ``error`` context variable rather than
    by raising.

    Returns:
        The rendered HTML page.
    """
    tab = request.args.get('tab', 'friends')
    context = {'tab': tab}

    if tab == 'influencers':
        # Read-only view: render it for any method so that a stray POST does
        # not produce an empty card.
        context['title'] = "Top 10 Network Influencers"
        context['headers'] = ["User ID", "PageRank Score"]
        context['results'] = get_top_influencers_pagerank(pagerank_scores)
    elif request.method == 'POST':
        if tab == 'friends':
            user_id = request.form.get('user_id')
            context['user_id'] = user_id
            try:
                # int(None) raises TypeError, so a missing field is covered too.
                uid = int(user_id)
                recs, mutuals = recommend_friends_jaccard(G, uid)
                if recs is None:
                    context['error'] = f"User ID '{uid}' not found."
                elif not recs:
                    # The template renders nothing for an empty table, so
                    # say so explicitly instead of showing a blank card.
                    context['error'] = f"No recommendations available for User {uid}."
                else:
                    context['title'] = f"Friend Recommendations for User {uid}"
                    context['headers'] = ["User ID", "Jaccard Score", "Mutual Friends"]
                    context['results'] = [(r[0], r[1], mutuals.get(r[0])) for r in recs]
            except (ValueError, TypeError):
                context['error'] = "Please enter a valid numeric User ID."
        elif tab == 'predictor':
            user1 = request.form.get('user1')
            user2 = request.form.get('user2')
            context.update({'user1': user1, 'user2': user2})
            try:
                u1, u2 = int(user1), int(user2)
                if u1 not in G or u2 not in G:
                    context['error'] = "One or both User IDs not found."
                elif u1 == u2:
                    context['error'] = "Please enter two different User IDs."
                elif G.has_edge(u1, u2):
                    context['error'] = f"Users {u1} and {u2} are already friends!"
                else:
                    prob = predict_link_probability(ml_model, G, u1, u2)
                    context['title'] = "Link Prediction Result"
                    context['probability'] = prob
            except (ValueError, TypeError):
                context['error'] = "Please enter valid numeric User IDs."
        elif tab == 'community':
            user_id = request.form.get('user_id')
            context['user_id'] = user_id
            try:
                uid = int(user_id)
                recs = recommend_from_community(G, uid, user_to_community)
                if recs is None:
                    context['error'] = f"User ID '{uid}' not found."
                elif not recs:
                    # Same reasoning as the friends tab: never leave the
                    # card empty after a successful lookup.
                    context['error'] = f"No other unconnected users found in the community of User {uid}."
                else:
                    context['title'] = f"Sample of Users in the Same Community as User {uid}"
                    context['headers'] = ["User ID"]
                    context['results'] = [[r] for r in recs]
            except (ValueError, TypeError):
                context['error'] = "Please enter a valid numeric User ID."
    return render_template_string(HTML_TEMPLATE, **context)

# ==============================================================================
# Step 6: Start the Application
# ==============================================================================
if __name__ == '__main__':
    print("\n⏳ Step 6/6: Starting the web application...")
    # Single source of truth so the tunnel and the Flask server cannot end up
    # on different ports.
    PORT = 5000
    # ngrok.connect returns an NgrokTunnel object; its public_url attribute
    # is the address to open, so print that rather than the object's repr.
    public_url = ngrok.connect(PORT).public_url
    print(f"\n\n🚀🚀🚀 Your App is LIVE! Open this URL in your browser: {public_url} 🚀🚀🚀")
    # Blocks until the server is stopped.
    app.run(port=PORT)

⏳ Step 1/6: Installing libraries...
✅ Libraries installed.

⏳ Step 2/6: Configuring ngrok...
ngrok: no process found
✅ ngrok configured.

⏳ Step 3/6: Loading data...
✅ Graph loaded with 4039 nodes and 88234 edges.

⏳ Step 4/6: Pre-computing all models (this will take several minutes)...
   - Calculating PageRank...
   - Detecting communities...
   - Preparing dataset for ML model...


   - Generating negative samples:   0%|          | 0/40000 [00:00<?, ?it/s]

   - Engineering features for ML model...
   - Training Logistic Regression model...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


✅ All models are ready!

⏳ Step 5/6: Defining application functions...

⏳ Step 6/6: Starting the web application...


🚀🚀🚀 Your App is LIVE! Open this URL in your browser: NgrokTunnel: "https://61bdbd46d055.ngrok-free.app" -> "http://localhost:5000" 🚀🚀🚀
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:33] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:35] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:41] "POST /?tab=friends HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:43] "GET /?tab=predictor HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:46] "GET /?tab=influencers HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:49] "GET /?tab=community HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:02:54] "POST /?tab=community HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:03:01] "GET /?tab=influencers HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:03:03] "GET /?tab=predictor HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03:03:11] "POST /?tab=predictor HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2025 03