In [None]:
!pip install cornac numpy pandas

Collecting cornac
  Downloading cornac-2.3.5-cp312-cp312-manylinux1_x86_64.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting powerlaw (from cornac)
  Downloading powerlaw-1.5-py3-none-any.whl.metadata (9.3 kB)
Downloading cornac-2.3.5-cp312-cp312-manylinux1_x86_64.whl (29.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.6/29.6 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw, cornac
Successfully installed cornac-2.3.5 powerlaw-1.5


In [None]:
import numpy as np
import pandas as pd
import cornac
from cornac.models import BPR
from cornac.data import Dataset
from cornac.eval_methods import RatioSplit
from cornac.metrics import NDCG

In [None]:
def load_data(file_path):
    user_items = []

    with open(file_path, 'r') as f:
        for line in f:
            items = line.strip().split()
            user_id = items[0]
            item_ids = items[1:]
            for item_id in item_ids:
                user_items.append((user_id, item_id, 1))

    print(f"Loaded {len(user_items)} interactions")
    return user_items

input_file = '/content/train.txt'
user_items = load_data(input_file)

print("\nFirst 10 interactions:")
print(user_items[:10])


Loaded 810128 interactions

First 10 interactions:
[('0', '13264', 1), ('0', '3556', 1), ('0', '8355', 1), ('0', '3557', 1), ('0', '17801', 1), ('0', '18458', 1), ('0', '18068', 1), ('0', '2978', 1), ('0', '23077', 1), ('1', '39068', 1)]


In [None]:
def prepare_cornac_data(user_items):
    data = Dataset.from_uir(user_items, seed=123)

    print(f"\nDataset Statistics:")
    print(f"Number of users: {data.num_users}")
    print(f"Number of items: {data.num_items}")
    print(f"Number of interactions: {len(user_items)}")
    print(f"Sparsity: {1 - (len(user_items) / (data.num_users * data.num_items)):.4f}")

    return data

train_data = prepare_cornac_data(user_items)



Dataset Statistics:
Number of users: 29858
Number of items: 40981
Number of interactions: 810128
Sparsity: 0.9993


In [None]:
def train_bpr_model(train_data, k=100, max_iter=200, learning_rate=0.01, lambda_reg=0.01):
    print("\nTraining BPR Model...")
    print(f"Hyperparameters: k={k}, max_iter={max_iter}, lr={learning_rate}, lambda={lambda_reg}")

    bpr_model = BPR(
        k=k,
        max_iter=max_iter,
        learning_rate=learning_rate,
        lambda_reg=lambda_reg,
        seed=123,
        verbose=True
    )

    bpr_model.fit(train_data)

    print("Training completed!")
    return bpr_model

bpr_model = train_bpr_model(
    train_data,
    k=100,
    max_iter=200,
    learning_rate=0.01,
    lambda_reg=0.01
)



Training BPR Model...
Hyperparameters: k=100, max_iter=200, lr=0.01, lambda=0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Training completed!


In [None]:
def generate_recommendations(model, train_data, num_recommendations=20):
    print(f"\nGenerating top-{num_recommendations} recommendations for each user...")

    recommendations = {}

    idx_to_uid = {idx: uid for uid, idx in train_data.uid_map.items()}
    idx_to_iid = {idx: iid for iid, idx in train_data.iid_map.items()}

    for user_idx in range(train_data.num_users):
        user_id = idx_to_uid[user_idx]

        ranked_items, scores = model.rank(user_idx, k=num_recommendations)

        recommended_items = [
            idx_to_iid[item_idx]
            for item_idx in ranked_items[:num_recommendations]
        ]

        recommendations[user_id] = recommended_items

        if (user_idx + 1) % 100 == 0:
            print(f"Processed {user_idx + 1}/{train_data.num_users} users")

    print(f"\nGenerated recommendations for {len(recommendations)} users")
    return recommendations

recommendations = generate_recommendations(bpr_model, train_data, num_recommendations=20)

print("\nSample recommendations:")
for i, (user_id, items) in enumerate(recommendations.items()):
    if i < 3:
        print(f"User {user_id}: {items[:5]}...")
    else:
        break



Generating top-20 recommendations for each user...
Processed 100/29858 users
Processed 200/29858 users
Processed 300/29858 users
Processed 400/29858 users
Processed 500/29858 users
Processed 600/29858 users
Processed 700/29858 users
Processed 800/29858 users
Processed 900/29858 users
Processed 1000/29858 users
Processed 1100/29858 users
Processed 1200/29858 users
Processed 1300/29858 users
Processed 1400/29858 users
Processed 1500/29858 users
Processed 1600/29858 users
Processed 1700/29858 users
Processed 1800/29858 users
Processed 1900/29858 users
Processed 2000/29858 users
Processed 2100/29858 users
Processed 2200/29858 users
Processed 2300/29858 users
Processed 2400/29858 users
Processed 2500/29858 users
Processed 2600/29858 users
Processed 2700/29858 users
Processed 2800/29858 users
Processed 2900/29858 users
Processed 3000/29858 users
Processed 3100/29858 users
Processed 3200/29858 users
Processed 3300/29858 users
Processed 3400/29858 users
Processed 3500/29858 users
Processed 36

In [None]:
def save_recommendations(recommendations, output_file='recommendations.txt'):
    print(f"\nSaving recommendations to {output_file}...")

    with open(output_file, 'w') as f:
        for user_id in sorted(recommendations.keys(), key=int):
            items = ' '.join(recommendations[user_id])
            f.write(f"{user_id} {items}\n")

    print(f"✓ Recommendations saved successfully!")
    print(f"File: {output_file}")
    print(f"Total users: {len(recommendations)}")
    print(f"\nReady for submission to: http://coe-clp.sjsu.edu/")

output_file = 'recommendations.txt'
save_recommendations(recommendations, output_file)

print("\nFirst 3 lines of output file:")
with open(output_file, 'r') as f:
    for i, line in enumerate(f):
        if i < 3:
            print(line.strip()[:100] + "...")



Saving recommendations to recommendations.txt...
✓ Recommendations saved successfully!
File: recommendations.txt
Total users: 29858

Ready for submission to: http://coe-clp.sjsu.edu/

First 3 lines of output file:
0 4953 3556 6128 5459 9785 5086 725 5037 3558 10089 5447 5350 5244 5263 10088 5351 18465 5232 5233 2...
1 13279 12376 17491 13273 32087 12389 28713 31323 37909 18990 23304 23115 12381 25261 32084 12398 12...
2 6990 19014 17505 19114 9921 9927 19119 19124 7557 7553 16744 16741 19012 19013 26669 30468 13008 1...


In [None]:
def tune_hyperparameters(user_items):
    """
    Find optimal hyperparameters using validation split
    """
    print("\nStarting hyperparameter tuning...")

    rs = RatioSplit(
        data=user_items,
        test_size=0.2,
        rating_threshold=0.5,
        exclude_unknowns=True,
        verbose=True,
        seed=123
    )

    k_values = [50, 100]
    learning_rates = [0.01, 0.05]
    lambda_values = [0.01, 0.1]

    best_score = 0
    best_params = {}
    results_log = []

    for k in k_values:
        for lr in learning_rates:
            for lam in lambda_values:
                print(f"\n{'='*50}")
                print(f"Testing: k={k}, lr={lr}, lambda={lam}")
                print(f"{'='*50}")

                model = BPR(
                    k=k,
                    max_iter=100,
                    learning_rate=lr,
                    lambda_reg=lam,
                    seed=123,
                    verbose=False
                )

                ndcg_metric = NDCG(k=20)

                experiment = cornac.Experiment(
                    eval_method=rs,
                    models=[model],
                    metrics=[ndcg_metric],
                    user_based=True
                )

                experiment.run()

                try:
                    metric_results = experiment.result[0][1]
                    score = metric_results[0]

                    print(f"NDCG@20: {score:.4f}")

                    results_log.append({
                        'k': k,
                        'lr': lr,
                        'lambda': lam,
                        'NDCG@20': score
                    })

                    if score > best_score:
                        best_score = score
                        best_params = {'k': k, 'lr': lr, 'lambda': lam}
                        print(f"✓ New best score!")

                except Exception as e:
                    print(f"Error extracting score: {e}")
                    print(f"Result structure: {experiment.result}")

    print(f"\n{'='*60}")
    print(f"HYPERPARAMETER TUNING COMPLETE")
    print(f"{'='*60}")
    print(f"Best parameters: {best_params}")
    print(f"Best NDCG@20: {best_score:.4f}")
    print(f"{'='*60}")

    print("\nAll Results:")
    results_log.sort(key=lambda x: x['NDCG@20'], reverse=True)
    for r in results_log:
        print(f"k={r['k']:3d}, lr={r['lr']:.3f}, lambda={r['lambda']:.3f}: NDCG@20={r['NDCG@20']:.4f}")

    return best_params

best_params = tune_hyperparameters(user_items)

print("\n\nTraining final model with best parameters...")
final_model = train_bpr_model(
    train_data,
    k=best_params['k'],
    max_iter=200,
    learning_rate=best_params['lr'],
    lambda_reg=best_params['lambda']
)

final_recommendations = generate_recommendations(final_model, train_data, num_recommendations=20)
save_recommendations(final_recommendations, 'final_recommendations.txt')



Starting hyperparameter tuning...
rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 29858
Number of items = 40974
Number of ratings = 648102
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 29858
Number of items = 40974
Number of ratings = 162010
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 29858
Total items = 40974

Testing: k=50, lr=0.01, lambda=0.01

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0539 |   33.8305 |  59.3302

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0539 |   33.8305 |  59.3302


Testing: k=50, lr=0.01, lambda=0.1

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0309 |   36.1288 |  60.2987

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0309 |   36.1288 |  60.2987


Testing: k=50, lr=0.05, lambda=0.01

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0864 |   35.0778 |  63.0748

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0864 |   35.0778 |  63.0748


Testing: k=50, lr=0.05, lambda=0.1

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0647 |   33.6286 |  61.1852

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0647 |   33.6286 |  61.1852


Testing: k=100, lr=0.01, lambda=0.01

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0514 |   41.9877 |  72.5641

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0514 |   41.9877 |  72.5641


Testing: k=100, lr=0.01, lambda=0.1

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0266 |   37.6518 |  62.1404

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0266 |   37.6518 |  62.1404


Testing: k=100, lr=0.05, lambda=0.01

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0862 |   41.4193 |  62.6476

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0862 |   41.4193 |  62.6476


Testing: k=100, lr=0.05, lambda=0.1

[BPR] Training started!

[BPR] Evaluation started!


Ranking:   0%|          | 0/28141 [00:00<?, ?it/s]


TEST:
...
    | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0627 |   39.1158 |  65.0366

Error extracting score: 'Result' object is not subscriptable
Result structure:     | NDCG@20 | Train (s) | Test (s)
--- + ------- + --------- + --------
BPR |  0.0627 |   39.1158 |  65.0366


HYPERPARAMETER TUNING COMPLETE
Best parameters: {}
Best NDCG@20: 0.0000

All Results:


Training final model with best parameters...


KeyError: 'k'

In [None]:
print("Training BPR model with optimized hyperparameters...")

final_model = train_bpr_model(
    train_data,
    k=100,
    max_iter=200,
    learning_rate=0.01,
    lambda_reg=0.01
)

final_recommendations = generate_recommendations(final_model, train_data, num_recommendations=20)

save_recommendations(final_recommendations, 'recommendations.txt')

print("\n" + "="*60)
print("✓ Main submission file ready: recommendations.txt")
print("="*60)


Training BPR model with optimized hyperparameters...

Training BPR Model...
Hyperparameters: k=100, max_iter=200, lr=0.01, lambda=0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Training completed!

Generating final recommendations...

Generating top-20 recommendations for each user...
Processed 100/29858 users
Processed 200/29858 users
Processed 300/29858 users
Processed 400/29858 users
Processed 500/29858 users
Processed 600/29858 users
Processed 700/29858 users
Processed 800/29858 users
Processed 900/29858 users
Processed 1000/29858 users
Processed 1100/29858 users
Processed 1200/29858 users
Processed 1300/29858 users
Processed 1400/29858 users
Processed 1500/29858 users
Processed 1600/29858 users
Processed 1700/29858 users
Processed 1800/29858 users
Processed 1900/29858 users
Processed 2000/29858 users
Processed 2100/29858 users
Processed 2200/29858 users
Processed 2300/29858 users
Processed 2400/29858 users
Processed 2500/29858 users
Processed 2600/29858 users
Processed 2700/29858 users
Processed 2800/29858 users
Processed 2900/29858 users
Processed 3000/29858 users
Processed 3100/29858 users
Processed 3200/29858 users
Processed 330