In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/database-for-interests-based-app/users.csv
/kaggle/input/database-for-interests-based-app/chats.csv


# **Content based recommender**

This Kaggle code performs content-based recommendation for chatrooms based on user interests using TF-IDF and cosine similarity. The code reads chat data from a CSV file, transforms the descriptions into TF-IDF vectors, and computes cosine similarities with a user query. The top similar chatrooms are then displayed in a table with their respective similarity scores. The example query is "I like reading and discussions" with the top 10 similar chatrooms.

Import necessary libraries

In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

Load chat data from CSV file

In [3]:
descriptions = pd.read_csv("/kaggle/input/database-for-interests-based-app/chats.csv", sep=",")

Initialize TF-IDF vectorizer

In [4]:
vectorizer = TfidfVectorizer(
    stop_words='english',
    binary=False,
    ngram_range=(1, 2),
    use_idf=False,
    norm=None
)

Transform chat descriptions into TF-IDF vectors

In [5]:
doc_vectors = vectorizer.fit_transform(descriptions['description'])

In [6]:
def comp_description(query, results_number=20):
    # Compute cosine similarities between the user query and chat descriptions
    q_vector = vectorizer.transform([query])
    cosine_similarities = cosine_similarity(q_vector, doc_vectors.toarray())

    # Get indices of the top similar chatrooms
    top_indices = cosine_similarities.argsort()[0][::-1][:results_number]

    # Display the results in a table
    results_table = pd.DataFrame({
        'Chatroom': descriptions['chatroom'][top_indices],
        'Similarity Score': cosine_similarities[0][top_indices]
    })

    # Display the table
    print("Top {} similar chatrooms:".format(results_number))
    print(results_table)

Example query

In [7]:
comp_description("I like reading and discussions", results_number=10)

Top 10 similar chatrooms:
                 Chatroom  Similarity Score
3               Book Club          0.324443
1          Science Lovers          0.196116
9             Music Vibes          0.000000
8             Gaming Zone          0.000000
7      Art and Creativity          0.000000
6           Foodies Haven          0.000000
5        Fitness Fanatics          0.000000
4        Travel Explorers          0.000000
2             Movie Buffs          0.000000
0  Technology Enthusiasts          0.000000


# **Collabarative based recommender**

Collaborative filtering recommendation system using Nearest Neighbors. The dataset consists of user ratings for different chatrooms, and the goal is to provide personalized recommendations for users based on their preferences.

Read the CSV file

In [8]:
df = pd.read_csv("/kaggle/input/database-for-interests-based-app/users.csv", sep=",")

Pivot the dataframe to create a user-item matrix

In [9]:
users_pivot = df.pivot_table(index='chatroom_id', columns='user_id', values='rating').fillna(0)

Convert the user-item matrix to a sparse matrix

In [10]:
from scipy.sparse import csr_matrix
user_item_matrix = csr_matrix(users_pivot.values)

Create a NearestNeighbors model

In [11]:
from sklearn.neighbors import NearestNeighbors
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_item_matrix)

In [12]:
def get_recommendations(user_id, top_n=5):
    user_index = users_pivot.columns.get_loc(user_id)
    distances, indices = model_knn.kneighbors(users_pivot.iloc[user_index, :].values.reshape(1, -1), n_neighbors=top_n + 1)
    print(f"Top {top_n} Recommendations for {user_id}:\n")
    for i in range(1, len(distances.flatten())):
        print(f'{i}: {users_pivot.index[indices.flatten()[i]]}, with distance of {distances.flatten()[i]:.4f}')

Example: Get top 5 recommendations for a specific user_id

In [13]:
get_recommendations(user_id='user1', top_n=5)

Top 5 Recommendations for user1:

1: chatroom9, with distance of 0.0478
2: chatroom4, with distance of 0.1526
3: chatroom7, with distance of 0.1538
4: chatroom5, with distance of 0.1562
5: chatroom10, with distance of 0.2509
