# User Groups

In [25]:
# import required packages
import pandas as pd
from collections import defaultdict

In [28]:
# config: these values are interpolated into the input CSV path and the output paths below
ds_name = 'Goodreads'  # dataset folder name under dataset/
ds_acronym = 'GR'  # prefix of the ratings CSV file name
ds_feedback = 'Explicit'  # feedback-type token in the ratings CSV file name

In [29]:
# load the ratings file of the dataset selected in the config cell
dataset_path = f"dataset/{ds_name}/{ds_acronym}-Book-{ds_feedback}-5Rate-Map.csv"
dataset = pd.read_csv(dataset_path, sep=",")

In [30]:
# get user distribution: number of dataset rows per user id
user_dist = dataset['uid'].value_counts()
num_users = len(user_dist)

# report the headline statistics, one line each
for label, value in (
    ('No. users: ', num_users),
    ('Mean books per user: ', user_dist.mean()),
    ('Min books per user: ', user_dist.min()),
    ('Max books per user: ', user_dist.max()),
):
    print(label + str(value))

No. users: 100950
Mean books per user: 41.511619613670135
Min books per user: 5
Max books per user: 5284


In [31]:
# get item distribution: number of dataset rows per book id
item_dist = dataset['bid'].value_counts()
num_items = item_dist.shape[0]
print(f'No. items: {num_items}')

No. items: 59196


In [32]:
# get top items: the first `top_fraction` share of `item_dist`
# (value_counts orders items from most to least frequent by default)
top_fraction = 0.2
num_top = int(num_items * top_fraction)
top_item_dist = item_dist.iloc[:num_top]
print(f'No. top items: {num_top}')

No. top items: 11839


In [33]:
# ids of the top (most frequently rated) items, as a set for fast membership tests
top_item_set = set(top_item_dist.index)

In [34]:
# Map every user id to the set of distinct book ids that user has rated.
# Columns are still read by position (0 = user id, 1 = book id), but through `.iloc`
# instead of integer keys on each row Series: that positional fallback is deprecated
# in pandas, and iterating the two columns directly avoids the per-row Series that
# `iterrows()` builds.
user_books = defaultdict(set)
for uid, bid in zip(dataset.iloc[:, 0], dataset.iloc[:, 1]):
    # defaultdict(set) creates the empty set on first access, so no membership check is needed
    user_books[int(uid)].add(int(bid))

In [37]:
# For each user: fraction of their books that belong to the top-item set
user_pop_book_ratio = {
    user: len(books & top_item_set) / len(books)
    for user, books in user_books.items()
}

In [38]:
# Rank users from highest to lowest popular-book ratio; the dict keeps that insertion order
ranked_pairs = sorted(user_pop_book_ratio.items(), key=lambda pair: pair[1], reverse=True)
sorted_user_pop_book_ratio = dict(ranked_pairs)

In [39]:
# Size of the top user group (and of the bottom group): `top_fraction` of all ranked users.
# It is derived from the ranked dict itself so that it always agrees with the slice bounds
# used when the group files are written (those are based on len(sorted_user_pop_book_ratio)).
num_user = len(sorted_user_pop_book_ratio)
num_top_users = int(top_fraction * num_user)
num_top_users

20190

In [40]:
# Files to save user ids, grouped by each user's ratio of top (popular) books.
# open(..., 'w') does not create missing directories, so dataset/{ds_name}/user-groups/
# has to exist before this cell runs.
inactive_users = open(f'dataset/{ds_name}/user-groups/inactive_users.txt', 'w')
medium_users = open(f'dataset/{ds_name}/user-groups/medium_users.txt', 'w')
active_users = open(f'dataset/{ds_name}/user-groups/active_users.txt', 'w')

# every file starts with the same one-column header line
header_line = 'user_id' + '\n'
inactive_users.write(header_line)
medium_users.write(header_line)
active_users.write(header_line)

8

In [41]:
# users with the highest popular-book ratio: the first `num_top_users` of the ranking
ranked_uids = list(sorted_user_pop_book_ratio)
active_users.writelines(str(uid) + '\n' for uid in ranked_uids[:num_top_users])
active_users.close()

In [42]:
# users in the middle of the ranking: everyone between the top and bottom `num_top_users`
ranked_uids = list(sorted_user_pop_book_ratio)
middle_end = len(ranked_uids) - num_top_users
medium_users.writelines(str(uid) + '\n' for uid in ranked_uids[num_top_users:middle_end])
medium_users.close()

In [43]:
# users with the lowest popular-book ratio: the last `num_top_users` of the ranking
ranked_uids = list(sorted_user_pop_book_ratio)
tail_start = len(ranked_uids) - num_top_users
inactive_users.writelines(str(uid) + '\n' for uid in ranked_uids[tail_start:])
inactive_users.close()

In [44]:
# file closing (safety net: each handle is already closed right after it is written,
# and closing an already-closed file is a no-op)
for handle in (inactive_users, medium_users, active_users):
    handle.close()

## Creating User Groups using No. of Interactions (inactive, medium, and active user groups)

In [None]:
# Count the number of dataset rows (interactions) per user id.
# The user id column is read by position (column 0) through `.iloc` rather than through
# integer keys on each `iterrows()` row, which relies on a deprecated positional fallback
# and is much slower. Insertion order (first appearance of each user) is preserved.
# Note: this counts rows, so repeated (user, book) pairs, if any, are counted each time.
user_book = dict()
for uid in dataset.iloc[:, 0]:
    uid = int(uid)
    user_book[uid] = user_book.get(uid, 0) + 1

In [None]:
# Interaction-count thresholds used to split users into groups
user_segmentation_ranges = {
    'A': 50,   # fewer than A interactions -> inactive
    'B': 100,  # A <= interactions < B -> medium; B or more -> active
}

In [None]:
# Files to save user ids based on the number of interactions per user.
# NOTE(review): these paths are relative to the current working directory, unlike the
# popularity-based group files written under dataset/{ds_name}/user-groups/ - confirm intended.
inactive_users = open('inactive_users.txt', 'w')
medium_users = open('medium_users.txt', 'w')
active_users = open('active_users.txt', 'w')

In [None]:
# every group file starts with the same one-column header line
header_line = 'user_id' + '\n'
inactive_users.write(header_line)
medium_users.write(header_line)
active_users.write(header_line)

In [None]:
# Assign each user to a group file according to their interaction count
lower_bound = user_segmentation_ranges['A']
upper_bound = user_segmentation_ranges['B']

for uid, book_count in user_book.items():
    if book_count >= upper_bound:
        target = active_users
    elif book_count >= lower_bound:
        target = medium_users
    else:
        target = inactive_users
    target.write(str(uid) + '\n')

# file closing
for handle in (inactive_users, medium_users, active_users):
    handle.close()