In [53]:
class person:
    """
    The person class, which records every item the person has purchased,
    viewed or added to the cart, in the order the events were recorded
    """
    def __init__(self, id):
        self.person_id = id

        # one list per kind of event, filled by the recording methods below
        self.purchased_items, self.viewed_items, self.cart_items = [], [], []

    def purchase(self, item_id, transcation_id, timestamp):
        """
        store one purchase as an (item_id, transcation_id, timestamp) tuple
        :param item_id: str
        :param transcation_id: str
        :param timestamp: str
        :return: None
        """
        entry = (item_id, transcation_id, timestamp)
        self.purchased_items.append(entry)

    def view(self, item_id, timestamp):
        """
        store one view as an (item_id, timestamp) tuple
        :param item_id: str
        :param timestamp: str
        :return: None
        """
        entry = (item_id, timestamp)
        self.viewed_items.append(entry)

    def add_to_cart(self, item_id, timestamp):
        """
        store one cart addition as an (item_id, timestamp) tuple
        :param item_id: str
        :param timestamp: str
        :return: None
        """
        entry = (item_id, timestamp)
        self.cart_items.append(entry)

    def purchases_record(self):
        """
        give back the purchases recorded so far (the stored list itself,
        not a copy)
        :return: list
        """
        return self.purchased_items

    def carts_record(self):
        """
        give back the cart additions recorded so far (the stored list
        itself, not a copy)
        :return: list
        """
        return self.cart_items

    def views_record(self):
        """
        give back the views recorded so far (the stored list itself,
        not a copy)
        :return: list
        """
        return self.viewed_items


In [54]:
class item:
    """
    The item class, which keeps the history of every property of the item
    together with the persons who purchased it, viewed it or added it to
    the cart
    """
    def __init__(self, id):
        self.item_id = id
        # property name -> list of (value, timestamp) tuples
        self.properties = {}
        # one list per kind of event, filled by the recording methods below
        self.customers, self.viewers, self.cart_adders = [], [], []

    def property_change(self, pro_name, value, timestamp):
        """
        append a (value, timestamp) entry to the history of a property,
        starting a new history when the property is seen for the first time
        :param pro_name: str
        :param value: str
        :param timestamp: str
        :return: None
        """
        history = self.properties.setdefault(pro_name, [])
        history.append((value, timestamp))

    def purchase(self, customerID, transcation_id,timestamp):
        """
        store one purchase as a (customerID, transcation_id, timestamp) tuple
        :param customerID: str
        :param transcation_id: str
        :param timestamp: str
        :return: None
        """
        entry = (customerID, transcation_id, timestamp)
        self.customers.append(entry)

    def view(self, customerID, timestamp):
        """
        store one view as a (customerID, timestamp) tuple
        :param customerID: str
        :param timestamp: str
        :return: None
        """
        entry = (customerID, timestamp)
        self.viewers.append(entry)

    def add_to_cart(self, customerID, timestamp):
        """
        store one cart addition as a (customerID, timestamp) tuple
        :param customerID: str
        :param timestamp: str
        :return: None
        """
        entry = (customerID, timestamp)
        self.cart_adders.append(entry)

    def customers_record(self):
        """
        give back the purchases recorded for this item (the stored list
        itself, not a copy)
        :return: list
        """
        return self.customers

    def carts_record(self):
        """
        give back the cart additions recorded for this item (the stored
        list itself, not a copy)
        :return: list
        """
        return self.cart_adders

    def viewers_record(self):
        """
        give back the views recorded for this item (the stored list itself,
        not a copy)
        :return: list
        """
        return self.viewers

    def property_record(self, pro_name):
        """
        give back the recorded history of one property; a property that was
        never recorded raises KeyError
        :param pro_name: str
        :return: list
        """
        history = self.properties[pro_name]
        return history


In [55]:
class analyse():
    """
    Loads the csv files into two lookup tables: ``persons`` (person id ->
    person) and ``items`` (item id -> item)
    """
    def __init__(self):
        self.persons = {}
        self.items = {}

    def read_property(self, property_file):
        """
        read an item property csv and record every property change on the
        matching item. Rows whose item is not already in ``self.items`` are
        skipped, so only items registered beforehand (read_event registers
        them) receive properties
        :param property_file: str, path of a csv whose first row is a header
            and whose columns are timestamp, item id, property name, value
        :return: None
        """
        # the with-block closes the file even when a malformed row raises
        with open(property_file, "r") as f:
            next(f, None)  # skip the header row (no-op on an empty file)
            for line in f:
                # NOTE: the line is split as-is, so when value is the last
                # column it keeps its trailing newline
                property_info = line.split(",")
                timestamp, item_id = property_info[0], property_info[1]
                pro_name, value = property_info[2], property_info[3]
                if item_id not in self.items:
                    # properties of unknown items are dropped on purpose
                    continue
                self.items[item_id].property_change(pro_name, value, timestamp)

    def read_event(self, event_file):
        """
        read an event csv and record every event on both the person and the
        item involved, creating the person / item on first sight
        :param event_file: str, path of a csv whose first row is a header
            and whose columns are timestamp, person id, event, item id,
            transaction id
        :return: None
        """
        # the with-block closes the file even when a malformed row raises
        with open(event_file, "r") as f:
            next(f, None)  # skip the header row (no-op on an empty file)
            for line in f:
                # NOTE: the line is split as-is, so when transaction id is
                # the last column it keeps its trailing newline
                event_info = line.split(",")
                timestamp, person_id, event = event_info[0], event_info[1], event_info[2]
                item_id, transaction_id = event_info[3], event_info[4]
                if person_id not in self.persons:
                    self.persons[person_id] = person(person_id)
                if item_id not in self.items:
                    self.items[item_id] = item(item_id)
                if event == "transaction":
                    self.persons[person_id].purchase(item_id, transaction_id, timestamp)
                    self.items[item_id].purchase(person_id, transaction_id, timestamp)
                elif event == "view":
                    self.persons[person_id].view(item_id, timestamp)
                    self.items[item_id].view(person_id, timestamp)
                else:
                    # every other event value is recorded as a cart addition
                    self.persons[person_id].add_to_cart(item_id, timestamp)
                    self.items[item_id].add_to_cart(person_id, timestamp)



# Load the events first: read_property only keeps properties of items
# that an event has already registered.
a = analyse()
a.read_event("/Users/Kiki/Downloads/retailrocket-recommender-system-dataset/events.csv")
for property_csv in (
    "/Users/Kiki/Downloads/retailrocket-recommender-system-dataset/item_properties_part1.csv",
    "/Users/Kiki/Downloads/retailrocket-recommender-system-dataset/item_properties_part2.csv",
):
    a.read_property(property_csv)

In [56]:
def subset(threshold):
    """
    select the persons of the global ``a`` that have more than ``threshold``
    purchases and, counted together, more than ``threshold`` views and
    cart additions
    :param threshold: int
    :return: dict, person id -> person
    """
    def is_active(visitor):
        browsed = len(visitor.viewed_items) + len(visitor.cart_items)
        return browsed > threshold and len(visitor.purchased_items) > threshold

    return {
        person_id: visitor
        for person_id, visitor in a.persons.items()
        if is_active(visitor)
    }

In [75]:
def subset_item(persons):
    """
    count, for every item touched by the given persons, how many of their
    recorded events (views, purchases and cart additions) involve it
    :param persons: dict, person id -> person
    :return: dict, item id -> number of events involving the item
    """
    res = {}
    for visitor in persons.values():
        for events in (visitor.viewed_items, visitor.purchased_items, visitor.cart_items):
            for record in events:
                # the item id is the first field of all three tuple layouts.
                # The count is bumped on every event; the former
                # "not in res" guard froze every count at 1.
                res[record[0]] = res.get(record[0], 0) + 1
    return res

In [93]:
import copy
def merge(item_after_subset):
    """
    group the given items of the global ``a`` by category. The category of
    an item is the value of its first recorded "categoryid" entry. The first
    item seen for a category is deep-copied and relabelled with the category
    id; every later item of that category has its property histories,
    viewers, customers and cart adders appended to that copy. Items without
    a "categoryid" property are passed through unchanged under their own id
    :param item_after_subset: iterable of item ids present in a.items
    :return: dict, category id (or item id) -> item
    """
    merged = {}
    for item_key in item_after_subset:
        source = a.items[item_key]

        if "categoryid" not in source.properties:
            # no category known: keep the original object, not a copy
            merged[item_key] = source
            continue

        category = source.property_record("categoryid")[0][0]
        if category not in merged:
            # copy so that later merging never touches the items in a.items
            clone = copy.deepcopy(source)
            clone.item_id = category
            merged[category] = clone
            continue

        target = merged[category]
        for pro_name, values in source.properties.items():
            target.properties.setdefault(pro_name, []).extend(values)
        target.viewers.extend(source.viewers)
        target.customers.extend(source.customers)
        target.cart_adders.extend(source.cart_adders)
    return merged


In [76]:
# persons kept with threshold 1 and with threshold 2
sub_dic1 = subset(1)
sub_dic2 = subset(2)
print("%d %d" % (len(sub_dic1), len(sub_dic2)))

2549 1016


In [85]:
# items touched by each of the two person subsets
item_sub_dic1 = subset_item(sub_dic1)
item_sub_dic2 = subset_item(sub_dic2)
print("%d %d" % (len(item_sub_dic1), len(item_sub_dic2)))

30601 27487


In [94]:
# the same items after grouping them by category
merged_items1 = merge(item_sub_dic1)
merged_items2 = merge(item_sub_dic2)
print("%d %d" % (len(merged_items1), len(merged_items2)))

2021 1959
