In [None]:
import numpy as np

def score(result, children_wishes, gifts_wishes):
    """Return the combined (children + gifts) happiness score of an assignment.

    Args:
        result: sequence where ``result[i]`` is the gift id given to child ``i``.
        children_wishes: 2-D table, one row per child, listing gift ids from
            most to least wanted.
        gifts_wishes: 2-D table, one row per gift, listing child ids from most
            to least preferred.

    Returns:
        ``(gift_sum / gift_denominator) ** 3 + (child_sum / child_denominator) ** 3``
        as a float.  A pair that appears at 0-based position ``k`` of a wish
        list of length ``n`` contributes ``2 * (n - k)``; a pair that is not on
        the list contributes ``-1``.
    """
    # Integer index array so the lookups below can be done in one shot.
    result = np.asarray(result, dtype=np.intp)

    children_count = len(children_wishes)
    unique_gifts_count = len(gifts_wishes)

    gifts_wishes_len = len(gifts_wishes[0])
    child_wishes_len = len(children_wishes[0])

    gifts_of_each_type_count = children_count // unique_gifts_count

    # Dense (child, gift) tables pre-filled with the "not on the list" value -1.
    # Adding 2*k + 1 to a listed entry turns -1 into 2*k.
    child_happiness = np.full((children_count, unique_gifts_count), -1, dtype=np.int16)
    bonus = np.arange(child_wishes_len, 0, -1) * 2 + 1
    for c in range(children_count):
        child_happiness[c, children_wishes[c]] += bonus

    gift_happiness = np.full((children_count, unique_gifts_count), -1, dtype=np.int16)
    bonus = np.arange(gifts_wishes_len, 0, -1) * 2 + 1
    for g in range(unique_gifts_count):
        gift_happiness[gifts_wishes[g], g] += bonus

    # Sum in int64: the tables are int16, and accumulating int16 scalars one by
    # one can overflow when NumPy keeps the scalar's own dtype for
    # "python int + numpy scalar" (NumPy 2 promotion rules).
    child_ids = np.arange(len(result))
    child_sum = int(child_happiness[child_ids, result].sum(dtype=np.int64))
    gift_sum = int(gift_happiness[child_ids, result].sum(dtype=np.int64))

    denominator2 = children_count * child_wishes_len * 2
    denominator1 = unique_gifts_count * gifts_of_each_type_count * gifts_wishes_len * 2
    anch3 = (child_sum / denominator2) ** 3
    ansh3 = (gift_sum / denominator1) ** 3
    return ansh3 + anch3


In [None]:
import gc
import math
from abc import ABC, abstractmethod
from statistics import mean

import numpy as np
import pandas as pd
from ortools.graph import pywrapgraph
from tqdm import tqdm


class AbstractDataReader(ABC):
    """Interface for objects that supply the two wish-list tables."""

    @abstractmethod
    def read_children_wishes_and_gift_wishes(self):
        """Return the children's wish lists and the gifts' wish lists.

        The concrete readers in this file return a 2-tuple
        ``(children_wishes, gift_wishes)``.  This base implementation
        returns ``None``.
        """
        return None


class DataFromFileReader(AbstractDataReader):
    """Reader that loads both wish-list tables from CSV files."""

    def __init__(self, child_wishlist_path, gift_wishlist_path):
        """Remember the locations of the two CSV files."""
        self.child_wishlist_path = child_wishlist_path
        self.gift_wishlist_path = gift_wishlist_path

    def read_children_wishes_and_gift_wishes(self):
        """Return ``(children_wishes, gift_wishes)`` as two value arrays."""
        return self._read_data_from_csv()

    def _read_data_from_csv(self):
        """Parse both header-less CSV files and strip their first column."""
        tables = []
        for csv_path in (self.child_wishlist_path, self.gift_wishlist_path):
            frame = pd.read_csv(csv_path, header=None)
            # Column 0 is discarded; only the remaining columns are returned.
            frame.drop([0], axis=1, inplace=True)
            tables.append(frame.values)
        return tables[0], tables[1]


class DataRandomReader(AbstractDataReader):
    """Reader that fabricates random wish lists instead of loading files."""

    def __init__(self, children_count, gift_count, children_wishes_len, gifts_wishes_len):
        """Store the sizes of the two tables to generate."""
        self.children_num = children_count
        self.gift_num = gift_count
        self.children_wishes_len = children_wishes_len
        self.gifts_wishes_len = gifts_wishes_len

    def read_children_wishes_and_gift_wishes(self):
        """Return ``(children_wishes, gift_wishes)`` filled with random ids."""
        children_wishes = self._generate_random_wishes(
            self.children_num, self.children_wishes_len, self.gift_num)
        gift_wishes = self._generate_random_wishes(
            self.gift_num, self.gifts_wishes_len, self.children_num)
        return children_wishes, gift_wishes

    @staticmethod
    def _generate_random_wishes(row_num, columns_num, max_num_in_column_exclusive):
        """Build a ``row_num x columns_num`` int32 table of random ids.

        Each row is drawn without replacement from
        ``range(max_num_in_column_exclusive)``, so ids are unique per row.
        """
        table = np.empty((row_num, columns_num), dtype=np.int32)
        for row in table:
            # ``row`` is a view into ``table``; assigning through it fills the table.
            row[:] = np.random.choice(max_num_in_column_exclusive, size=columns_num, replace=False)
        return table


class Solver:
    """Assign gifts to children by solving a min-cost-flow problem.

    Node layout of the flow network: child ``c`` is node ``c``, gift ``g`` is
    node ``children_count + g`` and a single sink node (``vent_index``)
    absorbs all flow.  Children with the lowest ids are handled as sibling
    groups: ids ``0 .. last_triplet_id`` in consecutive groups of three, then
    ids up to ``last_twin_id`` in consecutive groups of two.  A group is
    represented by its first child only, with a supply equal to the group
    size.

    After :meth:`solve`, ``self.result[c]`` holds the gift id chosen for child
    ``c``; the value ``unique_gifts_count`` means the unit of flow went
    straight to the sink, i.e. the child received no gift.
    """

    def __init__(self, children_wishes, gifts_wishes, p=1, d=1):
        """Store the input tables and derive the problem sizes.

        Args:
            children_wishes: one row per child listing gift ids; earlier
                positions receive a higher happiness.
            gifts_wishes: one row per gift listing child ids; earlier
                positions receive a higher happiness.
            p: multiplier applied to the position score from the child's list.
            d: multiplier applied to the position score from the gift's list.
        """
        self.children_wishes = children_wishes
        self.gifts_wishes = gifts_wishes

        self.p = p
        self.d = d

        self.result = None
        self.happinesses = None
        self.graph = None
        self.unhappy_children = None

        self.children_count = len(children_wishes)
        self.unique_gifts_count = len(gifts_wishes)

        # 4% of the children are twins and 0.5% are triplets, rounded up to a
        # whole number of pairs / triples.  Triplets come first in id order.
        twins = math.ceil(0.04 * self.children_count / 2.) * 2
        triplets = math.ceil(
            0.005 * self.children_count / 3.) * 3
        self.last_triplet_id = triplets - 1
        self.last_twin_id = triplets + twins - 1

        self.gifts_of_each_type_count = int(self.children_count // self.unique_gifts_count)

    @property
    def gift_wishes_len(self):
        """Length of one gift's wish list (taken from the first row)."""
        return len(self.gifts_wishes[0])

    @property
    def child_wishes_len(self):
        """Length of one child's wish list (taken from the first row)."""
        return len(self.children_wishes[0])

    @property
    def vent_index(self):
        """Node index of the sink that absorbs all flow."""
        return self.children_count + self.unique_gifts_count

    def solve(self):
        """Build the network, solve it and store the assignment in ``self.result``.

        Raises:
            RuntimeError: if the min-cost-flow solver does not report an
                optimal solution; the flows would be meaningless in that case.
        """
        self.prepare_happinesses()
        self.graph = self.create_flow_graph()
        if not DEBUG:
            # The dictionary is large; drop it once the arcs exist.
            del self.happinesses
        gc.collect()

        status = self.graph.Solve()
        gc.collect()
        if status != self.graph.OPTIMAL:
            raise RuntimeError(
                'Min-cost-flow solver did not find an optimal solution (status %s)' % status)
        optimal_cost = self.graph.OptimalCost()

        print('koszt optymalny: ', optimal_cost)
        print('przepływ', self.graph.MaximumFlow())

        self.calculate_result_from_graph()
        del self.graph
        gc.collect()

    def calculate_result_from_graph(self):
        """Translate arc flows into a per-child gift array (``self.result``).

        Only arcs leaving a child node are considered.  An arc carrying ``k``
        units fills the next ``k`` still-unassigned slots starting at the
        arc's tail, which spreads a sibling group's flow over its members.
        """
        result = np.full(self.children_count, fill_value=-1, dtype=np.int32)

        graph = self.graph
        for arc in range(graph.NumArcs()):
            flow = graph.Flow(arc)
            if flow == 0:
                continue

            tail = graph.Tail(arc)
            if tail >= self.children_count:
                # Arc leaves a gift node, not a child node.
                continue

            gift_id = self.node_index_to_gift_id(graph.Head(arc))
            slot = tail
            for _ in range(flow):
                # Skip slots already filled by another arc of the same group.
                while result[slot] != -1:
                    slot += 1
                result[slot] = gift_id
        self.result = result

    def prepare_happinesses(self):
        """Build ``self.happinesses``: ``{child_id: {gift_id: happiness}}``.

        Only pairs that occur on at least one wish list get an entry.  A pair
        starts from a penalty of ``-1`` for every list it is missing from and
        gains ``weight * (list_length - position - 1)`` for every list it is
        on; finally ``2`` is added to every entry so that none is negative.
        """
        print("Tworzenie słownika szczęść")
        happinesses = dict()

        gift_wishes_len = self.gift_wishes_len
        child_wishes_len = self.child_wishes_len

        for gift_id, gift_wishes in enumerate(self.gifts_wishes):
            for child_position_in_gift_wishes, child_id in enumerate(gift_wishes):
                h = self.d * (gift_wishes_len - child_position_in_gift_wishes - 1)

                child_happinesses = happinesses.setdefault(child_id, dict())
                # -1 is the penalty for being absent from the child's list
                # (removed again below if the pair turns out to be on it).
                current = child_happinesses.get(gift_id, -1)
                child_happinesses[gift_id] = current + h

        for child_id, child_wishes in enumerate(self.children_wishes):
            for gift_position_in_child_wishes, gift_id in enumerate(child_wishes):
                h = self.p * (child_wishes_len - gift_position_in_child_wishes - 1)

                child_happinesses = happinesses.setdefault(child_id, dict())
                if gift_id in child_happinesses:
                    # Pair is on both lists: undo the child's-list penalty.
                    current = child_happinesses[gift_id] + 1
                else:
                    # Pair is missing from the gift's list: apply its penalty.
                    current = -1
                child_happinesses[gift_id] = current + h

        # Shift every value by two to get rid of negative entries.
        for per_child in happinesses.values():
            for gift_id in per_child:
                per_child[gift_id] += 2

        self.happinesses = happinesses
        print("Stworzono słownika szczęść")
        print("-" * 20)

    def gift_id_to_node_index(self, gift_id):
        """Map a gift id to its node index in the flow network."""
        return self.children_count + gift_id

    def node_index_to_gift_id(self, node_index):
        """Map a node index back to a gift id (the sink maps to ``unique_gifts_count``)."""
        return node_index - self.children_count

    def _add_sibling_group(self, graph, first_child, group_size):
        """Add supply and arcs for a group of siblings that share one node.

        The group is represented by ``first_child``.  For every gift wished by
        any member an arc is added whose cost is minus the integer average of
        the members' happiness (0 for members without an entry); gifts whose
        average is not positive get no arc.
        """
        graph.SetNodeSupply(first_child, group_size)
        graph.AddArcWithCapacityAndUnitCost(tail=first_child, head=self.vent_index,
                                            capacity=group_size, unit_cost=0)

        members = [self.happinesses[first_child + offset] for offset in range(group_size)]
        candidate_gift_ids = set(members[0].keys())
        for member in members[1:]:
            candidate_gift_ids = candidate_gift_ids | set(member.keys())

        for gift_id in candidate_gift_ids:
            gift_id = int(gift_id)
            total = sum(member.get(gift_id, 0) for member in members)
            # Exact integer average; avoids the slow, float-based statistics.mean.
            avg_happ = total // group_size

            if avg_happ > 0:
                gift_node_index = self.gift_id_to_node_index(gift_id)
                graph.AddArcWithCapacityAndUnitCost(tail=first_child, head=gift_node_index,
                                                    capacity=group_size, unit_cost=-avg_happ)

    def create_flow_graph(self):
        """Build and return the min-cost-flow network from ``self.happinesses``."""
        print("Tworze graf")

        graph = pywrapgraph.SimpleMinCostFlow()
        print('-' * 5 + "trojaczki")
        # triplets
        for i in tqdm(range(0, self.last_triplet_id, 3)):
            self._add_sibling_group(graph, i, 3)

        print('-' * 5 + "dwojaczki")
        # twins
        for i in tqdm(range(self.last_triplet_id + 1, self.last_twin_id, 2)):
            self._add_sibling_group(graph, i, 2)

        print('-' * 5 + "jedynaki")
        # single children
        for i in tqdm(range(self.last_twin_id + 1, self.children_count)):
            graph.SetNodeSupply(i, 1)
            graph.AddArcWithCapacityAndUnitCost(tail=i, head=self.vent_index, capacity=1, unit_cost=0)

            for gift_id, happ in self.happinesses[i].items():
                gift_node_index = self.gift_id_to_node_index(gift_id)
                graph.AddArcWithCapacityAndUnitCost(tail=i, head=int(gift_node_index), capacity=1, unit_cost=-happ)

        # every gift drains into the sink, limited by the number of copies
        for i in range(self.unique_gifts_count):
            gift_node_index = self.gift_id_to_node_index(i)
            graph.AddArcWithCapacityAndUnitCost(tail=gift_node_index, head=self.vent_index,
                                                capacity=self.gifts_of_each_type_count, unit_cost=0)

        # the sink ("bin") consumes one unit per child
        graph.SetNodeSupply(self.vent_index, -self.children_count)

        print("Stworzono graf")
        print('-' * 20)

        return graph

# When True the notebook runs on randomly generated wish lists and skips
# writing the CSV outputs; when False it reads the competition files.
DEBUG = False

# Location of the competition input files.
INPUT_FOLDER = '/kaggle/input/santa-gift-matching/'
CHILD_WISHLIST_PATH = f'{INPUT_FOLDER}child_wishlist_v2.csv.zip'
GIFT_WISHLIST_PATH = f'{INPUT_FOLDER}gift_goodkids_v2.csv.zip'


In [None]:

print("Wczytuje dane")
# Random data in debug mode, the competition CSV files otherwise.
reader = (
    DataRandomReader(children_count=100_000, gift_count=100, children_wishes_len=1, gifts_wishes_len=1)
    if DEBUG
    else DataFromFileReader(CHILD_WISHLIST_PATH, GIFT_WISHLIST_PATH)
)
c_wishes, g_wishes = reader.read_children_wishes_and_gift_wishes()
print("Wczytano dane")
print('-' * 20)

if DEBUG:
    for caption, table in (("children wishes: ", c_wishes), ("gifts wishes: ", g_wishes)):
        print(caption)
        print(table)

# Weight of the child's-list term; the gift's-list term keeps weight 1.
p = 1_000_000
solver = Solver(c_wishes, g_wishes, p, d=1)

solver.solve()


In [None]:
if not DEBUG:
    # Dump the raw solver output.  The array index is the child id and the
    # stored value is the gift id.  The context manager closes the file even
    # if a write fails.
    with open('temp.csv', 'w') as out:
        out.write('ChildId,GiftId\n')
        out.writelines(f'{child_id},{gift_id}\n' for child_id, gift_id in enumerate(solver.result))

In [None]:
# Copy the solver's output and sizing values into notebook-level names so the
# repair cells below do not need the solver object.
result = solver.result
gifts_of_each_type_count = solver.gifts_of_each_type_count
unique_gifts_count = solver.unique_gifts_count
vent_index = solver.vent_index
last_triplet_id, last_twin_id = solver.last_triplet_id, solver.last_twin_id

In [None]:
# Checkpoint: keep an independent copy of the current assignment array.
result_copy = result.copy()

In [None]:
# Restore the assignment from the checkpoint.  Take a copy: binding `result`
# to `result_copy` itself would let the in-place edits made below modify the
# checkpoint too, so re-running this cell would no longer restore anything.
result = result_copy.copy()

In [None]:
if DEBUG:
    print("result before fixing")
    print(result)
    print('vent_index', vent_index)

print('result.max', result.max())
# One counter per gift id plus one extra slot for the "no gift" value
# (unique_gifts_count).
matches_for_every_gift = np.zeros(unique_gifts_count + 1, dtype=np.int32)
print(result.max())
# Unbuffered add: every occurrence of a gift id increments its counter once.
np.add.at(matches_for_every_gift, result, 1)




In [None]:
# Print the counters followed by their extremes, each under its own caption.
for caption, value in (
    ("matches_for_every_gift", matches_for_every_gift),
    ("matches_for_every_gift argmin", matches_for_every_gift.argmin()),
    ("matches_for_every_gift min", matches_for_every_gift.min()),
    ("matches_for_every_gift max", matches_for_every_gift.max()),
    ("matches_for_every_gift argmax", matches_for_every_gift.argmax()),
):
    print(caption)
    print(value)

In [None]:
# Mutable [count, gift_id] pairs ordered by ascending count (ties by gift id).
matches = sorted([count, gift] for gift, count in enumerate(matches_for_every_gift))
print(matches[:10])

In [None]:
# Give every child without a gift the first gift (in `matches` order) that
# still has spare copies.
no_gift_id = unique_gifts_count  # value stored in `result` for "no gift"

# Locate the counter of the "no gift" pseudo-gift explicitly instead of
# assuming it sorted to the end of `matches`.
no_gift_entry = next((entry for entry in matches if entry[1] == no_gift_id), None)

for kid_id, gift_id in enumerate(result):
    if gift_id != no_gift_id:
        continue

    for entry in matches:
        count, candidate_gift = entry
        # Never hand out the "no gift" pseudo-gift, and respect the real
        # per-gift capacity rather than a hard-coded 1000.
        if candidate_gift == no_gift_id or count >= gifts_of_each_type_count:
            continue
        result[kid_id] = candidate_gift
        entry[0] += 1
        if no_gift_entry is not None:
            no_gift_entry[0] -= 1
        break
 

In [None]:
# Peek at both ends of the (initially count-sorted) list.
head_entries, tail_entries = matches[:8], matches[-8:]
print(head_entries)
print(tail_entries)

In [None]:
# Remove the "no gift" pseudo-gift entry.  Filter by id: the entry is only the
# last element when its count happens to be the largest.
matches = [entry for entry in matches if entry[1] != unique_gifts_count]

In [None]:
# Recount per real gift id; there is no slot for the "no gift" value any more.
matches_for_every_gift = np.zeros(unique_gifts_count, dtype=np.int32)
print("result max", result.max())
np.add.at(matches_for_every_gift, result, 1)

In [None]:
# Show the counters of the ten highest gift ids (displayed as the cell output).
matches_for_every_gift[-10:]

In [None]:
# Second checkpoint: independent copy of the assignment at this point.
result_copy2 = result.copy()

In [None]:
# Restore the assignment from the second checkpoint.  Take a copy: binding
# `result` to `result_copy2` itself would let the in-place edits made below
# modify the checkpoint too.
result = result_copy2.copy()

In [None]:
if DEBUG:
    print("matches_for_every_gift po przydzieleniu każdemu dziecku prezentu")
    print(matches_for_every_gift)
    print(result)


# Validity pass: all members of a sibling group must hold the same gift.
counter = 0

for first in range(0, last_triplet_id, 3):
    gifts = tuple(result[first + offset] for offset in range(3))
    distinct = len(set(gifts))
    if distinct == 1:
        continue

    # One re-assignment when two siblings already agree, two when all differ.
    counter += distinct - 1

    # Keep the gift shared by the 2nd and 3rd sibling if they agree,
    # otherwise fall back to the 1st sibling's gift.
    dominant = gifts[2] if gifts[1] == gifts[2] else gifts[0]
    result[first:first + 3] = dominant

print("zabrane prezenty tylko przez trojaczki", counter)

for first in range(last_triplet_id + 1, last_twin_id, 2):
    if result[first] != result[first + 1]:
        # The second twin takes over the first twin's gift.
        counter += 1
        result[first + 1] = result[first]

print("zabrane prezenty razem", counter)

In [None]:
# Recount how often each gift is used, in preparation for taking back gifts
# that were handed out too many times.
matches_for_every_gift = np.zeros(unique_gifts_count, dtype=np.int32)
np.add.at(matches_for_every_gift, result, 1)


for caption, value in (
    ("matches_for_every_gift", matches_for_every_gift),
    ("matches_for_every_gift argmin", matches_for_every_gift.argmin()),
    ("matches_for_every_gift min", matches_for_every_gift.min()),
    ("matches_for_every_gift max", matches_for_every_gift.max()),
    ("matches_for_every_gift argmax", matches_for_every_gift.argmax()),
):
    print(caption)
    print(value)

In [None]:
# Children (singles only) that lose their gift because it was over-allocated.
unhappy_children = []

first_single_id = last_twin_id + 1
for child_id in range(first_single_id, len(result)):
    current_gift = result[child_id]
    if matches_for_every_gift[current_gift] <= gifts_of_each_type_count:
        continue

    # Move the child to the gift that is currently used the least.
    replacement = matches_for_every_gift.argmin()
    matches_for_every_gift[current_gift] -= 1
    matches_for_every_gift[replacement] += 1
    result[child_id] = replacement
    unhappy_children.append(child_id)

print("Nieszczęśliwe dzieci", len(unhappy_children))


print("matches_for_every_gift po naprawie")
print(matches_for_every_gift)

# Still over capacity somewhere: the repair above was not sufficient.
if gifts_of_each_type_count < matches_for_every_gift.max(initial=-1):
    print('Nie udało sie dopasować prezentów')

gc.collect()

In [None]:
if not DEBUG:
    # Dump the repaired assignment.  The array index is the child id and the
    # stored value is the gift id.  The context manager closes the file even
    # if a write fails.
    with open('temp2.csv', 'w') as out:
        out.write('ChildId,GiftId\n')
        out.writelines(f'{child_id},{gift_id}\n' for child_id, gift_id in enumerate(result))

In [None]:
print("Tworzenie słownika szczęść")
happinesses = dict()

# Reuse the wish lists that were solved and will be scored.  Asking `reader`
# again would re-parse the input files, and with the random reader it would
# produce a brand-new, unrelated data set.
children_wishes, gifts_wishes = c_wishes, g_wishes
gift_wishes_len = len(gifts_wishes[0])
child_wishes_len = len(children_wishes[0])

for gift_id, gift_wishes in enumerate(gifts_wishes):
    for child_position_in_gift_wishes, child_id in enumerate(gift_wishes):
        h = (gift_wishes_len - child_position_in_gift_wishes - 1)

        child_happinesses = happinesses.setdefault(child_id, dict())
        # -1 is the penalty for being absent from the child's list
        # (removed again below if the pair turns out to be on it).
        current = child_happinesses.get(gift_id, -1)
        child_happinesses[gift_id] = current + h

for child_id, child_wishes in enumerate(children_wishes):
    for gift_position_in_child_wishes, gift_id in enumerate(child_wishes):
        h = p * (child_wishes_len - gift_position_in_child_wishes - 1)

        child_happinesses = happinesses.setdefault(child_id, dict())
        if gift_id in child_happinesses:
            # Pair is on both lists: undo the child's-list penalty.
            current = child_happinesses[gift_id] + 1
        else:
            # Pair is missing from the gift's list: apply its penalty.
            current = -1
        child_happinesses[gift_id] = current + h

# Shift every value by two to get rid of negative entries.
for per_child in happinesses.values():
    for gift_id in per_child:
        per_child[gift_id] += 2

print("Stworzono słownik szczęść")

In [None]:
# List the ids of the children that were moved to another gift when
# over-allocated gifts were taken back.
print(unhappy_children)

In [None]:
print("Zaczynam heurstykę")
print("Ilość nieszczęśliwych dzieci:", len(unhappy_children))

# Pairwise improvement: for every pair of re-assigned children, swap their
# gifts whenever that raises their combined happiness.  Pairs without an
# entry in the dictionary count as 0.
unhappy_total = len(unhappy_children)
for first_pos, child1 in enumerate(unhappy_children):
    for second_pos in range(first_pos + 1, unhappy_total):
        child2 = unhappy_children[second_pos]

        # Re-read both gifts each time: an earlier swap may have changed them.
        gift1, gift2 = result[child1], result[child2]

        wishes1 = happinesses.get(child1, {})
        wishes2 = happinesses.get(child2, {})

        kept_total = wishes1.get(gift1, 0) + wishes2.get(gift2, 0)
        swapped_total = wishes1.get(gift2, 0) + wishes2.get(gift1, 0)

        if swapped_total > kept_total:
            result[child1], result[child2] = gift2, gift1

print('Skończyłem heurystykę')

In [None]:
# Drop the happiness dictionary; the scoring cell that follows works from the
# wish lists directly.
del happinesses

In [None]:
# Quick look at the gifts assigned to the first ten children.
print(result[:10])

In [None]:
gc.collect()

# Score the final assignment against the wish lists that were solved.
s = score(result, c_wishes, g_wishes)
print("Score: ", s)
if not DEBUG:
    # The score is part of the file name.  The array index is the child id
    # and the stored value is the gift id.  The context manager closes the
    # file even if a write fails.
    with open(f'out_{s}.csv', 'w') as out:
        out.write('ChildId,GiftId\n')
        out.writelines(f'{child_id},{gift_id}\n' for child_id, gift_id in enumerate(result))