In [1]:
import numpy as np
from ortools.linear_solver import pywraplp
import random as rd

# Path of the input file that is handed to get_data().
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
FILENAME = "/home/ubuntu/Downloads/data_samples_mini_projects/miniproject-20/data.txt"

## Get data from user's input

In [2]:
def get_data(filename):
    """
    Read a problem instance from a text file.

    Expected layout (as consumed by the parsing below):
      * first line: three integers ``N M K``;
      * next N lines, one per article: the first token is skipped, the
        remaining tokens are 1-based ids of scientists who can take the article.

    Returns the tuple ``(N, M, K, A, L)``:

    N    number of articles
    M    number of scientists
    K    minimal number of scientists working on each article
    A    list of N sorted lists; A[i] holds the 1-based ids of the scientists
         that can take on article i
    L    matrix representation of data (N x M numpy int array).
         L[i, j] = 1 means scientist j + 1 can take on article i
    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename, 'r') as infile:
        N, M, K = [int(token) for token in infile.readline().split()]

        A = []
        L = np.zeros((N, M), dtype='int')
        for i in range(N):
            # Skip the leading token of each article line; keep the ids sorted.
            A.append(sorted(int(token) for token in infile.readline().split()[1:]))
            for j in A[i]:
                L[i, j - 1] = 1  # ids are 1-based, columns are 0-based
    return N, M, K, A, L

## Solve the problem with OR-tools

In [93]:
def ortools():
    """Minimize the maximal number of articles that a scientist can take on and print the assignment.

    Reads the module-level names N, M, K and L (the eligibility matrix).
    Builds an integer program with one variable X[i][j] per (article,
    scientist) pair, bounded above by L[i, j], requires exactly K scientists
    per article, and minimizes Y, an upper bound on every scientist's load.

    Prints one line per article with the 1-based ids of the chosen scientists,
    or "No feasible solution" when the solver does not report OPTIMAL.

    Raises:
        RuntimeError: if the SCIP backend cannot be created.
    """
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if solver is None:
        # CreateSolver returns None when the backend is unavailable; fail with
        # a clear message instead of an AttributeError on None.
        raise RuntimeError("SCIP solver is not available in this OR-Tools build")
    INF = solver.infinity()

    Y = solver.IntVar(-INF, INF, 'Y')

    # X[i][j] can only be 1 when scientist j is eligible for article i,
    # because its upper bound is L[i, j].
    X = [
        [solver.IntVar(0, int(L[i, j]), 'X[{}, {}]'.format(i, j)) for j in range(M)]
        for i in range(N)
    ]

    # Every article is taken by exactly K scientists.
    for i in range(N):
        solver.Add(sum(int(L[i, j]) * X[i][j] for j in range(M)) == K)

    # Y dominates the number of articles of every scientist.
    for j in range(M):
        solver.Add(Y >= sum(int(L[i, j]) * X[i][j] for i in range(N)))

    solver.Minimize(Y)
    status = solver.Solve()

    if status == pywraplp.Solver.OPTIMAL:
        for i in range(N):
            for j in range(M):
                # solution_value() is a float; compare with a threshold rather
                # than exact equality so values like 0.9999999 still count.
                if X[i][j].solution_value() > 0.5:
                    print(j + 1, end=' ')
            print()
    else:
        print("No feasible solution")

## Solve the problem with Heuristics

In [94]:
def heuristics():
    """Greedy load-balancing heuristic; prints one assignment line per article.

    Reads the module-level names N, M, K and L (the eligibility matrix).
    Articles are processed in order. For each article the eligible scientists
    are grouped by current load, starting from the least loaded; load levels
    are added to a candidate pool until it holds at least K scientists, and K
    of them are drawn at random from that pool.

    Prints, for every article, the K chosen 1-based scientist ids in ascending
    order. Prints "No feasible solution" and returns if some article has fewer
    than K eligible scientists.
    """
    # Eligible scientists (1-based ids) per article, taken from the global
    # matrix L. The original code read an undefined name `l` here.
    eligible = [[j + 1 for j in range(M) if L[i, j] == 1] for i in range(N)]

    # Without this guard the pool-filling loop below would never terminate.
    if any(len(candidates) < K for candidates in eligible):
        print("No feasible solution")
        return

    load = [0] * M  # load[j - 1] = number of articles given to scientist j
    assignment = []
    for candidates in eligible:
        lowest = min((load[j - 1] for j in candidates), default=0)
        pool = []
        level = 0
        while len(pool) < K:
            # Add every candidate whose load is exactly `lowest + level`.
            pool.extend(j for j in candidates if load[j - 1] == lowest + level)
            level += 1
        chosen = rd.sample(pool, K)
        for j in chosen:
            load[j - 1] += 1
        assignment.append(sorted(chosen))

    for chosen in assignment:
        for j in chosen:
            print(j, end=' ')
        print()

## Solve the problem with Backtracking

# Another approach

Idea:
We aim to minimize the maximum number of articles taken by any single scientist.
So I repeatedly reduce that maximum until it cannot be reduced any further.
This does not mean the maximum can be reduced to an arbitrary value.
At each step I try to reduce the maximum to its predecessor (the largest value strictly smaller than the current maximum; e.g. in 2, 4, 4, 5, 5 the predecessor of the maximum 5 is 4).
If, after the reduction, the predecessor cannot be reached, we have found the solution.
Otherwise we continue reducing.

Another case: if there is no predecessor (e.g. all values in the sequence are equal),
we reduce 'slowly': each value is reduced by only 1.
If, after the reduction, some value did not change, that value is our solution.

This approach is very manual, but it does produce the optimal result. So I think that, starting from the constraints I give below (the points where `check = False` is set), everyone can build on them to implement Backtracking.
Constraints:
K = the minimum number of scientists that must take each article:
The number of scientists assigned to an article cannot be reduced below K.
If, after the reduction, some value cannot be reduced to the predecessor (or stays unchanged, in the no-predecessor case), the solution is the maximum value after the reduction.

In [None]:
def arrange(L):
    """Return a reordered copy of the matrix L.

    Rows are sorted in ascending order of their first-column entry, then
    columns are sorted in ascending order of their first-row entry. The
    original author's stated purpose: arrange the matrix so that the number of
    articles of the scientists holding the maximum can be reduced 'easily'.

    The input array itself is not modified.
    """
    row_order = L[:, 0].argsort()
    by_rows = L[row_order]
    col_order = by_rows[0, :].argsort()
    return by_rows[:, col_order]

def predecessor(L):
    """Find the predecessor of max(L) by scanning L from its last element.

    Starting at the end of L, counts how many consecutive trailing elements
    equal max(L); the element just before that run is the predecessor.
    This only yields "the largest value below the maximum" when L is sorted
    in ascending order.

    Args:
        L: non-empty sequence (list or 1-D array) of numbers.

    Returns:
        ``[value, count]`` where ``count`` is the length of the trailing run of
        maxima and ``value`` is the element preceding that run. When every
        element equals the maximum, returns ``[max(L), len(L)]``.

    Raises:
        ValueError: if L is empty (raised by ``max``).
    """
    top = max(L)  # computed once instead of on every loop iteration
    size = len(L)
    i = 0
    # Bounds check first, so the scan never relies on negative-index wrap-around.
    while i != size and L[size - 1 - i] == top:
        i += 1
    if i == size:
        # No smaller element exists: the sequence has no predecessor.
        return [top, i]
    return [L[size - 1 - i], i]

def stopCDT(L,K):
    """Return the largest row index whose first-column entry equals K.

    Original author's notes on the stop condition of the algorithm:
    - stop if the number of scientists taking every article equals K (the
      minimum number of scientists needed for each article);
    - stop if the maximum number of articles taken by a scientist cannot be
      reduced any more, OR the maximum after the reduction is not equal to the
      predecessor of the maximum.
    Note (translated): during the subtraction some rows reach K while rows
    further down are still greater than K, so a stop condition (at a row) is
    created for when the minimum value (here K) has been reached.

    Raises ValueError when no row has K in its first column.
    """
    first_column = L[:, 0]
    matching_rows = np.where(first_column == K)[0]
    return max(matching_rows)
# NOTE(review): the string literal below is a disabled draft of a helper
# (`congtru`); as a bare string expression it is never executed.
# Translation of the Vietnamese note inside it: "Because this add/subtract
# part takes up a lot of space I intended to put it into a function. However,
# I am afraid it would not return the parameters."
'''
#boi vi cai phan cong tru nay toi thay no ton nhieu dien tich nen dinh dat
#no vao trong mot cai ham. Tuy nhien toi so no khong tra ve cac tham so
def congtru(L,N,M,K,i,j):
    global t ,st
    t = 0
    st = stopCDT(L, K)
    if L[N-i,M-j] == 1:   
        L[N-i,M-j] = 0
        L[0,M-j] -= 1
        L[N-i,0] -= 1
        t += 1 #check if we can reduce the_max_article_taken by a scientist 
                #equal to predecessor
                #if not, break and then solution = max(after reduce)
        k = st #update stop
        while L[0,k] == k: 
            k+=1
            st = k 
'''
def solver(L,N,M,K):
    """Run one reduction pass over the matrix and return the updated matrix.

    The matrix is first reordered with arrange(). The pass then tries to lower
    the entries of row 0 that equal the current maximum, by clearing 1-entries
    in the corresponding columns, without letting a row's column-0 entry drop
    below K. Sets the module-level flag ``check`` to False when no further
    reduction is possible, which is what stops the caller's loop.

    NOTE(review): the indexing (L[N, 0], L[0, M - j]) appears to expect an
    (N+1) x (M+1) matrix whose row 0 and column 0 hold totals - confirm
    against the caller.

    Args:
        L: numpy matrix to reduce (a reordered copy is modified and returned).
        N: index of the last row.
        M: index of the last column.
        K: minimal value allowed in column 0 of a row.

    Returns:
        The reordered, possibly reduced matrix.
    """
    L = arrange(L)
    global check
    p = predecessor(L[0][1:])  # [predecessor value, number of columns at the maximum]
    t = 0   # number of reductions done in the current column
    i = 0   # row offset, counted upwards from the last row
    st = stopCDT(L, K)  # row index at which the upward scan stops
    if L[N,0] == K:
        check = False
    else:
        if p[0] != max(L[0]): #this sequence has predecessor
            t1 = 0  # largest number of reductions achieved in any column
            for j in range(p[1]): #iterate over the columns holding the maximum
                while (L[0,M-j] != p[0]): #check when to stop reducing
                    if N-i == st:
                        break
                    if (L[N-i,0]!= K) :
                        if L[N-i,M-j] == 1:
                            L[N-i,M-j] = 0
                            L[0,M-j] -= 1
                            L[N-i,0] -= 1
                            t += 1 #counts whether the maximum can be reduced
                                   #down to the predecessor;
                                   #if not, the solution is max(after reduce)
                            k = st #update stop
                            while L[0,k] == K:
                                k+=1
                            st = k
                    i += 1
                if t1 <= t:
                    t1 = t
                i = 0
                t = 0
            if t1 < max(L[0]) - predecessor(L[0])[0]:
                # The flag must be cleared BEFORE returning; it used to sit
                # after `return L`, was unreachable, and the caller looped forever.
                check = False
        else:   # this sequence has no predecessor
            t1 = float('inf')  # smallest number of reductions achieved in any column
            for j in range(p[1]):
                while N-i != st:
                    # Compare against K (was a hard-coded 3), matching the
                    # predecessor branch above.
                    if L[N-i,0] != K:
                        if L[N-i,M-j] == 1:
                            L[N-i,M-j] = 0
                            L[0,M-j] -= 1
                            L[N-i,0] -= 1
                            t += 1
                            k = st #update stop
                            while L[0,k] == K:
                                k+=1
                            st = k
                        break
                    i += 1
                if t1 > t:
                    t1 = t
                t = 0
                i = 0
            if t1 == 0:
                # Same fix as above: clear the flag before returning.
                check = False
    return L

def printmt(L,N,M):
    """Print, for each row 1..N of L, the list of column indices 1..M with a non-zero entry.

    Row 0 and column 0 of L are skipped. One list is printed per row.
    """
    for row in range(1, N + 1):
        print([col for col in range(1, M + 1) if L[row, col] != 0])

# Module-level loop flag: main_f() keeps calling solver() while this is True,
# and solver() declares it global and sets it to False to stop the loop.
check = True

def main_f():
    """Load the instance, apply solver() until it clears `check`, and print the result.

    Prints the final matrix, the per-row non-zero column indices (via
    printmt) and the maximum of row 0 as the solution value.

    NOTE(review): get_data() returns an N x M matrix, while solver() and
    printmt() index row N and column M; a step that adds a totals row and
    column appears to be missing - confirm before relying on this function.
    """
    global check
    # Reset the flag so the function also works when called a second time;
    # otherwise a stale False from an earlier run would skip the loop.
    check = True
    N, M, K, A, L = get_data(FILENAME)
    while check:
        L = solver(L,N,M,K)
    print(L)
    printmt(L,N,M)
    print('solution is: ', max(L[0]))

## Main function

In [7]:
# Load the problem instance into module-level names. The functions ortools(),
# heuristics() and backtracking() take no arguments and read N, M, K, A
# and/or L as globals at call time.
N, M, K, A, L = get_data(FILENAME)

def backtracking():
    """Exhaustive search with pruning for the assignment with the smallest maximal load.

    Reads the module-level names N, M, K and A. For every article i it picks
    K positions in A[i] in strictly increasing order, tracking how many
    articles each scientist holds. A branch is cut as soon as a pick would
    raise a scientist's load to the best objective found so far.

    Stores the best objective and assignment in the module-level names
    ``obj_val`` and ``opt_sol`` and prints them.
    """
    global obj_val, opt_sol
    obj_val = float('inf')
    opt_sol = None
    picks = [[-1] * K for _ in range(N)]  # picks[i][t] = position in A[i] of the t-th pick
    load = [0] * M                        # load[s] = articles held by scientist s + 1

    def Update():
        """Record the current complete assignment if it beats the best so far."""
        global obj_val, opt_sol
        worst = max(load)
        if worst < obj_val:
            obj_val = worst
            opt_sol = [[A[i][pos] for pos in picks[i]] for i in range(N)]

    def Try(i, t):
        """Choose the t-th scientist of article i, then recurse on the next slot."""
        # Start after the previous pick of this article; for t == 0 the index
        # t - 1 wraps to the last slot, which still holds -1 at this point.
        first = picks[i][t - 1] + 1
        # Leave enough positions for the remaining K - t - 1 picks.
        last = len(A[i]) - K + t
        for pos in range(first, last + 1):
            who = A[i][pos] - 1
            if load[who] + 1 >= obj_val:
                continue  # cannot improve on the best objective
            picks[i][t] = pos
            load[who] += 1
            if i == N - 1 and t == K - 1:
                Update()
            elif t < K - 1:
                Try(i, t + 1)
            else:
                Try(i + 1, 0)
            # Undo the choice before trying the next candidate.
            load[who] -= 1
            picks[i][t] = -1

    Try(0, 0)
    print(obj_val, opt_sol)
    
    




backtracking()

        


5 [[1, 2, 4], [1, 2, 3], [1, 2, 6], [1, 2, 4], [3, 6, 7], [3, 5, 6], [1, 2, 6], [4, 5, 7], [3, 4, 5], [3, 4, 5]]
