In [1]:
import numpy as np
import pandas as pd

num_of_stocks = 2000  # the number of stocks given in our problem

#DataFrame is created with the csv file
df = pd.read_csv('54_hfc_20170614_comp.csv', names=range(num_of_stocks),dtype='float64')

In [2]:
#Panda's DataFrame is easier to handle, 
#but a matrix made in Numpy is much faster to call its values
corr_matrix = np.corrcoef(df.T)
corr_matrix = np.array(corr_matrix, dtype='float64')
corr_matrix.shape

(2000, 2000)

In [3]:
#defining the functions for calculating TICS, CARS and the value to be minimized

def Calc_IC(sub_group_of_stocks):
    return np.corrcoef(df.loc[:,sub_group_of_stocks].T).sum()

def Calc_TICS(list_of_stocks):
    TICS = {}
    for i in range(len(list_of_stocks)):
        TICS[i]=(Calc_IC(list_of_stocks[i])) #here the number i is the group number
    return sum(TICS.values())

def Calc_CARS(list_of_stocks): 
    CARS_dict = {}
    CARS_dict.update({'AR{}'.format(i): df[list_of_stocks[i]].mean(axis=1) for i in range(10)})

    temp_list = []
    for i in range(10):
        temp_list.append(CARS_dict['AR{}'.format(i)])
    
    CARS = np.corrcoef(temp_list).sum()
    return CARS

def objective_function(list_of_stocks):
    return Calc_CARS(list_of_stocks)/Calc_TICS(list_of_stocks)



In [4]:
def find_stock_group(list_of_stocks, stock_number):
    for i in range(10):
        if stock_number in list_of_stocks[i]:
            return (stock_number, i)  #i is the group number 
        
def find_stock_location(list_of_stocks, stock_number):
    for i in range(10):
        if stock_number in list_of_stocks[i]:
            for j in range(200):
                if stock_number == list_of_stocks[i,j]:
                    return (i,j)  #i is the group number and j is the location within the group


In [7]:
def find_the_next_min(group, reference_set):
    index, min_ = 0, 9999  #initialization of the variables
    for i in reference_set:
        if i not in group:
            intra_value = sum([corr_matrix[element,i] for element in group])
            inter_value = (corr_matrix[i].sum() - intra_value - 1)
            value = abs(inter_value/intra_value)
            if value < min_:
                index, min_ = i, value
    return (index, min_)

def add_element_to_group(group, reference_set):
    while len(group) < 200:
        group.append(find_the_next_min(group, reference_set)[0])

def first_element_in_the_next_group(reference_set):
    return np.random.choice(list(reference_set))

In [12]:
#temp list of stocks before partitioning
list_of_stocks = np.arange(2000).reshape(10,200)
reference_set = set(list(range(1,2000)))
group_0 = []
group_0.append(np.random.choice(list(reference_set)))

for i in range(1, 10):  #making the empty lists from group_1 through group_9 
    temp = ''           #group_0 has been already made in the line above
    temp = 'group_'+str(i)+'=[]'
    exec(temp)

def find_partitions(reference_set):
    for i in range(10):
        add_element_to_group(eval('group_{}'.format(i)), reference_set)
        reference_set = reference_set - set(eval('group_{}'.format(i)))
        print('group_{} has been made'.format(i))
        i += 1
        if i != 10:
            eval('group_{}'.format(i)).append(first_element_in_the_next_group(reference_set))

find_partitions(reference_set)

for i in range(10):
    list_of_stocks[i] = eval('group_{}'.format(i))  

group_0 has been made
group_1 has been made
group_2 has been made
group_3 has been made
group_4 has been made
group_5 has been made
group_6 has been made
group_7 has been made
group_8 has been made
group_9 has been made


In [13]:
objective_function(list_of_stocks)

0.00056742933046105223

In [40]:
initial_list_of_stocks = list_of_stocks.copy()
current_min = objective_function(initial_list_of_stocks)

In [36]:
objective_function(initial_list_of_stocks)
list_of_stocks = initial_list_of_stocks.copy()

In [37]:
stock_location_dict = {}
for i in range(num_of_stocks):
    stock_location_dict[i] = find_stock_location(list_of_stocks, i)

#in this method, stock_location_dict needs to be updated whenever locations changed
def fast_stock_location(stock_number):
    return stock_location_dict[stock_number]

#swap function will be used when exchanging a pair of stocks between two different groups
def swap(list_of_stocks,stock1,stock2):
    loc_of_stock1 = fast_stock_location(stock1)
    loc_of_stock2 = fast_stock_location(stock2)
    list_of_stocks[loc_of_stock1], list_of_stocks[loc_of_stock2] = list_of_stocks[loc_of_stock2], list_of_stocks[loc_of_stock1]
    stock_location_dict.update({stock1:loc_of_stock2, stock2: loc_of_stock1})

In [38]:
reference_total = set(np.arange(2000))
def find_the_min(list_of_stocks, group_list, stock_loc): #stock_loc is the location index within the group
    stock = group_list[stock_loc]
    reference = reference_total - set(group_list) | {stock}
    result = find_the_next_min(np.delete(group_list,stock_loc), reference)
    global current_min
    if result[0] != stock: 
        swap(list_of_stocks, stock, result[0])
        temp_ = objective_function(list_of_stocks)
        if  temp_ < current_min:
            current_min = temp_
            print('Swapped, now the min value is', current_min)
        else: 
            swap(list_of_stocks, stock, result[0])
            

In [41]:
current_min

0.00056690105683621583

In [42]:
def optimize_the_first_part(list_of_stocks):
    for i in range(50): #here i is the column number (first i elements in each group)
        print(i, 'has started')
        for j in range(10): #j is the row number (group number)
            find_the_min(list_of_stocks, list_of_stocks[j],i)

def random_optimize(list_of_stocks, num_of_iteration):
    for i in range(num_of_iteration):
        find_the_min(list_of_stocks, list_of_stocks[np.random.randint(10)],np.random.randint(200))

def optimize_the_second_part(list_of_stocks):
    for i in range(150,200): #here i is the column number (first i elements in each group)
        print(i, 'has started')
        for j in range(10): #j is the row number (group number)
            find_the_min(list_of_stocks, list_of_stocks[j],i)
            
def optimize_the_whole_part(list_of_stocks, num_of_iteration):
    for number in range(num_of_iteration):
        optimize_the_first_part(list_of_stocks)
        optimize_the_second_part(list_of_stocks)

In [43]:
optimize_the_whole_part(list_of_stocks,2)

0 has started
Swapped, now the min value is 0.0005662944353
1 has started
Swapped, now the min value is 0.000566224595319
Swapped, now the min value is 0.000566202009791
2 has started
Swapped, now the min value is 0.000566095000097
3 has started
4 has started
Swapped, now the min value is 0.000565949206528
5 has started
Swapped, now the min value is 0.000565928434548
6 has started
Swapped, now the min value is 0.000565823165413
7 has started
8 has started
9 has started
10 has started
11 has started
12 has started
13 has started
14 has started
15 has started
16 has started
17 has started
18 has started
19 has started
20 has started
21 has started
22 has started
23 has started
24 has started
25 has started
26 has started
27 has started
28 has started
29 has started
30 has started
31 has started
32 has started
33 has started
34 has started
35 has started
36 has started
37 has started
Swapped, now the min value is 0.000565791136261
38 has started
Swapped, now the min value is 0.00056577337

In [34]:
optimize_the_first_part(list_of_stocks)

10 has started
11 has started
12 has started
13 has started
14 has started
15 has started
16 has started
17 has started
18 has started
19 has started


In [196]:
optimize_the_second_part(list_of_stocks)

190 has started
191 has started
192 has started
Swapped, now the min value is 0.000565494140734
193 has started
194 has started
195 has started
196 has started
197 has started
198 has started
199 has started


In [44]:
objective_function(list_of_stocks)

0.0005650033768857546

In [45]:
answer_list = list_of_stocks.copy()

In [46]:
current_min

0.0005650033768857546

In [47]:
a = set()
for i in range(10):
    a.update(set(list_of_stocks[i]))
len(a) #this shows that there is no dupulicated element



2000

In [48]:
def minimize(list_of_stocks, fixed_row = 0,fixed_col = 199):
    result = {}
    global current_min
    
    for i in range(0,10):
        if i != fixed_row: #here being in the same row means being in the same group - so avoids the case
            print(i, 'started')
            for j in range(0,199):
                temp_list = list_of_stocks.copy()
                swap(temp_list,temp_list[fixed_row,j],temp_list[i,fixed_col])
                temp_ = objective_function(temp_list)
                if  temp_ < current_min:
                    current_min = temp_
                    print('found',(list_of_stocks[fixed_row,j],list_of_stocks[i,fixed_col]), temp_)
                    result[(list_of_stocks[fixed_row,j],list_of_stocks[i,fixed_col])] = temp_ 
    return result
swap(list_of_stocks, stock, result[0])
        temp_ = objective_function(list_of_stocks)
        if  temp_ < current_min:
            current_min = temp_
            print('Swapped, now the min value is', current_min)
        else: 
            swap(list_of_stocks, stock, result[0])
#starting fixed_row = 0, fixed_col = 199
#in the swapping process, 
#I chose the very last elements in each group first as they contribute to the TICS the least

In [49]:
minimize(list_of_stocks)

1 started


TypeError: 'NoneType' object is not subscriptable