In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/recsysmasterfds-2024/train.csv
/kaggle/input/recsysmasterfds-2024/kaggle_baseline.csv


We will try three different approaches: 
- Collaborative filtering (using cosine similarity)

- Matrix Factorization (using SVD) 

- Factorization machines

In this notebook we will just explore the two first approaches and the last one will have a different notebook.

# Let us first do some preprocessing

In [2]:
#Reading the data 
df = pd.read_csv('/kaggle/input/recsysmasterfds-2024/train.csv')
df.rename(columns={'release_date':'genre', 'sex':'age', 'age':'sex'}, inplace=True) 
df.head()

Unnamed: 0,user_id,title,movie_id,rating,genre,age,sex
0,2592,Top Gun (1986),1101,4,Action|Romance,50,M
1,4318,12 Angry Men (1957),1203,4,Drama,25,M
2,2756,Robocop 2 (1990),2986,2,Action|Crime|Sci-Fi,18,M
3,1706,Modern Times (1936),3462,5,Comedy,25,M
4,4813,Milk Money (1994),276,3,Comedy|Romance,35,F


We want to have a different measure of the rating. In order to do that, and given a user we will consider just the first 15% ranked movies as "seen" (positive) and the others as "unseen".
We will play with both dimensions, ratings and this last measure that we will call "positive". 

In [3]:
# Calculate the threshold rating for each user_id
threshold_ratings = df.groupby('user_id')['rating'].quantile(0.85)

threshold_int = threshold_ratings.astype(int)

delta = (threshold_ratings - threshold_int) > 1e-5

for i in threshold_ratings.index:
    if delta[i] == True:
        threshold_ratings[i] = int(threshold_ratings[i]) + 1
        
# Function to filter ratings for each user
def retain_top_ratings(group):
    threshold = threshold_ratings[group.name]
    return group[group['rating'] >= threshold]

# Apply the filtering function to retain top 85% ratings for each user
top_ratings_df = df.groupby('user_id').apply(retain_top_ratings).reset_index(drop=True)

#print(top_ratings_df['rating'].unique())

  top_ratings_df = df.groupby('user_id').apply(retain_top_ratings).reset_index(drop=True)


In [4]:
# Merge dataframes based on 'user_id' and 'movie_id'
merged_df = pd.merge(df, top_ratings_df, on=['user_id', 'movie_id'], how='left', suffixes=('', '_top'))

# Set binary column based on the merge result
df['positive'] = merged_df['title_top'].notnull().astype(int)

df.head()

Unnamed: 0,user_id,title,movie_id,rating,genre,age,sex,positive
0,2592,Top Gun (1986),1101,4,Action|Romance,50,M,0
1,4318,12 Angry Men (1957),1203,4,Drama,25,M,0
2,2756,Robocop 2 (1990),2986,2,Action|Crime|Sci-Fi,18,M,0
3,1706,Modern Times (1936),3462,5,Comedy,25,M,1
4,4813,Milk Money (1994),276,3,Comedy|Romance,35,F,0


We also tried another rating metrics so that we can impute the unseen films with a 0. This is the one we are showing now because that was the metric we used in the ensemble (combination of MF and CF).

In [5]:
df['rating'] = df['rating'].replace(1,-4)
df['rating'] = df['rating'].replace(2,-2)
df['rating'] = df['rating'].replace(3,-1)
df['rating'] = df['rating'].replace(4,2)
df['rating'] = df['rating'].replace(5,4)

### To build the top25 dictionary

In [6]:
def build_top (preds):
    top_dict = {}
    for user_id in preds.index:
        if user_id%500 == 0: 
            print (user_id)
        user_movies = df[df['user_id'] == user_id]['movie_id'].values
        ratings = list(preds_cf.loc[user_id])
    #     print (ratings)
        top_movies = preds.columns[np.argsort(ratings)[::-1]]
        top_movies_not_watched = [movie_id for movie_id in top_movies if movie_id not in user_movies][:25]
        top_dict[user_id] = top_movies_not_watched
    return top_dict

### To write the final solution

In [7]:
import csv

def write_solution (name, top_dict):
    test_example = pd.read_csv('/kaggle/input/recsysmasterfds-2024/kaggle_baseline.csv')
    with open(name+'.csv', 'w', encoding='UTF8') as f:
        # create the csv writer
        writer = csv.writer(f)
        # write a row to the csv file
        writer.writerow(['user_id', 'prediction'])
        for user_id in test_example.user_id.unique():
            relevant_items = top_dict[user_id]
            list_relevants = ' '.join([str(elem) for elem in relevant_items])
            writer.writerow([str(user_id),list_relevants])

# Collaborative Filtering

Collaborative filtering code predicts user preferences by finding similarities between users or items and recommending items based on those similarities. It involves creating a user-item matrix, calculating similarities, and generating recommendations.

In [8]:
table_cf = pd.pivot_table(df, values='rating', index='user_id', columns='movie_id', aggfunc='last',fill_value=0)
table_cf.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,-2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


We measure the similarity between two users using cosine similarity. To do that, we determine the inner product of different rows of the table. Here we considered two approaches, one where we consider the whole table (in this case, the similarity matrix is symmetric), and another one where we measure the similarity between one user and another but considering only the seen films for one user. We got better results for the second approach.

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

def cosine (seen_films_only):
    S = []
    for user in table_cf.index:
        if user%1000 == 0:
            print (user)
        
        if seen_films_only:
            seen_films =  list(df[df['user_id']==user]['movie_id'])
            auxdf = table_cf[seen_films]# Sample DataFrame and index
        else:
            auxdf = table_cf
    
        # Get the row of interest
        row_of_interest = auxdf.loc[user:user+1]

        # Calculate cosine similarity with all other rows
        cos_similarities = cosine_similarity(row_of_interest, auxdf)

        # Get the cosine similarities for the row of interest
        cos_similarities_for_row = cos_similarities[0]

        # Now cos_similarities_for_row contains the cosine similarity of the row of interest with all other rows
    #     print (cos_similarities_for_row)

        S.append(cos_similarities_for_row)
    return np.array(S)

S = cosine(seen_films_only = True)
S

1000
2000
3000
4000
5000
6000


array([[ 1.        ,  0.30812876,  0.27266675, ...,  0.        ,
         0.13319886,  0.3291308 ],
       [ 0.21444854,  1.        ,  0.13953908, ...,  0.07071068,
         0.18578764,  0.17027649],
       [ 0.21912525,  0.13661477,  1.        , ...,  0.32071349,
         0.06629935, -0.05801194],
       ...,
       [ 0.        ,  0.06509446,  0.39056673, ...,  1.        ,
         0.19528337, -0.33932831],
       [ 0.06705323,  0.22884643,  0.03713907, ...,  0.0675566 ,
         1.        ,  0.28619987],
       [ 0.08699502,  0.08363054, -0.01652894, ..., -0.08438629,
         0.17203107,  1.        ]])

In [10]:
def get_preds_cf (S, table):
    predictions_cf = S@table.values
    for i,row in enumerate (predictions_cf):
        row/=sum(S[i]) ### Normalizing predictions 
        predictions_cf[i] = row
    preds_cf = pd.DataFrame(predictions_cf)
    preds_cf.columns = table.columns
    preds_cf.index = table.index
    return preds_cf

preds_cf = get_preds_cf (S,table_cf)
preds_cf

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.175457,0.026222,0.006168,-0.014078,0.000521,0.282429,0.057613,0.002265,-0.022745,0.128560,...,-0.000803,-0.003612,-0.018016,-0.032645,0.010410,0.177255,0.114934,0.016675,0.015605,0.097825
2,0.787743,0.014261,-0.018632,-0.018555,-0.011729,0.279403,0.037690,-0.001139,-0.026887,0.136664,...,0.000895,-0.002627,-0.017986,-0.030622,0.006240,0.151901,0.104893,0.013128,0.011147,0.092499
3,0.899772,0.034263,0.006112,-0.017918,-0.005388,0.327672,0.046054,0.006496,-0.022898,0.203426,...,0.001415,-0.001493,-0.025421,-0.027303,0.008436,0.170716,0.115275,0.016924,0.011583,0.090306
4,0.846890,-0.007251,-0.027475,-0.022539,-0.029163,0.338009,0.006813,-0.001379,-0.026569,0.158538,...,0.010209,-0.003226,-0.020766,-0.028288,0.011060,0.160074,0.125247,0.014401,0.015686,0.087910
5,0.776343,-0.107689,-0.075361,-0.037905,-0.069291,0.095109,-0.050807,-0.009075,-0.028155,-0.068945,...,0.004275,-0.000875,-0.010162,-0.047154,0.015258,0.070934,0.196676,0.010235,0.024723,0.063943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.722287,-0.053268,-0.047464,-0.033309,-0.039116,0.251068,-0.001792,-0.008599,-0.033229,0.046569,...,-0.000656,-0.002531,-0.017887,-0.041042,0.017448,0.115971,0.148678,0.014849,0.019995,0.089093
6037,0.764117,-0.013395,-0.022628,-0.027022,-0.022515,0.270435,0.018587,-0.004567,-0.026372,0.090154,...,0.002634,-0.001125,-0.015908,-0.030164,0.011347,0.139004,0.125676,0.012495,0.015737,0.085206
6038,1.202499,0.070252,-0.029915,-0.000740,-0.003797,0.290399,0.142487,-0.012429,-0.052542,0.123058,...,0.004829,0.000369,-0.017140,-0.020946,0.011023,0.187950,0.111897,0.020797,0.007125,0.138800
6039,0.875260,-0.002630,-0.022444,-0.018968,-0.010277,0.233873,0.038553,-0.005136,-0.024516,0.101096,...,0.000906,-0.002921,-0.018413,-0.029323,0.014699,0.133172,0.112856,0.014859,0.016633,0.091117


In [11]:
top_dict_cf = build_top (preds_cf)
write_solution('solution_cf', top_dict_cf)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000


# Matrix Factorization (using SVD) 

We are going to implement the matrix factorization using SVD. SVD in matrix factorization efficiently reduces the dimensionality of user-item interaction data while preserving key information, enabling accurate recommendations by capturing latent factors underlying user preferences and item characteristics. Actually, with this approach we have obtained the best score.

In [12]:
table_mf = pd.pivot_table(df, values='rating', index='user_id', columns='movie_id', aggfunc='last')
table_mf.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,-2.0,,,,,...,,,,,,,,,,


In [13]:
#Apply the mean to fill the NaN values (per row)
table1_mf = table_mf.apply(lambda row: row.fillna(row.mean()), axis=1)
table1_mf.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,2.25,2.25,2.25,2.25,2.25,2.25,2.25,2.25,2.25,...,2.25,2.25,2.25,2.25,2.25,2.25,2.25,2.25,2.25,2.25
2,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,...,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264,0.962264
3,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,...,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,...,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667,2.166667
5,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-2.0,-0.035294,-0.035294,-0.035294,-0.035294,...,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294,-0.035294


We are going to introduce the core of the algorithm: the SVD decomposition of the user-item table. We can define the number of singular values to consider and the number of svd iterations to perform. A better performance should be obtained for a bigger number of iterations, although we could notice that this was not the real case. This is because the algorithm converges so that films that had just a few repetitions and had a high rating are taking into account more than expected. It matches with the idea of serendipity, but we consider that 1 or 2 ratings are not representetative of the quality of a film. In fact, we got better results for the same rank approximation with only 1 iteration than with more iterations. 

In [14]:
from numpy.linalg import svd
from scipy.sparse.linalg import svds

movie_index = {}
for i, name in enumerate(table1_mf.columns):
    movie_index[name] = i

def svd_mf(k, n_iter):
    tol = 0.01
    delta = 1
    cont = 0
    aux = table1_mf.values

    user_indices = df['user_id'] - 1  # Assuming user_id starts at 1 and we need 0-indexed
    movie_indices = df['movie_id'].map(movie_index)  # Convert movie_id to column index using global movie_index
    positive_values = df['rating'].values

    while delta >= tol:
        if cont == n_iter:
            break
        U, s, Vh = svd(aux, full_matrices=False)
        S = np.diag(np.sqrt(s[:k]))
        Uaux = U[:, :k] @ S
        Vaux = S @ Vh[:k, :]
        aux1 = np.copy(aux)
        aux = Uaux @ Vaux

        # Here we reset the ratings of the watched films
        for user_index, col_index, positive_value in zip(user_indices, movie_indices, positive_values):
            aux[user_index, col_index] = positive_value  
        
        #We decided to measure delta as the highest variation of a element of the table
        delta = np.abs(aux - aux1).max()
#         print(delta)
        cont += 1
    
    preds = pd.DataFrame(aux)
    preds.columns = table_mf.columns
    preds.index = table_mf.index
    preds
    return preds

preds_mf = svd_mf(20,1)
preds_mf

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.000000,2.280385,2.278081,2.236159,2.274842,2.158643,2.263765,2.250475,2.254814,2.181086,...,2.243816,2.245216,2.249272,2.243641,2.246557,2.254236,2.261953,2.251784,2.257439,2.248871
2,0.928414,0.884402,0.917844,0.903789,0.976297,1.058570,0.883351,0.936827,0.919935,0.636854,...,0.986474,0.955979,0.936246,0.923429,0.924316,1.082832,1.059723,0.954303,0.955156,1.066416
3,1.036849,1.295215,1.384841,1.427677,1.410117,1.480213,1.442687,1.411551,1.365124,1.403508,...,1.411245,1.406343,1.419976,1.403390,1.411028,1.322898,1.364627,1.394593,1.398839,1.381546
4,2.346150,2.170046,2.187859,2.164951,2.179530,2.159283,2.160693,2.162059,2.176624,2.153915,...,2.148253,2.170471,2.161739,2.161145,2.177228,2.121107,2.139899,2.165499,2.167765,2.147147
5,0.806327,-0.102517,-0.095313,-0.015844,-0.132189,-2.000000,-0.125679,-0.026318,0.023957,-0.101722,...,-0.087768,-0.032814,-0.043050,-0.098709,-0.031342,-0.211574,0.018758,-0.031881,-0.007289,-0.105248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.851874,-0.350734,0.124260,-2.000000,0.374295,-1.000000,0.038542,0.144996,0.129264,-0.738403,...,0.153616,0.212770,0.164135,-0.237643,0.350085,0.068016,1.147728,0.235314,0.333954,0.428959
6037,0.953090,0.861299,0.988179,0.937353,0.954869,0.851069,0.883137,0.975143,1.041285,0.752232,...,0.989191,0.989174,0.941282,0.963942,0.948887,0.962382,1.060192,0.994896,1.016074,0.989637
6038,1.699160,1.538237,1.539706,1.560303,1.553711,1.541939,1.584290,1.539124,1.536983,1.548812,...,1.537471,1.540491,1.556115,1.539414,1.548904,1.498407,1.513463,1.548737,1.539344,1.544623
6039,1.440005,1.314930,1.336673,1.337031,1.380866,1.390225,1.355949,1.333405,1.351406,1.432071,...,1.323689,1.340289,1.337048,1.299325,1.357759,1.301384,1.339247,1.341994,1.358459,1.348331


We approached the problem of recommending films with 1 or 2 ratings only by weighting each rating with the normalized logarithm of the number of users that watched each film. 

In [17]:
# log_counts = np.log(df['movie_id'].value_counts())
# log_counts/=max(log_counts)
# for movie in preds.columns:
#     preds_mf[movie] *= log_counts[movie]
# preds_mf

In [15]:
top_dict_mf = build_top (preds_mf)
write_solution ('solution_mf', top_dict_mf)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000


# Combining both approaches

Finally, we also tried to combine the predictions , by simply calculating the mean of both predicted ratings using Collaborative Filtering and Matrix Factorization. We obtained worse results than for each model independently.

In [18]:
final = (preds_mf.values + preds_cf.values)/2
final = pd.DataFrame(final)
final.columns = table_mf.columns
final.index = table_mf.index
final

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.587728,1.153304,1.142125,1.111040,1.137682,1.220536,1.160689,1.126370,1.116035,1.154823,...,1.121506,1.120802,1.115628,1.105498,1.128483,1.215746,1.188444,1.134229,1.136522,1.173348
2,0.858078,0.449331,0.449606,0.442617,0.482284,0.668987,0.460520,0.467844,0.446524,0.386759,...,0.493685,0.476676,0.459130,0.446403,0.465278,0.617367,0.582308,0.483716,0.483151,0.579458
3,0.968310,0.664739,0.695476,0.704880,0.702364,0.903942,0.744370,0.709024,0.671113,0.803467,...,0.706330,0.702425,0.697277,0.688044,0.709732,0.746807,0.739951,0.705759,0.705211,0.735926
4,1.596520,1.081398,1.080192,1.071206,1.075183,1.248646,1.083753,1.080340,1.075027,1.156227,...,1.079231,1.083622,1.070486,1.066428,1.094144,1.140590,1.132573,1.089950,1.091726,1.117528
5,0.791335,-0.105103,-0.085337,-0.026874,-0.100740,-0.952446,-0.088243,-0.017697,-0.002099,-0.085334,...,-0.041746,-0.016845,-0.026606,-0.072932,-0.008042,-0.070320,0.107717,-0.010823,0.008717,-0.020652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.787080,-0.202001,0.038398,-1.016655,0.167589,-0.374466,0.018375,0.068198,0.048018,-0.345917,...,0.076480,0.105120,0.073124,-0.139342,0.183767,0.091994,0.648203,0.125082,0.176974,0.259026
6037,0.858603,0.423952,0.482775,0.455165,0.466177,0.560752,0.450862,0.485288,0.507456,0.421193,...,0.495912,0.494024,0.462687,0.466889,0.480117,0.550693,0.592934,0.503696,0.515906,0.537421
6038,1.450829,0.804244,0.754896,0.779782,0.774957,0.916169,0.863389,0.763347,0.742221,0.835935,...,0.771150,0.770430,0.769487,0.759234,0.779964,0.843179,0.812680,0.784767,0.773234,0.841711
6039,1.157633,0.656150,0.657114,0.659032,0.685295,0.812049,0.697251,0.664135,0.663445,0.766584,...,0.662297,0.668684,0.659317,0.635001,0.686229,0.717278,0.726051,0.678426,0.687546,0.719724


In [19]:
top_dict_final = build_top (final)
write_solution ('solution_combined', top_dict_final)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
