# Task 1: Taste Prediction

In [602]:
import pandas as pd
from numpy import array, matmul, dot, zeros, ones, eye, diag, polyfit, transpose, inf, arccos
from numpy.linalg import solve, norm, det, eig, eigh, eigvals, eigvalsh, inv, lstsq, svd, matrix_power,matrix_rank
from numpy.random import randn, rand, randint, seed
from math import pi, sin, cos, degrees

# Load all known values in vectors (one entry per dish, same order as `dishes`).
# A missing rating is stored as float("nan"): it needs no `np` alias, so this cell
# runs on a fresh kernel, and it does not rely on `np.NaN`, which NumPy 2.0 removed.
MISSING = float("nan")
iva = [2, MISSING, MISSING, 2, 1, MISSING, 1, MISSING]
emo = [2, 1, 2, 2, 1, 2, 1, -1]
momo = [2, 2, 2, -1, 2, 2, -1, -1]
svetlana = [1, 1, -2, -1, 1, 2, -1, 1]
teddie = [-1, 2, -2, -1, 2, 2, 1, 1]
martina = [-1, 2, 1, 1, -1, 2, 1, -1]

# List of all items to be rated
dishes = ['Stroopwaffle', 'Tuna Subway', 'Kebab', 'Appelflap',
          'Hamkaas', 'Pancakes', 'GEWIS Sandwich', 'Dry Crackers']

# List of all raters
users = ['Iva', 'Emo', 'Momo', 'Svetlana', 'Teddie', 'Martina']

# Create matrix: one row per user, in the same order as `users`
matrix = [iva, emo, momo, svetlana, teddie, martina]

# Make matrix into a dataframe with users as index and dishes as columns
index = pd.Index(users)
df = pd.DataFrame(matrix, index=index, columns=dishes)

# Dishes that Iva (the first user) has not rated, derived from the data so the
# list cannot drift out of sync with the rating vectors
dishes_missing = [dish for dish in dishes if pd.isna(df.loc[users[0], dish])]

df

Unnamed: 0,Stroopwaffle,Tuna Subway,Kebab,Appelflap,Hamkaas,Pancakes,GEWIS Sandwich,Dry Crackers
Iva,2,,,2,1,,1,
Emo,2,1.0,2.0,2,1,2.0,1,-1.0
Momo,2,2.0,2.0,-1,2,2.0,-1,-1.0
Svetlana,1,1.0,-2.0,-1,1,2.0,-1,1.0
Teddie,-1,2.0,-2.0,-1,2,2.0,1,1.0
Martina,-1,2.0,1.0,1,-1,2.0,1,-1.0


In [619]:
# Get global variable name
def var_name(variable):
    """Return the name of the first module-level global bound to `variable`.

    The lookup is by identity (`is`), not equality: comparing with `==` raises
    "truth value is ambiguous" as soon as a DataFrame or array lives in the
    global namespace, and no `eval` is needed to read a global's value.

    Parameters
    ----------
    variable : object
        The object to look up.

    Returns
    -------
    str or None
        The first matching global name, or None when no global refers to
        `variable`.
    """
    # Snapshot the namespace so the loop is unaffected by concurrent rebinding.
    for name, value in list(globals().items()):
        if value is variable:
            return name
    return None

# Get dataframe with columns where we know all ratings
def known_matrix(df):
    """Return a new dataframe containing only the columns without missing values.

    Uses `dropna` instead of dropping columns in place while iterating over the
    same frame, so the result does not depend on how pandas iterates a frame
    that is being modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Rating table; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with every column containing at least one null removed;
        the row index and the order of the remaining columns are preserved.
    """
    return df.dropna(axis="columns", how="any")

# Compute the dot product, norm, and finally, angle between two vectors
def compute_angle(v1, v2):
    """Return the angle between `v1` and `v2` in degrees.

    Parameters
    ----------
    v1, v2 : array-like of numbers
        Vectors of equal length.

    Returns
    -------
    float
        Angle in degrees, in [0, 180]. `inf` is returned when either vector
        has zero norm, because the angle is undefined in that case.
    """
    norm_product = norm(v1) * norm(v2)
    if norm_product == 0:
        # Either vector is all zeros: avoid a 0/0 division (which yields NaN).
        return inf

    cosine = dot(v1, v2) / norm_product
    # Rounding can push the cosine of (anti)parallel vectors marginally outside
    # [-1, 1], where arccos returns NaN. Clamp with comparisons so that a NaN
    # coming from NaN inputs is left untouched instead of being masked.
    if cosine > 1.0:
        cosine = 1.0
    elif cosine < -1.0:
        cosine = -1.0
    return degrees(arccos(cosine))

# Get list of all angles between a certain vector and the others in the dataframe
def get_all_angles(known_df):
    """Return the angles, in degrees, between the first row and every other row.

    `compute_angle` already returns degrees, so its result is used as is;
    converting it with `degrees()` a second time would scale every angle by
    another factor of 180/pi.

    Parameters
    ----------
    known_df : pandas.DataFrame
        One row per user; the first row is the reference vector.

    Returns
    -------
    list
        One angle per remaining row, in row order. Angles below 0.1 degrees
        are reported as exactly 0.
    """
    iva_vec = known_df.iloc[0].values
    angles = []
    for i in range(1, len(known_df.index)):
        # Compute each angle once and reuse it for the threshold test.
        angle = compute_angle(iva_vec, known_df.iloc[i].values)
        angles.append(0 if angle < 0.1 else angle)

    return angles

def format_matrix(df_matrix):
    """Print `df_matrix` as the rows of a LaTeX table.

    The first printed line is a header with the 1-based column numbers, with an
    empty leading cell above the row labels. Each following line holds the
    1-based row number followed by the row's values rounded to 5 decimals.
    Cells are separated by " & " and every line ends with a LaTeX row break.

    Cells are read by position, so the frame does not need default integer
    labels. The end of a row is decided by the number of columns (not rows),
    so non-square frames are broken into lines correctly.

    Parameters
    ----------
    df_matrix : pandas.DataFrame
        Numeric table to print.

    Returns
    -------
    None
        The text is written to standard output.
    """
    n_rows, n_cols = df_matrix.shape
    row_end = "\\\\ \n"

    header_cells = [""] + [str(k + 1) for k in range(n_cols)]
    lines = [" & ".join(header_cells) + row_end]

    for i in range(n_rows):
        cells = [str(i + 1)]
        cells += [str(round(df_matrix.iloc[i, k], 5)) for k in range(n_cols)]
        lines.append(" & ".join(cells) + row_end)

    print("".join(lines))

# Predict Missing Ratings Using Angles between Vectors

In [604]:
# Restrict the ratings to the columns without missing values and
# label the rows with the user names
index = pd.Index(users)
df_known = known_matrix(df).set_index(index)

# Angles between the first user (Iva) and each of the other users,
# stored as a single-row dataframe with one column per other user
angles_list = get_all_angles(df_known)
df_angles = pd.DataFrame(index = [0], columns = users[1:])
df_angles.iloc[0] = angles_list
df_angles

Unnamed: 0,Emo,Momo,Svetlana,Teddie,Martina
0,0,72.542397,90.0,96.864566,90.0


In [605]:
# Candidate ratings tried for each of Iva's missing dishes
values = [-2, -1, 1, 2]

# Copy of original dataframe where final rating guesses will be stored 
df_guess = df.copy()

# Running record of angle sums: row 0 is the sum of the original angles between
# Iva and everyone; one row is appended per dish with the sum of the chosen guess
df_angles_guess = pd.DataFrame(columns = ['sum'], index = [0])
df_angles_guess['sum'] = df_angles.sum(axis = 1)
index = 0

# Only these columns hold angles. The 'rating' column added below must stay out
# of the row sums, otherwise every sum is shifted by the candidate rating itself.
angle_columns = list(df_angles.columns)

for item in dishes_missing:
    
    df_test = df_guess.copy()
    angles_test = df_angles.copy()
    angles_test['rating'] = None
    
    for i in values:
        
        # Add rating value currently being tested
        df_test.loc['Iva', item] = i
        
        # Get new matrix with known ratings
        known_test = known_matrix(df_test)
        
        # Get list of new angles including new rating
        angles_test_list = get_all_angles(known_test)
        
        # Append currently guessed rating to list of angles
        angles_test_list.append(i)
        
        # Add angles to dataframe with guess ratings for current item
        angles_test.loc[len(angles_test)] = angles_test_list
        
    # Sum all angles (and only the angles) for each rating guess
    angles_test['sum'] = angles_test[angle_columns].sum(axis = 1)
    
    # Angle sum to compare against: the original sum for the first dish,
    # afterwards the sum of the guess chosen for the previous dish
    og_sum = df_angles_guess.loc[index, 'sum']
    index += 1
    
    # Get closest sum value from dataframe to that reference sum
    # (row 0 holds the original angles without a rating, so it is skipped)
    app = angles_test.loc[1:].copy()
    df_closest = app.iloc[(app['sum']-og_sum).abs().argsort()[:1]]
    
    # Get rating corresponding to closest sum
    rating = df_closest.iloc[0]['rating']
    df_angles_guess.loc[index, 'sum'] = df_closest.iloc[0]['sum']

    # Add this final guess rating to df_guess dataframe
    df_guess.loc['Iva', item] = rating
    
    
df_guess


Unnamed: 0,Stroopwaffle,Tuna Subway,Kebab,Appelflap,Hamkaas,Pancakes,GEWIS Sandwich,Dry Crackers
Iva,2,1.0,2.0,2,1,1.0,1,-1.0
Emo,2,1.0,2.0,2,1,2.0,1,-1.0
Momo,2,2.0,2.0,-1,2,2.0,-1,-1.0
Svetlana,1,1.0,-2.0,-1,1,2.0,-1,1.0
Teddie,-1,2.0,-2.0,-1,2,2.0,1,1.0
Martina,-1,2.0,1.0,1,-1,2.0,1,-1.0


# Predict Missing Ratings Using Low-Rank Matrices and TSVD

In [598]:
import numpy as np
 
# Create rating matrix; Iva's unknown ratings are filled with 0.
# NOTE: this also rebinds the global `iva` to the zero-filled array.
iva_0 = iva = np.array([2, 0, 0, 2, 1, 0, 1, 0])
rating_matri = [iva_0, emo, momo, svetlana, teddie, martina]
# Transpose so that rows are dishes and columns are users
rating_matrix = np.transpose(rating_matri)

# Calculate SVD (D holds the singular values, sorted in descending order)
U, D, VT = np.linalg.svd(rating_matrix, full_matrices = False)
 
# Check if original matrix can be remade using U, D, VT
remake = (U @ np.diag(D) @ VT)
assert np.allclose(remake, rating_matrix), "SVD factors do not reproduce the rating matrix"

# Number of leading singular values kept by the truncated SVD
TSVD_RANK = 2

# Truncate every factor to the leading TSVD_RANK components. The singular values
# are taken from D itself rather than typed in from a printout, so they keep
# full precision and stay correct if the ratings change.
DD = np.diag(D)[:TSVD_RANK, :]
D_tsvd = np.diag(D[:TSVD_RANK])

U_tsvd = U[:, :TSVD_RANK]
V_tsvd = VT[:TSVD_RANK, :]

In [599]:
# Show the zero-filled rating matrix as a dataframe (rows: dishes, columns: users)
df_rating = pd.DataFrame(data = rating_matrix)
df_rating

Unnamed: 0,0,1,2,3,4,5
0,2,2,2,1,-1,-1
1,0,1,2,1,2,2
2,0,2,2,-2,-2,1
3,2,2,-1,-1,-1,1
4,1,1,2,1,2,-1
5,0,2,2,2,2,2
6,1,1,-1,-1,1,1
7,0,-1,-1,1,1,-1


In [600]:
# Rebuild the rating matrix from the truncated factors and
# round the result to two decimals for display
tsvd_remake = matmul(matmul(U_tsvd, D_tsvd), V_tsvd)
df_tsvd = pd.DataFrame(tsvd_remake).round(2)
df_tsvd

Unnamed: 0,0,1,2,3,4,5
0,0.85,1.9,1.52,-0.11,-0.2,0.87
1,0.38,1.31,1.97,1.41,1.71,0.83
2,1.15,2.22,1.02,-1.33,-1.71,0.83
3,0.79,1.42,0.45,-1.18,-1.5,0.48
4,0.21,0.89,1.54,1.28,1.56,0.62
5,0.51,1.71,2.46,1.67,2.02,1.06
6,0.26,0.52,0.31,-0.2,-0.26,0.21
7,-0.62,-1.21,-0.6,0.66,0.85,-0.46


In [601]:
#format_matrix(df_tsvd)

In [606]:
# Find most basic and most extreme taste

# Ratings restricted to the four dishes that every user (including Iva) has rated.
# NOTE: these assignments rebind the global rating vectors to the shorter lists.
iva = [2, 2, 1, 1]
emo = [2,2,1,1]
momo = [2,-1,2,-1]
svetlana = [1,-1, 1,-1]
teddie = [-1,-1,2,1]
martina = [-1,1,-1,1]

tastes = {'iva': iva, 'emo': emo, 'momo': momo,
          'svetlana': svetlana, 'teddie': teddie, 'martina': martina}


def mean_angle_to_others(name):
    """Return the average angle (degrees) between `name`'s ratings and every other user's."""
    others = [vec for other, vec in tastes.items() if other != name]
    return sum(compute_angle(tastes[name], vec) for vec in others) / len(others)


# Angles between Iva and each other user, rounded to two decimals
iva_emo = round(compute_angle(iva, emo), 2)
iva_momo = round(compute_angle(iva, momo), 2)
iva_svetlana = round(compute_angle(iva, svetlana), 2)
iva_teddie = round(compute_angle(iva, teddie), 2)
iva_martina = round(compute_angle(iva, martina), 2)

# Average angle from each user to the five others, all computed the same way
iva_ang = mean_angle_to_others('iva')
emo_ang = mean_angle_to_others('emo')
momo_ang = mean_angle_to_others('momo')
svetlana_ang = mean_angle_to_others('svetlana')
teddie_ang = mean_angle_to_others('teddie')
martina_ang = mean_angle_to_others('martina')

print(iva_ang, emo_ang, momo_ang, svetlana_ang, teddie_ang, martina_ang)
summ = iva_ang + emo_ang + momo_ang + svetlana_ang + teddie_ang + martina_ang
# Six per-user averages are summed, so their mean divides by the number of users
average_vector = summ/len(tastes)
average_vector