In [48]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy.spatial.distance import euclidean

def preprocess_data(data):
    """Label-encode text columns and standardise every column of ``data``.

    Each column holding text (``object`` dtype or one of pandas' dedicated
    string dtypes) is cast to ``str`` and replaced by integer codes from a
    fresh ``LabelEncoder``. The whole frame is then standardised with a fresh
    ``StandardScaler``.

    Note that the encoders and the scaler are fitted on the data passed to
    *this call only*. Two frames preprocessed by two separate calls are
    therefore encoded and scaled independently of each other.

    Parameters
    ----------
    data : pandas.DataFrame
        Input table. It is copied, never modified in place.

    Returns
    -------
    pandas.DataFrame
        Standardised values with the same column labels and the same row
        index as ``data``.
    """
    numeric_data = data.copy()
    for column in numeric_data.columns:
        values = numeric_data[column]
        # Checking ``dtype == 'object'`` alone misses pandas' dedicated string
        # dtypes; those columns would reach the scaler un-encoded and fail.
        is_text = (
            pd.api.types.is_object_dtype(values)
            or pd.api.types.is_string_dtype(values)
        )
        if is_text:
            encoder = LabelEncoder()
            numeric_data[column] = encoder.fit_transform(values.astype(str))

    scaled_values = StandardScaler().fit_transform(numeric_data)
    # Carry the original index over so rows stay aligned with the input even
    # when it does not have a default RangeIndex.
    return pd.DataFrame(
        scaled_values,
        columns=numeric_data.columns,
        index=numeric_data.index,
    )

def calculate_similarity(vector1, vector2):
    """Turn the Euclidean distance between two vectors into a similarity.

    The score is ``exp(-d)``, where ``d`` is the Euclidean distance between
    ``vector1`` and ``vector2``: identical vectors score 1.0 and the score
    decays towards 0 as the vectors move apart.
    """
    return np.exp(-euclidean(vector1, vector2))

def get_recommendations(user_profile, train_data, k=5):
    """Return the ``k`` rows of ``train_data`` most similar to a user profile.

    Similarity is ``exp(-d)``, with ``d`` the Euclidean distance between the
    first row of ``user_profile`` and a row of ``train_data`` (the same kernel
    as ``calculate_similarity``). All distances are computed in one vectorised
    pass rather than one Python-level call per training row.

    Parameters
    ----------
    user_profile : pandas.DataFrame
        Only its first row is used. Values are compared positionally, so the
        column order must match ``train_data``.
    train_data : pandas.DataFrame
        Candidate rows; all values must be convertible to ``float``.
    k : int, default 5
        Number of neighbours to return. A ``k`` larger than the number of
        training rows returns every row; ``k == 0`` returns an empty
        selection.

    Returns
    -------
    tuple of (pandas.DataFrame, numpy.ndarray)
        The selected rows and their similarity scores, both ordered by
        ascending similarity (the most similar row is last).

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    profile = user_profile.iloc[0].to_numpy(dtype=float)
    candidates = train_data.to_numpy(dtype=float)

    # Broadcasting subtracts the profile from every training row at once.
    distances = np.linalg.norm(candidates - profile, axis=1)
    similarities = np.exp(-distances)

    order = np.argsort(similarities)
    # An explicit start index is used because ``order[-k:]`` with k == 0 is
    # ``order[0:]``, which would return every row instead of none.
    similar_indices = order[max(len(order) - k, 0):]
    return train_data.iloc[similar_indices], similarities[similar_indices]

def fill_test_values(test_data, train_data, k=5):
    """Replace each test row by a weighted average of its nearest train rows.

    For every row of ``test_data`` the ``k`` most similar rows of
    ``train_data`` are looked up with ``get_recommendations``. Their values
    are averaged column-wise, weighted by similarity, and the rounded result
    overwrites the whole row. Despite the function's name, every value of the
    row is replaced, not only missing ones.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Rows to predict for. It is copied, never modified in place.
    train_data : pandas.DataFrame
        Reference rows with the same column layout as ``test_data``.
    k : int, default 5
        Number of neighbours averaged per test row.

    Returns
    -------
    pandas.DataFrame
        A copy of ``test_data`` whose rows hold the rounded predictions.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, or if there are test rows to predict but
        ``train_data`` is empty.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if len(test_data) > 0 and len(train_data) == 0:
        raise ValueError("train_data must contain at least one row")

    filled_predictions = test_data.copy()

    for idx in range(len(test_data)):
        test_user = test_data.iloc[[idx]]
        similar_users, similarities = get_recommendations(test_user, train_data, k=k)

        total = np.sum(similarities)
        if total == 0:
            # Every similarity underflowed to zero (all neighbours are very
            # far away). Normalising would be 0/0 and turn the whole row into
            # NaN, so fall back to a plain unweighted mean.
            weights = np.full(len(similarities), 1.0 / len(similarities))
        else:
            weights = similarities / total

        weighted_predictions = np.average(similar_users, weights=weights, axis=0)
        filled_predictions.iloc[idx] = np.round(weighted_predictions)

    return filled_predictions

## Load

In [49]:
# Read the training user-offer table; the preview shows a profile_id column
# followed by offer_0 .. offer_9.
train = pd.read_csv("../data/train/user_item.csv")
train.head()

Unnamed: 0,profile_id,offer_0,offer_1,offer_2,offer_3,offer_4,offer_5,offer_6,offer_7,offer_8,offer_9
0,86becbd667a94db3a3dee0854470de7c,0,0,0,4,0,0,0,3,6,2
1,676ee3fbf66b46078484ecaa99bc8d1a,4,0,0,0,0,0,0,0,3,4
2,fada060561c24d4a984cc6eba6e2a63a,0,4,0,0,0,0,0,0,0,2
3,5cd3b3a0e5284df1adf8cca2f59ed28f,0,4,12,0,0,0,0,3,0,0
4,8a6a2df8be214007991afb612eb64c1c,0,0,0,0,0,3,3,0,0,0


In [50]:
# Read the test user-offer table; the preview shows the same column layout as
# the training table.
test = pd.read_csv("../data/test/user_item.csv")
test.head()

Unnamed: 0,profile_id,offer_0,offer_1,offer_2,offer_3,offer_4,offer_5,offer_6,offer_7,offer_8,offer_9
0,b19c8e7ac2ff40ae92b4fcf3247f8912,0,4,4,0,3,0,0,0,0,0
1,0b680efe1a0a40788ebb6fb2c587b4a7,0,0,0,0,3,0,0,3,0,0
2,9232bc9e68744227bdcc537e44d159f7,0,4,0,4,0,0,0,3,3,2
3,6e7d42fc10ee466c80a4056b3ec0b072,0,4,0,4,0,3,0,3,0,0
4,72257b80d8c1407ead2b3af3e7891c25,0,4,0,4,0,0,3,3,3,0


## Transform

In [51]:
# Preprocess data: label-encode text columns and standardise every column.
# NOTE(review): preprocess_data fits its encoders and scaler inside each call,
# so train and test are encoded and scaled independently of each other.
# profile_id is a text column, so it is encoded and scaled like any other
# feature. Confirm both are intended before relying on train/test distances.
train_processed = preprocess_data(train)
test_processed = preprocess_data(test)

## Predict

In [None]:
# Make predictions: each preprocessed test row is replaced by the rounded,
# similarity-weighted average of its nearest preprocessed training rows
# (5 neighbours by default).
predictions = fill_test_values(test_processed, train_processed)

## Save