In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%precision 4

'%.4f'

# Code for the per-user hypothesis (linear) model and its theta values

In [26]:
#Cleaning data
#Dropping rows where impression_time <= 0, because a user cannot take a swipe action on a card that was never seen.
#Dropping rows where impression_time > 90000 ms, because almost 99% of the values lie below roughly this threshold (< 100000 ms).

def cleanAndNormalizeData(userDataframe):
    """Filter implausible impressions, encode swipes as numbers, add a z-score.

    Rows whose ``impression_time`` is ``<= 0`` or ``> 90000`` are discarded.
    ``swipe_direction`` labels are then mapped to numbers
    (``'SWIPED_OUT'`` -> -1, ``'SWIPED_IN'`` -> 1; any other value is left as
    it is) and the cleaned frame is handed to ``getZScore``.

    Parameters
    ----------
    userDataframe : pandas.DataFrame
        Needs ``impression_time`` and ``swipe_direction`` columns. The frame
        passed in is not modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        The value returned by ``getZScore`` for the cleaned copy.
    """
    impressionTime = userDataframe['impression_time']
    dropRows = (impressionTime <= 0) | (impressionTime > 90000)

    # Select by boolean mask rather than dropping index labels: label-based
    # dropping would also remove valid rows that share a duplicated label.
    modifiedData = userDataframe.loc[~dropRows].copy()

    # Assign the result back instead of calling replace(inplace=True) on the
    # column selection; the chained in-place form does not update the frame
    # under pandas Copy-on-Write. infer_objects() turns a fully converted
    # column into a numeric dtype and leaves it untouched otherwise.
    modifiedData['swipe_direction'] = (
        modifiedData['swipe_direction']
        .replace({'SWIPED_OUT': -1, 'SWIPED_IN': 1})
        .infer_objects()
    )

    return getZScore(modifiedData)

In [3]:
#Using Z-score normalization to normalize the impression_time column

def getZScore(modifiedData):
    """Add a ``z_score`` column with the standardized ``impression_time``.

    The column is computed as ``(value - mean) / std`` using the pandas
    ``mean()`` and ``std()`` of the ``impression_time`` column. The frame is
    modified in place and the same object is returned.
    """
    impressions = modifiedData['impression_time']
    centre = impressions.mean()
    spread = impressions.std()
    modifiedData['z_score'] = (impressions - centre) / spread
    return modifiedData

In [172]:
#initializing random rating
def randomRating(cleanedData, seed=None):
    """Attach a random initial ``rating`` column to ``cleanedData``.

    Each rating is an integer drawn from ``[4000, 5000)`` divided by 10000,
    i.e. a value in ``[0.4, 0.5)``.

    Parameters
    ----------
    cleanedData : pandas.DataFrame
        Frame to annotate. It is modified in place.
    seed : int, optional
        When given, ratings are drawn from a private
        ``numpy.random.RandomState(seed)`` so the result is reproducible.
        When omitted, the global NumPy random state is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        The same ``cleanedData`` object, now with a ``rating`` column.
    """
    # np.random (module) and RandomState both expose randint with the same
    # signature, so one call site serves the seeded and unseeded paths.
    source = np.random if seed is None else np.random.RandomState(seed)
    draws = source.randint(4000, 5000, cleanedData.shape[0])
    cleanedData['rating'] = draws / 10000
    return cleanedData

In [168]:
def getOptimalTheta(iniData, users=None, alpha=0.01, num_iters=1500):
    """Fit one linear model per user with batch gradient descent.

    For every user the features ``[bias_unit, swipe_direction, z_score]`` are
    regressed onto ``rating`` by calling ``gradientDescent`` with a zero
    initial theta.

    Parameters
    ----------
    iniData : pandas.DataFrame
        Needs ``user_id``, ``swipe_direction``, ``z_score`` and ``rating``
        columns. A constant ``bias_unit`` column (all ones) is added to this
        frame in place.
    users : iterable, optional
        Restrict fitting to these user ids. By default every user id present
        in ``iniData`` is fitted, so the function no longer needs a
        module-level ``userList``.
    alpha : float, optional
        Learning rate passed to ``gradientDescent`` (default 0.01).
    num_iters : int, optional
        Number of gradient steps per user (default 1500).

    Returns
    -------
    dict
        Maps each fitted user id to the theta returned by ``gradientDescent``.
        Previously the fitted values were computed and then thrown away.
    """
    iniData['bias_unit'] = 1
    featureCols = ['bias_unit', 'swipe_direction', 'z_score']
    wanted = None if users is None else set(users)

    optimal = {}
    # One groupby pass replaces a full-frame boolean scan per user.
    for user, userRows in iniData.groupby('user_id', sort=False):
        if wanted is not None and user not in wanted:
            continue
        featureVec = userRows[featureCols].to_numpy(dtype=float)
        resultVec = userRows[['rating']].to_numpy(dtype=float)
        # Fresh zero start for each user so fits stay independent.
        iniTheta = np.zeros((len(featureCols), 1))
        optimal[user] = gradientDescent(featureVec, resultVec, iniTheta, alpha, num_iters)

    return optimal

In [169]:
#calculating optimal theta value
def gradientDescent(featureVec, resultVec, theta, alpha, num_iters):
    """Run batch gradient descent for linear least squares.

    Each step applies
    ``theta <- theta - alpha * (1/m) * X.T.dot(X.dot(theta) - y)``
    where ``m`` is the number of elements in ``resultVec``.

    Parameters
    ----------
    featureVec : array-like
        Feature matrix ``X``.
    resultVec : array-like
        Targets ``y``; must have the same shape as ``X.dot(theta)``.
    theta : array-like
        Initial parameters. Not modified.
    alpha : float
        Learning rate.
    num_iters : int
        Number of update steps.

    Returns
    -------
    numpy.ndarray
        The parameters after ``num_iters`` steps. When ``resultVec`` is empty
        the initial ``theta`` is returned unchanged instead of raising
        ``ZeroDivisionError``.
    """
    featureVec = np.asarray(featureVec, dtype=float)
    resultVec = np.asarray(resultVec, dtype=float)
    theta = np.asarray(theta, dtype=float)

    m = resultVec.size
    if m == 0:
        # No samples: the gradient is undefined, so leave theta as given.
        return theta

    step = alpha * (1 / m)  # loop-invariant scale factor
    for _ in range(int(num_iters)):
        residual = featureVec.dot(theta) - resultVec
        theta = theta - step * featureVec.T.dot(residual)
    return theta

In [170]:
#calculating the cost between the predicted values and the original values
def computeCost(featureVec, resultVec, theta):
    """Return the halved mean squared error of the linear prediction.

    The prediction is ``featureVec.dot(theta)``; the result is
    ``1/(2*m)`` times the sum of squared differences from ``resultVec``,
    where ``m`` is ``resultVec.size``.
    """
    sampleCount = resultVec.size
    residual = featureVec.dot(theta) - resultVec
    squaredError = np.sum(np.square(residual))
    return 1 / (2 * sampleCount) * squaredError

In [178]:
#Load the raw interaction log from data.csv.
userDframe = pd.read_csv('data.csv')

#Filter rows by impression_time, encode swipe_direction and add the z_score column.
cleanedData = cleanAndNormalizeData(userDframe)
#randomRating adds a 'rating' column to the frame it is given and returns that same frame.
iniData = randomRating(cleanedData)

#Module-level lists: distinct user ids (built from a set, so unordered) and distinct deal ids (sorted).
userList = list(set(iniData['user_id']))
dealList = list(sorted(set(iniData['deal_id'])))

#Write the prepared frame to disk; the recommendation cell reads normalizedData.csv back in.
iniData.to_csv('normalizedData.csv', encoding='utf-8', index=False)

#NOTE(review): the value returned by this call is not captured here.
getOptimalTheta(iniData)

#NOTE(review): saveRecommendationList, and the `data` / `uu_sim` tables its helpers read, are defined in
#cells further down the notebook, so on a fresh Restart & Run All this call raises NameError.
#It should be moved to a cell after those definitions.
saveRecommendationList()

# Code for getting recommended deals/cards

In [177]:
# Load just the three columns needed for collaborative filtering. They are
# selected by header name rather than by position (formerly 0, 3, 6), so a
# change in column order or count in normalizedData.csv cannot silently
# mislabel a column.
df = pd.read_csv('normalizedData.csv', usecols=['user_id', 'deal_id', 'rating'])

# Rating matrix with one row per deal and one column per user; repeated
# (deal, user) pairs are combined by pivot_table's default aggregation (mean).
data = pd.pivot_table(df, index='deal_id', columns='user_id', values='rating')

# Pairwise correlation between user columns, used as the user-user similarity.
uu_sim = data.corr()

In [176]:
#function for getting the neighbours for the user
def get_top_users(uu_sim,target,n=5):
    """Return the ``n`` users most similar to ``target``.

    Parameters
    ----------
    uu_sim : pandas.DataFrame
        Square user-by-user similarity table, indexed and labelled by user id.
    target : hashable
        User id whose neighbours are wanted; must be a row label of ``uu_sim``.
    n : int, optional
        Maximum number of neighbours to return (default 5).

    Returns
    -------
    pandas.Series
        Similarities of up to ``n`` neighbours, indexed by user id and sorted
        from most to least similar. ``target`` itself is never included, and
        users with an undefined (NaN) similarity are skipped.
    """
    target_cor = uu_sim.loc[target]
    # Remove the target by label instead of assuming it ranks first: with a
    # tie at 1.0 or a NaN self-correlation, "take n+1 and skip the first"
    # discards a genuine neighbour and can return the target itself.
    candidates = target_cor.drop(labels=target, errors='ignore').dropna()
    return candidates.nlargest(n)
    

In [175]:
#function for generating user's recommendation list
def get_user_deal_score(deal,user):
    """Predict ``user``'s score for one deal from the nearest neighbours' ratings.

    ``deal`` is indexed by user id and holds each user's rating for the deal
    (NaN where there is none). The neighbours come from
    ``get_top_users(uu_sim, user)``. The result is the similarity-weighted
    average of the neighbours' non-NaN ratings, or 0 when the weights sum to
    zero (including when no neighbour rated the deal).
    """
    weighted_total = 0
    total_weight = 0
    nearest = get_top_users(uu_sim, user)
    for neighbour_id, similarity in zip(nearest.index, nearest.values):
        neighbour_rating = deal[neighbour_id]
        if not np.isnan(neighbour_rating):
            weighted_total += neighbour_rating * similarity
            total_weight += similarity
    return 0 if total_weight == 0 else weighted_total / total_weight

In [174]:
#this function is saving the data in a file.
def saveRecommendationList(outputPath='recommendationList.csv'):
    """Score every deal for every user and write the result to a CSV file.

    Reads the module-level ``userList`` (users to score) and ``data`` (the
    deal-by-user rating table). For each user, ``get_user_deal_score`` is
    applied to every row of ``data``.

    Parameters
    ----------
    outputPath : str, optional
        Destination file (default ``'recommendationList.csv'``).

    Returns
    -------
    pandas.DataFrame
        The table that was written, with columns ``user_id``, ``deal_id`` and
        ``predicted_value``.
    """
    columns = ['user_id', 'deal_id', 'predicted_value']

    perUserTables = []
    for user in userList:
        # Scored once per user; the former inner loop recomputed this identical
        # result len(userList) times.
        predict = data.apply(get_user_deal_score, axis=1, args=(user,))
        perUserTables.append(pd.DataFrame({
            'user_id': user,
            # Take deal ids from the prediction's own index so each id is
            # guaranteed to line up with its predicted value.
            'deal_id': predict.index.tolist(),
            'predicted_value': predict.tolist(),
        }))

    if perUserTables:
        # Single concat; each per-user table keeps its own 0..k-1 index, which
        # is what ends up in the CSV's index column.
        predictedTable = pd.concat(perUserTables)
    else:
        predictedTable = pd.DataFrame(columns=columns)

    predictedTable.to_csv(outputPath, encoding='utf-8', index=True)
    return predictedTable