# Problem 05
# Mahtab Nejati
# 98209434
## Please download the data from the link below
### https://drive.google.com/drive/folders/1-IwpWHjtZDzpFXUHo5n7M_9ZmMpw_GG5?usp=sharing

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from bs4 import BeautifulSoup
from IPython.display import display

# pd.options.display.max_colwidth = 1000
# pd.options.display.max_rows=100

## Loading the ratings dataset from the file "./HW3_P05_Data_MahtabNejati_98209434/jester-data-2.xls"

In [2]:
def getRatings(filePath="./HW3_P05_Data_MahtabNejati_98209434/jester-data-2.xls"):
    """Read the ratings spreadsheet into a DataFrame of rating columns only.

    The sheet is read without a header row.  The row index is shifted so it
    starts at 1, the first column is labelled 'counts' and discarded, and the
    remaining 100 columns are labelled 1..100.

    Args:
        filePath: Path of the Excel file to read.

    Returns:
        The DataFrame with the 'counts' column removed.
    """
    ratings = pd.read_excel(filePath, header=None)
    ratings.index = ratings.index + 1
    ratings.columns = ['counts', *range(1, 101)]
    return ratings.drop(columns=['counts'])

## Normalizing the matrix for calculations

In [3]:
def normalize(matrix):
    """Mean-center every row over its available entries and zero-fill the rest.

    Entries equal to 99.0 (and entries that are already NaN) are treated as
    missing.  For each row the mean of the available entries is subtracted
    from those entries; missing entries become 0 in the result.  A row with
    no available entry at all comes out as all zeros.

    The input is never modified: the work is done on a float copy.

    Args:
        matrix: 2-D array-like of numeric values.

    Returns:
        A new float ndarray with the same shape as ``matrix``.
    """
    # np.array(...) copies, so the caller's data is left untouched.
    data = np.array(matrix, dtype=float)
    data[data == 99.0] = np.nan

    available = ~np.isnan(data)
    counts = available.sum(axis=1, keepdims=True)
    totals = np.where(available, data, 0.0).sum(axis=1, keepdims=True)
    # Divide only where a row has at least one available entry; this avoids
    # the "mean of empty slice" warning and leaves those row means at 0.
    means = np.divide(totals, counts, out=np.zeros_like(totals), where=counts > 0)

    return np.where(available, data - means, 0.0)

## Getting similarities (user-user and item-item)

In [4]:
def getSimilarities(matrix,axis):
    """Return pairwise cosine similarities along the chosen axis.

    A truthy ``axis`` compares the rows of ``matrix`` with each other; a
    falsy ``axis`` compares its columns (the matrix is transposed first).
    """
    vectors = matrix if axis else matrix.T
    return cosine_similarity(vectors)

## Getting ready to recommend items

In [5]:
def processData():
    """Load the ratings and build everything the recommenders need.

    Returns:
        A tuple ``(item_df, user_df, orig_df)`` where

        * ``item_df`` holds the column-by-column cosine similarities,
          indexed and labelled by the rating columns,
        * ``user_df`` holds the row-by-row cosine similarities, indexed and
          labelled by the rating rows,
        * ``orig_df`` is a copy of the ratings in which every 99.0 entry has
          been replaced by NaN.
    """
    df = getRatings()

    # Normalise once and reuse the result for both similarity matrices.
    centered = normalize(df.to_numpy(dtype=float).copy())

    item_df = pd.DataFrame(getSimilarities(centered, 0),
                           index=df.columns, columns=df.columns)
    user_df = pd.DataFrame(getSimilarities(centered, 1),
                           index=df.index, columns=df.index)

    # mask() returns a new frame with NaN wherever the condition holds; this
    # avoids the np.NAN alias, which no longer exists in NumPy 2.0.
    orig_df = df.mask(df == 99.)
    return item_df,user_df,orig_df

# Making recommendations

In [6]:
def getUserTopK(uid,k,df):
    """Return what a user has rated and the user's k highest-rated entries.

    Args:
        uid: Row label of the user in ``df``.
        k: Number of top entries to keep.
        df: Ratings table in which NaN marks a missing rating.

    Returns:
        ``(rated, user_top_k)``: ``rated`` lists the column labels with a
        non-NaN value in the user's row, in column order; ``user_top_k`` is
        a list of ``(label, rating)`` pairs sorted by rating, highest first,
        cut to ``k`` entries.  Equal ratings keep their column order.
    """
    row = df.loc[uid]
    scored = [
        (label, row[label])
        for label in row.keys()
        if not np.isnan(row[label])
    ]
    rated = [label for label, _ in scored]
    # A reversed sort is still stable, so ties stay in column order.
    best_first = sorted(scored, key=lambda entry: entry[1], reverse=True)
    return rated,best_first[:k]

def getItemBasedRecom(uid,k,sims,df):
    """Collect item-based candidate recommendations for a user.

    For each of the user's ``k`` highest-rated items, the item's similarity
    column is multiplied by the user's rating of that item, and the ``k``
    best-scoring items the user has not rated are taken as candidates.

    Args:
        uid: Row label of the user in ``df``.
        k: Number of seed items and of candidates taken per seed item.
        sims: Item-by-item similarity DataFrame.
        df: Ratings table in which NaN marks a missing rating.

    Returns:
        A list of ``(item, score)`` tuples.  The same item may appear more
        than once (once per seed item); the list is empty when ``k <= 0``.
    """
    # With k <= 0 the "count == k" cut-off below could never trigger.
    if k <= 0:
        return []

    rated,top_k = getUserTopK(uid,k,df)
    already_rated = set(rated)
    recoms = []
    for seed_item, rating in top_k:
        weighted = (sims[seed_item] * rating).sort_values(ascending=False)
        count = 0
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for candidate, score in weighted.items():
            if candidate not in already_rated:
                recoms.append((candidate, score))
                count += 1
                if count == k:
                    break
    return recoms

def getUserBasedRecom(uid,k,sims,df):
    """Collect user-based candidate recommendations for a user.

    The ``k`` users most similar to ``uid`` (excluding ``uid``) are visited
    in order of decreasing similarity.  Each neighbour's ratings are
    multiplied by the similarity, and the ``k`` best-scoring items that
    ``uid`` has not rated are taken as candidates.

    Args:
        uid: Row label of the user in ``df`` and column label in ``sims``.
        k: Number of neighbours and of candidates taken per neighbour.
        sims: User-by-user similarity DataFrame.
        df: Ratings table in which NaN marks a missing rating.

    Returns:
        A list of ``(item, score)`` tuples.  The same item may appear more
        than once (once per neighbour); the list is empty when ``k <= 0``.
        Items a neighbour has not rated are never returned.
    """
    # With k <= 0 the "== k" cut-offs below could never trigger.
    if k <= 0:
        return []

    rated,_ = getUserTopK(uid,k,df)
    already_rated = set(rated)
    recoms = []
    similars = sims[uid].sort_values(ascending=False)
    countUsers = 0
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for user, sim in similars.items():
        if user == uid:
            continue
        user_pref = (df.loc[user] * sim).sort_values(ascending=False)
        countItems = 0
        for item, score in user_pref.items():
            # sort_values puts NaN last, so the first NaN means the
            # neighbour has no further rated items to offer.
            if np.isnan(score):
                break
            if item not in already_rated:
                recoms.append((item, score))
                countItems += 1
                if countItems == k:
                    break
        countUsers += 1
        if countUsers == k:
            break
    return recoms

def getRecoms(uid,k,df,itemSims,userSims):
    """Merge item-based and user-based candidates into the final top-k list.

    Prints how many jokes the user has rated and how many remain, gathers
    the candidates from both recommenders, and keeps each joke once with its
    highest score.

    Args:
        uid: Row label of the user in ``df``.
        k: Maximum number of recommendations to return.
        df: Ratings table in which NaN marks a missing rating.
        itemSims: Item-by-item similarity DataFrame.
        userSims: User-by-user similarity DataFrame.

    Returns:
        A DataFrame with columns ``joke_id`` and ``score``, sorted by score
        in descending order, at most ``k`` rows, one row per joke.
    """
    rated,_ = getUserTopK(uid,k,df)
    print('\n'+75*'='+'\nThis user has rated '+str(len(rated))+' jokes.')
    # Derive the joke count from the table instead of assuming 100 columns.
    print('Recommentations will be made out of the remaining '+str(df.shape[1]-len(rated))+' jokes.\n'+75*'='+'\n')

    recoms = getItemBasedRecom(uid,k,itemSims,df) + getUserBasedRecom(uid,k,userSims,df)
    rec_df = pd.DataFrame(recoms,columns=['joke_id','score']).sort_values(['score'],ascending=False)

    # Rows without a score cannot be ranked.  After the descending sort the
    # first row of every joke carries its best score, so keeping the first
    # occurrence also removes duplicates when that best score occurs twice.
    rec_df = rec_df.dropna(subset=['score'])
    return rec_df.drop_duplicates(subset='joke_id', keep='first').head(k)

## Getting joke text

In [7]:
def getJoke(i):
    """Return the text of joke ``i`` read from its HTML file.

    The file ``init<i>.html`` in the jokes folder is parsed, the text of its
    ``<body>`` element is taken, newlines are removed and surrounding
    whitespace is stripped.

    Args:
        i: Joke identifier used to build the file name.

    Returns:
        The joke text as a single-line string.
    """
    # NOTE(review): the file is opened with the platform default encoding;
    # confirm the encoding of the joke files and pass it explicitly.
    with open('./HW3_P05_Data_MahtabNejati_98209434/jokes/init'+str(i)+'.html') as f:
        page = f.read()
    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed and warns, so results could differ between machines.
    soup = BeautifulSoup(page, 'html.parser')
    body = soup.find('body').text
    return body.replace('\n','').strip()

## Main

In [8]:
# Run the whole preparation step once; the three results are the module-level
# inputs of the interactive recommendation cell below.
itemSims,userSims,df = processData()

## Ask for recommendations

In [9]:
# Interactive loop: ask for a user ID and a number of recommendations, show
# the recommended jokes with their text, and repeat until the answer to the
# final prompt is not "y".
again = True
while again:
    print('\n'+75*'*'+'\n')
    try:
        user_id = int(input("Enter the user ID {1...23500}: "))
        k = int(input("Enter the number of recommendations you'ld like to get: "))
    except ValueError:
        # Non-numeric input: ask again instead of crashing with a traceback.
        print('Please enter whole numbers only.')
        continue
    if user_id not in df.index or k < 1:
        print('The user ID must exist in the data and the number of recommendations must be at least 1.')
        continue
    print('\n'+75*'*'+'\n')
    recoms = getRecoms(user_id,k,df,itemSims,userSims)
    jokes = [getJoke(i) for i in recoms['joke_id']]
    # assign() builds a new frame, avoiding a write into a filtered slice.
    recoms = recoms.assign(joke=jokes)
    display(recoms)
    print('\n\n'+75*'#')
    again = (input("\nWish to continue (y/n): ").lower() == 'y')
    print('\n'+75*'#')


***************************************************************************



Enter the user ID {1...23500}:  23456
Enter the number of recommendations you'ld like to get:  10



***************************************************************************


This user has rated 44 jokes.
Recommentations will be made out of the remaining 56 jokes.



Unnamed: 0,joke_id,score,joke
170,6,4.533079,Bill & Hillary are on a trip back to Arkansas....
110,55,4.485214,"A woman has twins, and gives them up for adopt..."
130,84,4.39545,Q: What is the difference between Mechanical E...
160,28,4.241005,"A mechanical, electrical and a software engine..."
161,1,4.19111,"A man visits the doctor. The doctor says ""I ha..."
131,70,4.091954,"Employer to applicant: ""In this job we need so..."
190,3,3.686994,Q. What's 200 feet long and has 4 teeth? A. Th...
180,59,3.267925,The Chukcha (Russian Eskimo) phones up the Rus...
181,58,3.267925,How many teddybears does it take to change a l...
162,10,3.243121,"Two cannibals are eating a clown, one turns to..."




###########################################################################



Wish to continue (y/n):  y



###########################################################################

***************************************************************************



Enter the user ID {1...23500}:  9876
Enter the number of recommendations you'ld like to get:  30



***************************************************************************


This user has rated 100 jokes.
Recommentations will be made out of the remaining 0 jokes.



Unnamed: 0,joke_id,score,joke




###########################################################################



Wish to continue (y/n):  y



###########################################################################

***************************************************************************



Enter the user ID {1...23500}:  4567
Enter the number of recommendations you'ld like to get:  5



***************************************************************************


This user has rated 38 jokes.
Recommentations will be made out of the remaining 62 jokes.



Unnamed: 0,joke_id,score,joke
41,39,5.517179,What is the difference between men and women:A...
40,70,5.517179,"Employer to applicant: ""In this job we need so..."
42,63,5.28855,"An engineer, a physicist and a mathematician a..."
43,12,4.410133,A guy stood over his tee shot for what seemed ...
44,64,3.273005,What is the rallying cry of the International ...




###########################################################################



Wish to continue (y/n):  n



###########################################################################
