In [15]:
import os
import pandas as pd
from scipy.sparse.linalg import svds
import numpy as np

## Reading data

In [16]:
# Load the two input tables from CSV files in the working directory:
# the article catalogue and the per-user article interaction log.
df_articles = pd.read_csv("newsdata.csv")
df_ratings = pd.read_csv("ratings.csv")

In [17]:
# Show the article catalogue (the notebook renders a cell's last expression).
df_articles

Unnamed: 0,articleId,description
0,0,Prince William Marriage: New Biography Claims ...
1,1,WATCH: Exclusive Becoming Fearless Video Series
2,2,15 Things You Should Give Up To Be Happy
3,3,"Lana Kuykendall\, Mom With Flesh-Eating Diseas..."
4,4,Airbus A320 To Offer Extra-Wide Seats For Amer...
...,...,...
10844,10844,RIM CEO Thorsten Heins' 'Significant' Plans Fo...
10845,10845,Maria Sharapova Stunned By Victoria Azarenka I...
10846,10846,"Giants Over Patriots\, Jets Over Colts Among ..."
10847,10847,Aldon Smith Arrested: 49ers Linebacker Busted ...


In [28]:
# Column-wise maxima of the ratings table. Kept as a Series because the
# interactive cell further down reads its first entry as the upper bound for
# valid user ids (presumably the first column of ratings.csv is userId).
maxValuesObj = df_ratings.max(axis=0)
# .iloc makes the positional lookup explicit: passing an integer to [] on a
# label-indexed Series is deprecated in pandas 2.x.
print(maxValuesObj.iloc[0])


10656


## Pivoting the ratings into a user × article matrix for easier interpretation


In [19]:
from scipy.sparse import csr_matrix
# Reshape the long ratings table into a wide matrix: one row per userId, one
# column per articleId, cell value = 'viewed'. User/article pairs that do not
# occur in the ratings come out as NaN and are replaced by 0.
df_article_features = (
    df_ratings
    .pivot(index='userId', columns='articleId', values='viewed')
    .fillna(0)
)

In [20]:

# Show the pivoted user × article matrix (rows: userId, columns: articleId).
df_article_features

articleId,1,2,3,4,5,6,7,8,9,10,...,175773,175775,175777,175779,175945,175975,176165,176211,176219,176271
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10652,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10654,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10655,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Dense user × article matrix as a NumPy array. DataFrame.as_matrix() is
# deprecated and was removed in pandas 1.0; .values returns the same array.
R = df_article_features.values
# Mean of each user's row across all article columns. Kept 1-D because the
# reconstruction cell reshapes it again when adding the mean back.
user_ratings_mean = np.mean(R, axis=1)
# Centre every row on its own mean before factorising.
R_demeaned = R - user_ratings_mean.reshape(-1, 1)

  """Entry point for launching an IPython kernel.


## Applying singular value decomposition

In [22]:
from scipy.sparse.linalg import svds
# Truncated SVD of the centred matrix, keeping k = 50 singular triplets:
# U holds the left singular vectors, sigma the singular values and Vt the
# right singular vectors (transposed).
U, sigma, Vt = svds(R_demeaned, k=50)

In [23]:

# svds returns the singular values as a 1-D vector; the reconstruction needs
# them as a diagonal matrix. The ndim guard makes re-running this cell a
# no-op: np.diag applied to a 2-D array would extract the diagonal back into
# a vector and break the matrix product in the next cell.
if sigma.ndim == 1:
    sigma = np.diag(sigma)

In [24]:
# Rebuild the low-rank approximation U · sigma · Vt and add each user's mean
# back onto their row to undo the earlier centring.
all_user_predicted_ratings = (
    U.dot(sigma).dot(Vt)
    + user_ratings_mean[:, np.newaxis]
)

In [25]:
# Wrap the predicted scores in a DataFrame whose columns are the articleIds of
# the pivoted matrix. Rows keep the default 0-based positional index.
preds_df = pd.DataFrame(
    data=all_user_predicted_ratings,
    columns=df_article_features.columns,
)
preds_df.head()

articleId,1,2,3,4,5,6,7,8,9,10,...,175773,175775,175777,175779,175945,175975,176165,176211,176219,176271
0,0.003516,-0.004498,-0.00591,0.002445,0.026904,-0.00589,-0.013607,0.008403,0.003959,-0.013454,...,-0.000391,-0.000391,-0.001209,-0.001209,-0.000391,-0.000391,-0.000862,-0.000391,-0.001054,-0.000391
1,0.213236,-0.009589,0.150765,-0.010701,0.170609,0.182919,0.147508,-0.004078,0.060022,0.086038,...,-0.000529,-0.000529,0.000314,0.000314,-0.000529,-0.000529,0.000901,-0.000529,-0.000358,-0.000529
2,0.063284,-0.007378,-0.00118,0.003322,-0.002715,-0.06603,-0.015043,0.00293,-0.004975,-0.060903,...,9e-06,9e-06,-0.000545,-0.000545,9e-06,9e-06,-0.003212,9e-06,-0.00028,9e-06
3,0.016741,-0.01341,-0.052571,0.002328,-0.036301,0.064221,-0.02425,-0.000991,-0.014872,0.009747,...,-0.000105,-0.000105,-0.000388,-0.000388,-0.000105,-0.000105,4.6e-05,-0.000105,-0.00014,-0.000105
4,0.081302,0.0042,0.010229,0.004434,-0.001037,-0.0517,0.014788,0.008217,-0.001564,-0.041426,...,-0.000122,-0.000122,-0.000719,-0.000719,-0.000122,-0.000122,-0.001225,-0.000122,1.6e-05,-0.000122


## Making recommendations


In [26]:
def recommend_articles(preds_df, userID, articles_df, original_ratings_df, num_recommendations):
    """Recommend the highest-scored articles a user has not viewed yet.

    Parameters
    ----------
    preds_df : pandas.DataFrame
        Predicted scores, one row per user and one column per articleId.
        Rows are addressed by position: row ``userID - 1`` is used for
        ``userID``, i.e. user ids are assumed to run 1, 2, ..., n in row order.
    userID : int
        1-based id of the user to recommend for.
    articles_df : pandas.DataFrame
        Article catalogue; must contain an ``articleId`` column.
    original_ratings_df : pandas.DataFrame
        Interaction log with ``userId``, ``articleId`` and ``viewed`` columns.
    num_recommendations : int
        Maximum number of articles to return.

    Returns
    -------
    user_full : pandas.DataFrame
        The user's rows of ``original_ratings_df`` left-joined with
        ``articles_df`` and sorted by ``viewed`` in descending order.
    recommendations : pandas.DataFrame
        Rows of ``articles_df`` (same columns) for articles the user has no
        row for in ``original_ratings_df``, ordered by predicted score,
        highest first, truncated to ``num_recommendations`` rows.

    Raises
    ------
    IndexError
        If ``userID`` does not correspond to a row of ``preds_df``.
    """
    user_row_number = userID - 1  # UserID starts at 1, not 0
    # Reject ids below 1 explicitly: a negative position would make .iloc wrap
    # around and silently return another user's predictions.
    if not 0 <= user_row_number < len(preds_df):
        raise IndexError(
            'userID {0} is outside the valid range 1..{1}'.format(userID, len(preds_df))
        )

    # This user's predicted scores as an (articleId, Predictions) table. The
    # score column is named explicitly so it does not depend on the row label
    # of preds_df.
    user_predictions = preds_df.iloc[user_row_number]
    predictions = pd.DataFrame({
        'articleId': user_predictions.index,
        'Predictions': user_predictions.values,
    })

    # Everything the user has interacted with, enriched with article details.
    user_data = original_ratings_df[original_ratings_df.userId == userID]
    user_full = (
        user_data
        .merge(articles_df, how='left', on='articleId')
        .sort_values(['viewed'], ascending=False)
    )
    print('User {0} has already viewed {1} articles.'.format(userID, user_full.shape[0]))

    # Rank the remaining catalogue entries by predicted score. The left join
    # keeps articles without a prediction; they get NaN and sort last.
    unseen_articles = articles_df[~articles_df['articleId'].isin(user_full['articleId'])]
    recommendations = (
        unseen_articles
        .merge(predictions, how='left', on='articleId')
        .sort_values('Predictions', ascending=False)
        .head(num_recommendations)
        .drop(columns='Predictions')
    )

    return user_full, recommendations

In [30]:
# Interactive driver: ask how many users to serve, then for each one read a
# user id and a recommendation count and print the recommended articles.
print("for how many users you have to recommend")
numberofusers = int(input())
# Upper bound for valid ids: first entry of the column-wise maxima of
# df_ratings. .iloc keeps the lookup positional without relying on the
# deprecated integer-key fallback of Series[...].
max_user_id = maxValuesObj.iloc[0]
for i in range(numberofusers):
    print("enter the userid ")
    userid = int(input())
    # Ids below 1 are rejected as well: recommend_articles turns the id into a
    # row position, and a negative position would select the wrong user.
    if not 1 <= userid <= max_user_id:
        print("User id is not in the list,provide userid from 1 to {0}".format(max_user_id))
    else:
        print("How many recommendations you want to give for user{0}".format(userid))
        noofreccomendations = int(input())
        already_rated, predictions = recommend_articles(
            preds_df, userid, df_articles, df_ratings, noofreccomendations
        )
        print("**Here are the predictions**")
        print(predictions)

for how many users you have to recommend
1
enter the userid 
10655
How many recommendations you want to give for user10655
5
User 10655 has already viewed 720 articles.
**Here are the predictions**
      articleId                                        description
351         377  Preakness Winner I'll Have Another in Live Bid...
854         912  Soul-Talk: Let Go of Toxicity and Restore Bala...
1126       1214      Taste Test: The Best Mint Chocolate Ice Cream
733         780  Gas Prices In U.S. Are Among Lowest In World\,...
3300       3578                 Would You Sell Your Wedding Dress?
