In [2]:
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating

def loadMovieNames(path="/home/jovyan/work/data/ml-100k/u.ITEM", encoding="ISO-8859-1"):
    """Build a movie-ID -> title lookup from a pipe-delimited item file.

    Every non-blank line is expected to look like ``<id>|<title>|...``; only
    the first two fields are used.

    Args:
        path: Location of the item file. Defaults to the ml-100k copy this
            notebook reads.
        encoding: Text encoding used to decode the file. The ml-100k item
            file is Latin-1 text; decoding it as ASCII with errors ignored
            silently strips accented characters out of titles.

    Returns:
        A dict mapping each integer movie ID to its title string.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first field of a non-blank line is not an integer.
        IndexError: If a non-blank line contains no ``|`` separator.
    """
    movieNames = {}
    with open(path, encoding=encoding) as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            # Only the ID and title are needed, so stop splitting after them.
            fields = line.split('|', 2)
            movieNames[int(fields[0])] = fields[1]
    return movieNames

# Configure a local Spark application ("local[*]" master) and reuse an
# already-running context if there is one.
conf = SparkConf()
conf.setMaster("local[*]")
conf.setAppName("MovieRecommendationsALS")
sc = SparkContext.getOrCreate(conf=conf)
# Directory Spark may use for RDD checkpoints.
sc.setCheckpointDir('checkpoint')

print("\nLoading movie names...")
nameDict = loadMovieNames()

data = sc.textFile("file:///home/jovyan/work/data/ml-100k/u.data")


def _parse_rating(line):
    """Turn one whitespace-separated line into a Rating built from its first three fields."""
    fields = line.split()
    return Rating(int(fields[0]), int(fields[1]), float(fields[2]))


# Cache the parsed ratings: they are used both for training and for the
# per-user listing further down.
ratings = data.map(_parse_rating).cache()

# Fit the recommender with Alternating Least Squares.
print("\nTraining recommendation model...")
rank = 10
# Iteration count kept low so training still completes on lower-end systems.
numIterations = 6
model = ALS.train(ratings, rank=rank, iterations=numIterations)

# User whose history and recommendations are printed below.
userID = 1

print(f"\nRatings for user ID {userID}:")
userRatings = ratings.filter(lambda r: r.user == userID)
# Rating is a (user, product, rating) named tuple; unpack it directly.
for _, movie_id, score in userRatings.collect():
    print(f"{nameDict[int(movie_id)]}: {score}")

print("\nTop 10 recommendations:")
recommendations = model.recommendProducts(userID, 10)
for rec in recommendations:
    title = nameDict[int(rec.product)]
    print(f"{title} score {rec.rating}")



Loading movie names...

Training recommendation model...

Ratings for user ID 1:
Three Colors: White (1994): 4.0
Grand Day Out, A (1992): 3.0
Desperado (1995): 4.0
Glengarry Glen Ross (1992): 4.0
Angels and Insects (1995): 4.0
Groundhog Day (1993): 5.0
Delicatessen (1991): 5.0
Hunt for Red October, The (1990): 4.0
Dirty Dancing (1987): 2.0
Rock, The (1996): 3.0
Ed Wood (1994): 4.0
Star Trek: First Contact (1996): 4.0
Pillow Book, The (1995): 5.0
Horseman on the Roof, The (Hussard sur le toit, Le) (1995): 5.0
Star Trek VI: The Undiscovered Country (1991): 4.0
From Dusk Till Dawn (1996): 3.0
So I Married an Axe Murderer (1993): 4.0
Shawshank Redemption, The (1994): 5.0
True Romance (1993): 3.0
Star Trek: The Wrath of Khan (1982): 5.0
Kull the Conqueror (1997): 1.0
Independence Day (ID4) (1996): 4.0
Wallace & Gromit: The Best of Aardman Animation (1996): 5.0
Wizard of Oz, The (1939): 4.0
Faster Pussycat! Kill! Kill! (1965): 1.0
Citizen Kane (1941): 4.0
Silence of the Lambs, The (1991): 4.0
[... remaining ratings for user 1 truncated in this capture ...]

Top 10 recommendations:
Angel Baby (1995) score 6.200683291877517
Crooklyn (1994) score 5.885100793173569
Pather Panchali (1955) score 5.810917024989516
Umbrellas of Cherbourg, The (Parapluies de Cherbourg, Les) (1964) score 5.46971988885716
Boys, Les (1997) score 5.447815343942311
Living in Oblivion (1995) score 5.397575049871239
Inspector General, The (1949) score 5.388174694919572
Year of the Horse (1997) score 5.3402621828536105
For Whom the Bell Tolls (1943) score 5.296876821327763
Bread and Chocolate (Pane e cioccolata) (1973) score 5.283362322389284
