<a href="https://colab.research.google.com/github/smanishs175/MovieRecommendationSystemUsingALSModel/blob/main/MovieRecommendationUsingBDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive.
# NOTE(review): the data files used further down are opened from /content/
# directly, not from /content/drive -- confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-us.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz
!tar xf spark-3.0.1-bin-hadoop2.7.tgz
!pip install -q findspark

In [None]:
import os

# Tell the Spark tooling where the JDK and the unpacked Spark distribution live.
os.environ.update({
    "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
    "SPARK_HOME": "/content/spark-3.0.1-bin-hadoop2.7",
})

In [None]:
import findspark

# Called before the pyspark import below so that pyspark can be imported from
# the Spark distribution referenced by SPARK_HOME.
findspark.init()

from pyspark.sql import SparkSession

# Local-mode session using all available cores ("local[*]").
spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)

In [None]:

from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.sql import Row
from pyspark.sql.functions import lit


def loadMovieNames(path="/content/u.item.txt"):
    """Build a ``{movieID: title}`` lookup from a pipe-delimited item file.

    Each line is expected to look like ``<movieID>|<title>|...``; only the
    first two fields are used.

    Args:
        path: Location of the item file. Defaults to the path the notebook
            has always used, so existing zero-argument calls are unaffected.

    Returns:
        dict mapping ``int`` movie ID to ``str`` title.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a line's first field is not an integer.
    """
    movieNames = {}
    # Read bytes and decode per line: the stock MovieLens 100k item file is
    # Latin-1 encoded and makes a default UTF-8 text-mode open() raise
    # UnicodeDecodeError on accented titles, while a re-saved copy may be UTF-8.
    with open(path, "rb") as f:
        for raw in f:
            try:
                line = raw.decode("utf-8")
            except UnicodeDecodeError:
                # Latin-1 maps every byte, so this fallback cannot fail.
                line = raw.decode("ISO-8859-1")
            fields = line.split('|')
            if len(fields) < 2:
                # Blank/trailing lines carry no title; skip instead of crashing in int().
                continue
            movieNames[int(fields[0])] = fields[1]
    return movieNames

In [None]:

def parseInput(line):
    """Turn one text-file row into a ``Row(userID, movieID, rating)``.

    ``line`` must expose the raw text as ``line.value``; the text is split on
    whitespace and the first three tokens are converted to int, int and float.
    Any further tokens are ignored.
    """
    tokens = line.value.split()
    record = {
        "userID": int(tokens[0]),
        "movieID": int(tokens[1]),
        "rating": float(tokens[2]),
    }
    return Row(**record)


if __name__ == "__main__":
    # Reuse the already-running session if there is one, otherwise start one.
    spark = SparkSession.builder.appName("MovieRecs").getOrCreate()

    # movieID -> title lookup, used only for printing human-readable results.
    movieNames = loadMovieNames()

    # Each element of this RDD is a Row whose `value` field is one line of text.
    lines = spark.read.text("/content/u.data.txt").rdd

    # Row(userID, movieID, rating) per input line.
    ratingsRDD = lines.map(parseInput)

    # Cached because the DataFrame is reused: ALS training, the user-0 filter
    # below, and the per-movie counts in the recommendations cell.
    ratings = spark.createDataFrame(ratingsRDD).cache()

    # A fixed seed makes the factor initialisation -- and therefore the printed
    # predictions -- repeatable across kernel restarts; without it PySpark
    # derives the default from a per-process Python hash.
    als = ALS(
        maxIter=5,
        regParam=0.01,
        userCol="userID",
        itemCol="movieID",
        ratingCol="rating",
        seed=42,
    )
    model = als.fit(ratings)

    print("\nRatings for user ID 0:")
    userRatings = ratings.filter("userID = 0")
    # collect() brings the filtered rows to the driver so they can be printed.
    for rating in userRatings.collect():
        print(movieNames[rating['movieID']], rating['rating'])



Ratings for user ID 0:
Star Wars (1977) 5.0
Empire Strikes Back, The (1980) 5.0
Gone with the Wind (1939) 1.0


In [None]:
   
print("\nTop 20 recommendations:")

# Candidate movies: only those with more than 100 ratings.
ratingCounts = (
    ratings
    .groupBy("movieID")
    .count()
    .where("count > 100")
)

# Pair every candidate with user 0 so the model can score (user, movie) rows.
# NOTE(review): candidates are not filtered against movies user 0 has already
# rated, so those can appear in the list -- confirm that is intended.
popularMovies = ratingCounts.select("movieID").withColumn('userID', lit(0))

# transform() adds a `prediction` column holding the predicted rating.
recommendations = model.transform(popularMovies)

# Highest predicted rating first; keep the first 20 rows on the driver.
topRecommendations = (
    recommendations
    .orderBy(recommendations["prediction"].desc())
    .limit(20)
    .collect()
)

for row in topRecommendations:
    print (movieNames[row['movieID']], row['prediction'])

# Shuts the session down; cells above must be re-run before using Spark again.
spark.stop()




Top 20 recommendations:
Hudsucker Proxy, The (1994) 6.200141429901123
Jackie Chan's First Strike (1996) 6.188178062438965
Princess Bride, The (1987) 6.108372211456299
Army of Darkness (1993) 5.962574481964111
Beavis and Butt-head Do America (1996) 5.776404857635498
Die Hard: With a Vengeance (1995) 5.46513557434082
Swingers (1996) 5.37847900390625
Highlander (1986) 5.347123622894287
Wrong Trousers, The (1993) 5.3443074226379395
Blues Brothers, The (1980) 5.33856201171875
Rumble in the Bronx (1995) 5.317055702209473
Die Hard (1988) 5.171832084655762
Great Escape, The (1963) 5.168797969818115
Alien (1979) 5.113796234130859
Terminator, The (1984) 5.036183834075928
Chasing Amy (1997) 5.011901378631592
Ace Ventura: Pet Detective (1994) 5.002049922943115
Empire Strikes Back, The (1980) 4.995321750640869
Fish Called Wanda, A (1988) 4.9705986976623535
Raiders of the Lost Ark (1981) 4.967935562133789
