In [1]:
from pyspark.sql import SparkSession

In [2]:
# Obtain the notebook-wide Spark session: Hive support is switched on and the
# master is set to "local" before getOrCreate() returns the session.
sparkSession = (
    SparkSession.builder
    .enableHiveSupport()
    .master("local")
    .getOrCreate()
)

In [3]:
from pyspark.sql import Window
from pyspark.sql.functions import row_number, sum, col, abs, count, desc, asc

In [4]:
# Read both parquet inputs in one statement: `data` comes from /data/sample264
# and `meta` from /data/meta (the lookup table joined against later on).
data, meta = (
    sparkSession.read.parquet(parquet_path)
    for parquet_path in ("/data/sample264", "/data/meta")
)

# Graph-based Music Recommender. Task 5

For the user with Id 776748, find all the tracks and artists connected to them. Use the original dataframe, not a normalized one. Sort the found items first by artist, then by name in ascending order, keep only the columns “Artist” and “Name”, and print the top 40.

Each output line can take one of the following forms:

1. ```Artist: <artist-name> <track-name>```
1. ```Artist: <artist-name> Artist: <artist-name>```

These two forms help distinguish “user-track” suggestions (as shown in 1) from “user-artist” suggestions (as shown in 2).

The part of the result on the sample dataset:

    ...
    Artist: Blur Artist: Blur
    Artist: Blur Girls and Boys
    Artist: Clawfinger Artist: Clawfinger
    Artist: Clawfinger Nothing Going On
    Artist: Disturbed Artist: Disturbed
    ...

For all subtasks, use the same IPython notebook; each subtask should be a continuation of the previous one.

In [23]:
# Keep only the rows of the original (non-normalized) frame that belong to the target user.
user_data = data.where(col("userId") == 776748)

# Build one frame per key column. Each has the key exposed as `id` plus a
# `count` column holding the number of rows seen for that id.
tracks, artists = (
    user_data.groupBy(col(key_column).alias("id")).count()
    for key_column in ("trackId", "artistId")
)

# Stack track ids and artist ids, attach the matching `meta` rows by id, and
# cache the frame sorted by Artist, then Name.
users_top = (
    tracks.union(artists)
    .join(meta, on="id")
    .orderBy("Artist", "Name")
    .cache()
)

In [26]:
# Project to the two requested columns, sort by Artist then Name (ascending),
# and keep the first 40 rows.
results = (
    users_top
    .select("Artist", "Name")
    .orderBy("Artist", "Name")
    .limit(40)
    .cache()
)

# Each collected row unpacks into its two fields; print them separated by a space.
for artist, name in results.collect():
    print("{} {}".format(artist, name))

Artist: 3 Doors Down Artist: 3 Doors Down
Artist: 3 Doors Down Kryptonite
Artist: 311 Artist: 311
Artist: 311 Beautiful disaster
Artist: Blur Artist: Blur
Artist: Blur Girls and Boys
Artist: Clawfinger Artist: Clawfinger
Artist: Clawfinger Nothing Going On
Artist: Disturbed Artist: Disturbed
Artist: Disturbed The Vengeful One
Artist: Gotthard Artist: Gotthard
Artist: Gotthard Eagle
Artist: Green Day 21 Guns
Artist: Green Day Artist: Green Day
Artist: Green Day Kill The DJ
Artist: Iggy Pop Artist: Iggy Pop
Artist: Iggy Pop Sunday
Artist: Korn Artist: Korn
Artist: Korn Here To Stay
Artist: Linkin Park Artist: Linkin Park
Artist: Linkin Park In The End
Artist: Linkin Park Numb
Artist: Lordi Artist: Lordi
Artist: Lordi Hard Rock Hallelujah
Artist: Nickelback Artist: Nickelback
Artist: Nickelback She Keeps Me Up
Artist: Nomy Artist: Nomy
Artist: Nomy Cocaine
Artist: Papa Roach Artist: Papa Roach
Artist: Papa Roach Getting Away With Murder
Artist: Rise Against Artist: Rise Against
Artist: Ri