In [1]:
from pyspark.sql import SparkSession

In [2]:
spark = SparkSession.builder.appName("RatingsJoin").getOrCreate()

In [3]:
usernames = spark.read.csv(
    'random_user_id.csv',
    sep=",",
    header=True,
    quote='"',
    encoding="UTF-8",
    inferSchema=True,)

In [4]:
ratings = spark.read.csv(
    'DatafinitiElectronicsProductData02.csv',
    sep=",",
    header=True,
    quote='"',
    encoding="UTF-8",
    inferSchema=True,)

In [5]:
usernames.printSchema()

root
 |-- gender: string (nullable = true)
 |-- name.title: string (nullable = true)
 |-- name.first: string (nullable = true)
 |-- name.last: string (nullable = true)
 |-- location.street.number: integer (nullable = true)
 |-- location.street.name: string (nullable = true)
 |-- location.city: string (nullable = true)
 |-- location.state: string (nullable = true)
 |-- location.country: string (nullable = true)
 |-- location.postcode: string (nullable = true)
 |-- location.coordinates.latitude: double (nullable = true)
 |-- location.coordinates.longitude: double (nullable = true)
 |-- location.timezone.offset: string (nullable = true)
 |-- location.timezone.description: string (nullable = true)
 |-- email: string (nullable = true)
 |-- login.uuid: string (nullable = true)
 |-- login.username: string (nullable = true)
 |-- login.password: string (nullable = true)
 |-- login.salt: string (nullable = true)
 |-- login.md5: string (nullable = true)
 |-- login.sha1: string (nullable = true)
 

In [6]:
usernames = usernames.select("`name.title`","`name.first`","`login.username`","`registered.date`")

In [7]:
ratings.printSchema()

root
 |-- id: string (nullable = true)
 |-- asins: string (nullable = true)
 |-- brand: string (nullable = true)
 |-- categories: string (nullable = true)
 |-- colors: string (nullable = true)
 |-- dateAdded: timestamp (nullable = true)
 |-- dateUpdated: timestamp (nullable = true)
 |-- dimension: string (nullable = true)
 |-- ean: decimal(20,0) (nullable = true)
 |-- imageURLs: string (nullable = true)
 |-- keys: string (nullable = true)
 |-- manufacturer: string (nullable = true)
 |-- manufacturerNumber: string (nullable = true)
 |-- name: string (nullable = true)
 |-- primaryCategories: string (nullable = true)
 |-- reviews_date: timestamp (nullable = true)
 |-- reviews_dateSeen: string (nullable = true)
 |-- reviews_doRecommend: boolean (nullable = true)
 |-- reviews_numHelpful: integer (nullable = true)
 |-- reviews_rating: integer (nullable = true)
 |-- reviews_sourceURLs: string (nullable = true)
 |-- reviews_text: string (nullable = true)
 |-- reviews_title: string (nullable = 

In [8]:
ratings = ratings.select("id","manufacturer","reviews_title","reviews_username","reviews_rating","reviews_username")

In [9]:
opinions = ratings.join(usernames, ratings["reviews_username"] == usernames["`login.username`"])

In [12]:
opinions.select("id","manufacturer","reviews_title","reviews_username","reviews_rating","reviews_username","`name.title`","`name.first`", "`registered.date`").show()

+--------------------+------------+--------------------+--------------------+--------------+--------------------+------------+----------+--------------------+
|                  id|manufacturer|       reviews_title|    reviews_username|reviews_rating|    reviews_username|  name.title|name.first|     registered.date|
+--------------------+------------+--------------------+--------------------+--------------+--------------------+------------+----------+--------------------+
|AVpf3txeLJeJML43FN82|   Microsoft|Love the fingerpr...|     angryladybug494|             5|     angryladybug494|          Ms|   Addison|2007-06-01 12:31:...|
|AVpf3txeLJeJML43FN82|   Microsoft|                Nice|       greenzebra746|             4|       greenzebra746|          Mr|  Zinedine|2016-08-30 08:41:...|
|AVpf3txeLJeJML43FN82|   Microsoft|                 New|         redzebra452|             4|         redzebra452|          Mr|     Blake|2009-09-16 05:29:...|
|AVpf3txeLJeJML43FN82|   Microsoft|       Nice

In [12]:
opinions.show()

+--------------------+------------+--------------------+--------------------+--------------+--------------------+------------+----------+--------------------+
|                  id|manufacturer|       reviews_title|    reviews_username|reviews_rating|    reviews_username|  name.title|name.first|      login.username|
+--------------------+------------+--------------------+--------------------+--------------+--------------------+------------+----------+--------------------+
|AVpf3txeLJeJML43FN82|   Microsoft|Love the fingerpr...|     angryladybug494|             5|     angryladybug494|          Ms|   Addison|     angryladybug494|
|AVpf3txeLJeJML43FN82|   Microsoft|                Nice|       greenzebra746|             4|       greenzebra746|          Mr|  Zinedine|       greenzebra746|
|AVpf3txeLJeJML43FN82|   Microsoft|                 New|         redzebra452|             4|         redzebra452|          Mr|     Blake|         redzebra452|
|AVpf3txeLJeJML43FN82|   Microsoft|       Nice