# Big Data Exam Report @ UniBo a.y. 2023/2024

- Manuel Andruccioli
- Kelvin Olaiya

In [None]:
%%configure -f
{"executorMemory":"8G", "numExecutors":3, "executorCores":3, "conf": {"spark.dynamicAllocation.enabled": "false"}}

## Data structures and definitions

### Utility function for parsing

In [None]:
/** Returns the 0-based positions at which `char` occurs in `line`, in ascending order. */
def getCharIndexes(line: String, char: Char): Seq[Int] = line.zipWithIndex.filter(_._1 == char).map(_._2)

/** Cuts `s` at every position listed in `indices` (ascending, relative to the start of `s`).
  *
  * Every piece after the first still begins with the character found at its cut position.
  * With no indices the whole string is returned as the only piece.
  */
def splitAt(s: String, indices: Seq[Int]): Seq[String] = indices match {
  case h +: t => s.splitAt(h) match {
    // The remaining indices are rebased because `b` starts at position `h` of `s`.
    case (a, b) => a +: splitAt(b, t.map(_ - h))
  }
  case _ => Seq(s)
}

/** Splits a CSV line into its fields.
  *
  * Commas lying between a pair of double quotes do not act as separators. Leading commas and
  * spaces are stripped from every field, as well as one leading and one trailing double quote.
  *
  * @param l the raw CSV line
  * @return the fields of the line, in order
  */
def parseCSVLine(l: String): Seq[String] = {
  // Quotes are paired up as (opening, closing). A trailing unpaired quote is kept as a
  // literal character: `collect` skips it instead of failing with a MatchError.
  val apices = getCharIndexes(l, '"').grouped(2).collect { case Seq(a, b) => (a, b) }.toList
  val commas = getCharIndexes(l, ',').filterNot(i => apices.exists { case (a, b) => a < i && i < b })
  val rawFields = splitAt(l, commas)
  // Each piece after the first still carries its separating comma: drop it (and padding spaces).
  val trimmed = rawFields.map(_.dropWhile(c => c == ',' || c == ' '))
  trimmed.map(_.replaceAll("^\"|\"$", ""))
}

In [None]:
/** Unpersists every RDD that the Spark context currently reports as persistent. */
def unpersistRDDs(): Unit = sc.getPersistentRDDs.values.foreach(rdd => rdd.unpersist())

### Data structures

In [None]:
/** A row of the tracks CSV file. */
case class Track(
  uri: String,
  name: String,
  duration: Int,
  explicit: Boolean,
  artists: String,            // List of artists uri, separated by |
  available_markets: String,  // List of markets, separated by |
  album_uri: String,
  popularity: Int,
)

object Tracks {
  /** Parses one CSV line into a [[Track]].
    *
    * @return `None` when the line does not have exactly 8 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[Track] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(uri, name, duration, explicit, artists, available_markets, album_uri, popularity) =>
          Some(Track(uri, name, duration.toInt, explicit.toBoolean, artists, available_markets, album_uri, popularity.toInt))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

/** A row of the playlists CSV file. */
case class Playlist(
  pid: Int,
  name: String,
  num_follower: Int,
)

object Playlists {
  /** Parses one CSV line into a [[Playlist]].
    *
    * @return `None` when the line does not have exactly 3 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[Playlist] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(pid, name, num_follower) =>
          Some(Playlist(pid.toInt, name, num_follower.toInt))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

/** A row of the tracks-in-playlists CSV file: track `track_uri` sits at position `pos` of playlist `pid`. */
case class TrackInPlaylist(
  pid: Int,
  track_uri: String,
  pos: Int,
)

object TrackInPlaylists {
  /** Parses one CSV line into a [[TrackInPlaylist]].
    *
    * @return `None` when the line does not have exactly 3 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[TrackInPlaylist] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(pid, track_uri, pos) =>
          Some(TrackInPlaylist(pid.toInt, track_uri, pos.toInt))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

/** A row of the artists CSV file. */
case class Artist(
  uri: String,
  name: String,
  followers: Int,
  genres: String,             // List of genres, separated by |
  popularity: Int,
)

object Artists {
  /** Parses one CSV line into an [[Artist]].
    *
    * @return `None` when the line does not have exactly 5 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[Artist] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(uri, name, followers, genres, popularity) =>
          Some(Artist(uri, name, followers.toInt, genres, popularity.toInt))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

/** A row of the albums CSV file. */
case class Album(
  uri: String,
  name: String,
  album_type: String,         // album, compilation, single.
  artists: String,            // List of artists uri, separated by |
  available_markets: String,  // List of markets, separated by |
  release_year: String,
  total_tracks: Int,
)

object Albums {
  /** Parses one CSV line into an [[Album]].
    *
    * @return `None` when the line does not have exactly 7 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[Album] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(uri, name, album_type, artists, available_markets, release_year, total_tracks) =>
          Some(Album(uri, name, album_type, artists, available_markets, release_year, total_tracks.toInt))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

/** A row of the audio-features CSV file, keyed by track `uri`. */
case class Feature(
  uri: String,
  key: Int,                   // Index into the pitch-class names used by toKey (0 = C, ..., 11 = B)
  loudness: Double,
  tempo: Double,              // Compared against 130 BPM by toClass
  mode: Boolean,              // true when the CSV value is 1 (presumably "major" - TODO confirm)
  danceability: Double,
  valence: Double,
  instrumentalness: Double,
  liveness: Double,
  acousticness: Double,
  energy: Double,
  speechiness: Double,
)

object Features {
  /** Parses one CSV line into a [[Feature]].
    *
    * @return `None` when the line does not have exactly 12 fields or a field cannot be converted
    *         (e.g. the header line); non-fatal parsing errors are never propagated
    */
  def fromCSVLine(line: String): Option[Feature] =
    scala.util.Try {
      parseCSVLine(line) match {
        case Seq(uri, key, loudness, tempo, mode, danceability, valence, instrumentalness, liveness, acousticness, energy, speechiness) =>
          Some(Feature(uri, key.toInt, loudness.toDouble, tempo.toDouble, mode.toInt == 1, danceability.toDouble, valence.toDouble, instrumentalness.toDouble, liveness.toDouble, acousticness.toDouble, energy.toDouble, speechiness.toDouble))
        // Wrong number of fields: drop the line instead of failing with a MatchError.
        case _ => None
      }
    }.toOption.flatten
}

## Dataset exploration

In [None]:
// Name of the bucket (accessed through the s3a:// scheme) that holds the exam CSV files.
val bucketname = "unibo-bd2324-olaiya"

// One path per input file, all under the bucket's /exam/ prefix.
val path_albums = s"s3a://$bucketname/exam/albums.csv"
val path_artists = s"s3a://$bucketname/exam/artists.csv"
val path_features = s"s3a://$bucketname/exam/features.csv"
val path_playlists = s"s3a://$bucketname/exam/playlists.csv"
val path_tracksInPlaylist = s"s3a://$bucketname/exam/tracks_in_playlists.csv"
val path_tracks = s"s3a://$bucketname/exam/tracks.csv"

sc.applicationId

// Last expression of the cell: a reminder of how to reach the Spark UI of this application.
s"SPARK UI: Enable forwarding of port 20888 and connect to http://localhost:20888/proxy/${sc.applicationId}/"

In [None]:
val datasetPath = "dataset/"

// Every file is read as text, one record per line; lines for which the corresponding
// fromCSVLine returns None are dropped by flatMap.
val albumRdd = sc.textFile(path_albums).flatMap(line => Albums.fromCSVLine(line))
val artistRdd = sc.textFile(path_artists).flatMap(line => Artists.fromCSVLine(line))
val featureRdd = sc.textFile(path_features).flatMap(line => Features.fromCSVLine(line))
val playlistRdd = sc.textFile(path_playlists).flatMap(line => Playlists.fromCSVLine(line))
val trackInPlaylistRdd = sc.textFile(path_tracksInPlaylist).flatMap(line => TrackInPlaylists.fromCSVLine(line))
val trackRdd = sc.textFile(path_tracks).flatMap(line => Tracks.fromCSVLine(line))


- Date le seguenti metriche:
  - popolarità della traccia
  - popolarità media delle tracce in anno
  - popolarità dell'artista (se più artisti, media di essi)
Capire come una playlist viene influenzata maggiormente dalle precedenti metriche, mediando i valori delle tracce di cui è composta. Inoltre, aggregare le playlist sull'influenza precedentemente calcolata, mediando per il numero di followers delle playlist.
La query permette di rispondere alla seguente domanda:
una playlist influenzata maggiormente dalla popolarità delle tracce ha in media 500 followers. (stessa cosa per le altre due metriche di partenza)
 
- Given the following classes: slowly danceable (tempo <= 130BPM, danceability > 0.5), swiftly danceable (tempo >130BPM, danceability > 0.5), slowly undanceable (tempo <= 130BPM, danceability <= 0.5), swiftly undanceable (tempo >130BPM, danceability <= 0.5); and the various keys (C, C#/Db, ...).
  for each class and (key ---OR--- range of followers) get:
    - The number of playlist.
    - Average playlist's percentage.
    - Percentage of explicit songs.
    - Average number of playlist followers.
    - Average tracks tempo
    - Average tracks danceability
  (The key of a playlist is the most present key among its tracks)

## Job 2

Given the following classes: slowly danceable (tempo <= 130BPM, danceability > 0.5), swiftly danceable (tempo >130BPM, danceability > 0.5), slowly undanceable (tempo <= 130BPM, danceability <= 0.5), swiftly undanceable (tempo >130BPM, danceability <= 0.5); and the various keys (C, C#/Db, ...).
  for each class and (key ---OR--- range of followers) get:
  - The number of playlist.
  - Average playlist's explicitness percentage.
  - Average number of tracks in playlist.
  - Average number of playlist followers.
  <!-- - Average playlist danceability.
  - Average playlist tempo. -->
  (The key of a playlist is the most present key among its tracks)

In [None]:
/** Names the class of a (tempo, danceability) pair.
  *
  * The speed part is "slowly" up to 130 BPM (inclusive) and "swiftly" above it; the dance part
  * is "danceable" above 0.5 and "undanceable" otherwise. A NaN argument satisfies no
  * comparison and results in a MatchError.
  */
def toClass(tempo: Double, danceablility: Double): String = {
  val speed = tempo match {
    case bpm if bpm <= 130 => "slowly"
    case bpm if bpm > 130 => "swiftly"
  }
  val dance = danceablility match {
    case score if score > 0.5 => "danceable"
    case score if score <= 0.5 => "undanceable"
  }
  speed + " " + dance
}

/** Maps a pitch-class index (0 = C, ..., 11 = B) to its name.
  *
  * Any index outside 0..11 (e.g. -1) yields "Unknown" instead of an IndexOutOfBoundsException,
  * so a single unexpected value cannot abort a whole Spark job.
  */
def toKey(key: Int): String =
  Vector("C", "C#/Db", "D", "D#/Eb", "E", "F", "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B").lift(key).getOrElse("Unknown")

/** Returns a copy of `map` in which the counter stored under `key` is one higher
  * (a missing key counts as 0).
  */
def incrementKey(map: Map[Int, Int], key: Int): Map[Int, Int] =
  map + (key -> (map.getOrElse(key, 0) + 1))

/** Merges two counter maps by summing the values of equal keys.
  *
  * Keys that appear in only one of the two maps are kept with their value, so the merge is
  * symmetric (the previous version dropped keys present only in `map2`).
  */
def joinMap(map1: Map[Int, Int], map2: Map[Int, Int]): Map[Int, Int] =
  map2.foldLeft(map1) { case (acc, (k, v)) => acc.updated(k, acc.getOrElse(k, 0) + v) }

In [None]:
// (trackUri, (tempo, danceability, key, explicit)): audio features joined with the explicit flag.
val features = featureRdd.map(feature => (feature.uri, (feature.tempo, feature.danceability, feature.key))).
  join(trackRdd.map(track => (track.uri, track.explicit))).
  map { case (trackUri, ((tempo, danceability, key), explicit)) => (trackUri, (tempo, danceability, key, explicit)) }

// (trackUri, pid)
val tracksInPlaylist = trackInPlaylistRdd.map(entry => (entry.track_uri, entry.pid))

// (pid, numFollowers)
val playlistWithFollowers = playlistRdd.map(playlist => (playlist.pid, playlist.num_follower))

### First implementation

Following the self-join pattern, we computed the `playlistClasses` RDD by joining the `tracksInPlaylist` RDD with the `features` one and then aggregating on the playlist ID (pid) in order to compute the average tempo and danceability of each playlist. 
After that, we joined `tracksInPlaylistWithFeatures` with `playlistWithFollowers` so that, by aggregating on the **pid**, we could compute the ratio of explicit songs and the number of followers of each playlist. Finally, we joined this last result with `playlistClasses` so that we could calculate, for each of those classes, the number of playlists belonging to the class, the average ratio of explicit songs and the average number of followers.  

In [None]:
// Compute the class of each playlist
// (trackUri, (pid, (tempo, danceability, key, explicit)))
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features)

// Per playlist: sum tempo and danceability, count the tracks per key, then keep
// (name of the most frequent key, class of the average tempo/danceability).
val playlistClasses = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (t, d, k, _))) => (pid, (t, d, k)) }.
    aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0))(
        { case ((accT, accD, accK, c), (t, d, k)) => (accT + t, accD + d, incrementKey(accK, k), c + 1) },
        { case ((accT1, accD1, accK1, c1), (accT2, accD2, accK2, c2)) => (accT1 + accT2, accD1 + accD2, joinMap(accK1, accK2), c1 + c2) }
    ).
    mapValues { case (accT, accD, accK, c) => (toKey(accK.maxBy(_._2)._1), toClass(accT/c, accD/c)) } // (pid, (k, cls))

// Join & Aggregate: every track is first joined with the followers of its playlist, then the
// explicit ratio is aggregated per playlist and finally averaged per (key, class).
val job1 = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (_, _, _, e))) => (pid, if (e) 1 else 0) }.
    join(playlistWithFollowers). // (pid, (e, num_followers))
    aggregateByKey((0, 0, 0))(
        { case ((accE, _, c), (e, f)) => (accE + e, f, c + 1) },
        { case ((accE1, nF, c1), (accE2, _, c2)) => (accE1 + accE2, nF, c1 + c2) }
    ).
    // accE and c are both Int: convert before dividing, otherwise the ratio is truncated to 0 or 1.
    mapValues { case (accE, nF, c) => (accE.toDouble / c, nF) }. // (pid, (avgE, nF))
    join(playlistClasses). // (pid, ((avgE, nF), (k, cls)))
    map { case (_, ((avgE, nF), (k, cls))) => ((k, cls), (avgE, nF)) }.
    // Followers are summed in a Long so the total cannot overflow an Int.
    aggregateByKey((0.0, 0L, 0))(
        { case ((accE, accF, c), (e, f)) => (accE + e, accF + f, c + 1) },
        { case ((accE1, accF1, c1), (accE2, accF2, c2)) => (accE1 + accE2, accF1 + accF2, c1 + c2) }
    ).
    // ((k, cls), (avg explicit ratio, avg followers, number of playlists)); floating-point averages.
    mapValues { case (accE, accF, c) => (accE / c, accF.toDouble / c, c) }

val result = job1.collect

This job completed in around `22 minutes`. This is surely due to the size of the input data, in particular the tracks-in-playlists file, which is `8.6GB` large.

## Optimizations

As a first step toward optimizing the execution time, we noticed that we accessed the result of the join between `tracksInPlaylist` and `features` multiple times. So we decided to cache it, in order to avoid loading the dataset and computing the join more than once.

In [None]:
// Compute the class of each playlist
// (trackUri, (pid, (tempo, danceability, key, explicit))), cached because it is consumed twice below.
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features).cache

// Per playlist: sum tempo and danceability, count the tracks per key, then keep
// (name of the most frequent key, class of the average tempo/danceability).
val playlistClasses = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (t, d, k, _))) => (pid, (t, d, k)) }.
    aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0))(
        { case ((accT, accD, accK, c), (t, d, k)) => (accT + t, accD + d, incrementKey(accK, k), c + 1) },
        { case ((accT1, accD1, accK1, c1), (accT2, accD2, accK2, c2)) => (accT1 + accT2, accD1 + accD2, joinMap(accK1, accK2), c1 + c2) }
    ).
    mapValues { case (accT, accD, accK, c) => (toKey(accK.maxBy(_._2)._1), toClass(accT/c, accD/c)) } // (pid, (k, cls))

// Join & Aggregate: every track is first joined with the followers of its playlist, then the
// explicit ratio is aggregated per playlist and finally averaged per (key, class).
val job2 = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (_, _, _, e))) => (pid, if (e) 1 else 0) }.
    join(playlistWithFollowers). // (pid, (e, num_followers))
    aggregateByKey((0, 0, 0))(
        { case ((accE, _, c), (e, f)) => (accE + e, f, c + 1) },
        { case ((accE1, nF, c1), (accE2, _, c2)) => (accE1 + accE2, nF, c1 + c2) }
    ).
    // accE and c are both Int: convert before dividing, otherwise the ratio is truncated to 0 or 1.
    mapValues { case (accE, nF, c) => (accE.toDouble / c, nF) }. // (pid, (avgE, nF))
    join(playlistClasses). // (pid, ((avgE, nF), (k, cls)))
    map { case (_, ((avgE, nF), (k, cls))) => ((k, cls), (avgE, nF)) }.
    // Followers are summed in a Long so the total cannot overflow an Int.
    aggregateByKey((0.0, 0L, 0))(
        { case ((accE, accF, c), (e, f)) => (accE + e, accF + f, c + 1) },
        { case ((accE1, accF1, c1), (accE2, accF2, c2)) => (accE1 + accE2, accF1 + accF2, c1 + c2) }
    ).
    // ((k, cls), (avg explicit ratio, avg followers, number of playlists)); floating-point averages.
    mapValues { case (accE, accF, c) => (accE / c, accF.toDouble / c, c) }

val result = job2.collect

No reduction in time was registered, as the job still took `22 minutes` to complete. We therefore tried to change the execution plan. In the previous job, when joining `tracksInPlaylistWithFeatures` with `playlistWithFollowers`, we performed the so-called `Join & Aggregate`. So we tried to perform the `Aggregate and Join` instead, to see if there was any benefit.

In [None]:
// Compute the class of each playlist
// (trackUri, (pid, (tempo, danceability, key, explicit))), cached because it is consumed twice below.
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features).cache

// Per playlist: sum tempo and danceability, count the tracks per key, then keep
// (name of the most frequent key, class of the average tempo/danceability).
val playlistClasses = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (t, d, k, _))) => (pid, (t, d, k)) }.
    aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0))(
        { case ((accT, accD, accK, c), (t, d, k)) => (accT + t, accD + d, incrementKey(accK, k), c + 1) },
        { case ((accT1, accD1, accK1, c1), (accT2, accD2, accK2, c2)) => (accT1 + accT2, accD1 + accD2, joinMap(accK1, accK2), c1 + c2) }
    ).
    mapValues { case (accT, accD, accK, c) => (toKey(accK.maxBy(_._2)._1), toClass(accT/c, accD/c)) } // (pid, (k, cls))

// (pid, (k, cls, num_followers))
val playlistsWithClasses = playlistWithFollowers.join(playlistClasses).mapValues { case (f, (k, cls)) => (k, cls, f) }

// Aggregate & Join: the explicit ratio is aggregated per playlist first, and only the
// per-playlist result is joined with the playlist classes and followers.
val job3 = tracksInPlaylistWithFeatures.
    map { case (_, (pid, (_, _, _, e))) => (pid, if (e) 1 else 0) }.
    aggregateByKey((0.0, 0))(
        { case ((accE, c), e) => (accE + e, c + 1) },
        { case ((accE1, c1), (accE2, c2)) => (accE1 + accE2, c1 + c2) }
    ).
    mapValues { case (accE, c) => accE / c }.
    join(playlistsWithClasses). // (pid, (avgE, (k, cls, f)))
    map { case (_, (avgE, (k, cls, nF))) => ((k, cls), (avgE, nF)) }.
    // Followers are summed in a Long so the total cannot overflow an Int.
    aggregateByKey((0.0, 0L, 0))(
        { case ((accE, accF, c), (e, f)) => (accE + e, accF + f, c + 1) },
        { case ((accE1, accF1, c1), (accE2, accF2, c2)) => (accE1 + accE2, accF1 + accF2, c1 + c2) }
    ).
    // accF and c are integral: convert before dividing so the average followers is not truncated.
    mapValues { case (accE, accF, c) => (accE / c, accF.toDouble / c, c) }

val result = job3.collect

By performing an *Aggregate and Join* the execution time increased by `2 minutes`. So ultimately we tried to compute the playlist classes and the requested averages in a single aggregation step, before joining with `playlistWithFollowers` and aggregating to obtain the final result.

In [None]:
// Re-key the (track, features) join by playlist: (pid, (tempo, danceability, key, explicit)).
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features).
    map { case (trackUri, (pid, (t, d, k, e))) => (pid, (t, d, k, e)) }


// Single aggregation per playlist. The accumulator holds, in order: tempo sum, danceability sum,
// track count per key, number of explicit tracks (as Double) and number of tracks.
// The result keeps the most frequent key, the class of the averages, the explicit ratio and the track count.
val tracksInPlaylistWithClasses = tracksInPlaylistWithFeatures.
        aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0.0, 0))(
          { case ((accT, accD, ks, ec, c), (t, d, k, e)) => (accT+t, accD+d, incrementKey(ks, k), ec+(if (e) 1 else 0), c+1) },
          { case ((accT1, accD1, k1, ec1, c1), (accT2, accD2, k2, ec2, c2)) => (accT1+accT2, accD1+accD2, joinMap(k1, k2), ec1+ec2, c1+c2) }).
        mapValues({ case (accT, accD, k, ec, c) => (toKey(k.maxBy(_._2)._1), toClass(accT/c, accD/c), ec/c, c) }) // (pid, (k, class, avgE, c))

// Attach the followers of each playlist, then average followers, explicit ratio and track count
// over the playlists of every (key, class) pair; the last element is the number of playlists.
val job4 = playlistWithFollowers.join(tracksInPlaylistWithClasses). // (pid, (num_follower, (k, class, avgE, c)))
        map { case (pid, (num_follower, (k, cls, avgE, tc))) => ((k, cls), (num_follower, avgE, tc)) }.
        aggregateByKey((0.0, 0.0, 0.0, 0))(
          { case ((accF, accE, accTC, c), (f, e, tc)) => (accF+f, accE+e, accTC+tc, c+1) },
          { case ((accF1, accE1, accTC1, c1), (accF2, accE2, accTC2, c2)) => (accF1+accF2, accE1+accE2, accTC1+accTC2, c1+c2) }
        ).
        mapValues { case (accF, accE, accTC,c) => (accF/c, accE/c, accTC/c, c) } // ((k, class), (avgF, avgE, avgTC, c))
        
val result = job4.collect

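This last job took `20 minutes` in total: a `2 minutes` improvement over the first implementation (`22 minutes`) and a `4 minutes` improvement over the previous *Aggregate and Join* attempt.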

### Further optimizations

Since the last job was the most efficient in terms of execution time, it has been chosen for further optimizations.
First of all, by looking at the job details on the Spark UI, it becomes evident that the most costly stage is the join between `tracksInPlaylist` and `features`. This may be due to the fact that `tracksInPlaylist` is made of 277 blocks, and thus 277 tasks. This very likely results in a big scheduling overhead and in the generation of a lot of intermediate files due to the shuffling strategy. So we tried reducing the number of partitions. 

### Coalescing the number of partitions

In [None]:
// Drop whatever the previous cells cached before running this experiment.
unpersistRDDs()
// Partition counts tried in the previous runs (the active line below is the last one measured).
// val tracksInPlaylist = trackInPlaylistRdd.map(t => (t.track_uri, t.pid)).coalesce(150)
// val tracksInPlaylist = trackInPlaylistRdd.map(t => (t.track_uri, t.pid)).coalesce(50)
// val tracksInPlaylist = trackInPlaylistRdd.map(t => (t.track_uri, t.pid)).coalesce(10)
val tracksInPlaylist = trackInPlaylistRdd.map(t => (t.track_uri, t.pid)).coalesce(6)


// Re-key the (track, features) join by playlist: (pid, (tempo, danceability, key, explicit)).
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features).
    map { case (trackUri, (pid, (t, d, k, e))) => (pid, (t, d, k, e)) }


// Single aggregation per playlist. The accumulator holds, in order: tempo sum, danceability sum,
// track count per key, number of explicit tracks (as Double) and number of tracks.
val tracksInPlaylistWithClasses = tracksInPlaylistWithFeatures.
        aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0.0, 0))(
          { case ((accT, accD, ks, ec, c), (t, d, k, e)) => (accT+t, accD+d, incrementKey(ks, k), ec+(if (e) 1 else 0), c+1) },
          { case ((accT1, accD1, k1, ec1, c1), (accT2, accD2, k2, ec2, c2)) => (accT1+accT2, accD1+accD2, joinMap(k1, k2), ec1+ec2, c1+c2) }).
        mapValues({ case (accT, accD, k, ec, c) => (toKey(k.maxBy(_._2)._1), toClass(accT/c, accD/c), ec/c, c) }) // (pid, (k, class, avgE, c))

// Attach the followers of each playlist, then average followers, explicit ratio and track count
// over the playlists of every (key, class) pair; the last element is the number of playlists.
val job5 = playlistWithFollowers.join(tracksInPlaylistWithClasses). // (pid, (num_follower, (k, class, avgE, c)))
        map { case (pid, (num_follower, (k, cls, avgE, tc))) => ((k, cls), (num_follower, avgE, tc)) }.
        aggregateByKey((0.0, 0.0, 0.0, 0))(
          { case ((accF, accE, accTC, c), (f, e, tc)) => (accF+f, accE+e, accTC+tc, c+1) },
          { case ((accF1, accE1, accTC1, c1), (accF2, accE2, accTC2, c2)) => (accF1+accF2, accE1+accE2, accTC1+accTC2, c1+c2) }
        ).
        mapValues { case (accF, accE, accTC,c) => (accF/c, accE/c, accTC/c, c) } // ((k, class), (avgF, avgE, avgTC, c))
job5.collect        

Here are the results of the various executions:

|N. of tasks |Join step shuffle data size|Execution time|
|------------|---------------------------|-------------|
|150|12.2 GB|17 min|
|50|7.0 GB|13 min|
|10|1704.8 MB|14 min|
|6|---|8.6 min|
|5|1013.7MB|9.3 min|

So the best number of partitions seems to be `6`.

## Enforcing a partitioning criterion

Next, we tried to enforce the same partitioning criterion on the RDDs involved in the joins. We tried with different numbers of partitions:

In [None]:
import org.apache.spark.HashPartitioner
//val p = new HashPartitioner(50)
//val p = new HashPartitioner(10)
//val p = new HashPartitioner(6)
// Partitioner shared by every RDD taking part in a join below.
val p = new HashPartitioner(5)

// Drop whatever the previous cells cached before running this experiment.
unpersistRDDs()

// (trackUri, (tempo, danceability, key, explicit)), hash-partitioned on the track uri with `p`.
val features = featureRdd.map(t => (t.uri, (t.tempo, t.danceability, t.key))).
  join(trackRdd.map(t => (t.uri, t.explicit))).
  map { case (uri, ((t, d, k), e)) => (uri, (t, d, k, e)) }.partitionBy(p)
// (trackUri, pid), partitioned with the same `p` as `features`.
val tracksInPlaylist = trackInPlaylistRdd.map(t => (t.track_uri, t.pid)).partitionBy(p)


// Both sides share the partitioner `p`; the result is re-keyed by playlist:
// (pid, (tempo, danceability, key, explicit)).
val tracksInPlaylistWithFeatures = tracksInPlaylist.join(features).
    map { case (trackUri, (pid, (t, d, k, e))) => (pid, (t, d, k, e)) }


// Single aggregation per playlist, then repartitioned by pid with `p` for the next join.
// NOTE(review): aggregateByKey is called without a partitioner and is followed by partitionBy(p);
// passing `p` to aggregateByKey might save one shuffle - verify on the Spark UI.
val tracksInPlaylistWithClasses = tracksInPlaylistWithFeatures.
        aggregateByKey((0.0, 0.0, (0 to 11).map((_, 0)).toMap, 0.0, 0))(
          { case ((accT, accD, ks, ec, c), (t, d, k, e)) => (accT+t, accD+d, incrementKey(ks, k), ec+(if (e) 1 else 0), c+1) },
          { case ((accT1, accD1, k1, ec1, c1), (accT2, accD2, k2, ec2, c2)) => (accT1+accT2, accD1+accD2, joinMap(k1, k2), ec1+ec2, c1+c2) }).
        mapValues({ case (accT, accD, k, ec, c) => (toKey(k.maxBy(_._2)._1), toClass(accT/c, accD/c), ec/c, c) }).partitionBy(p) // (pid, (k, class, avgE, c))

// Followers are partitioned with `p` as well before the join; the per-playlist values are then
// averaged over the playlists of every (key, class) pair.
val job4 = playlistWithFollowers.partitionBy(p).join(tracksInPlaylistWithClasses). // (pid, (num_follower, (k, class, avgE, c)))
        map { case (pid, (num_follower, (k, cls, avgE, tc))) => ((k, cls), (num_follower, avgE, tc)) }.
        aggregateByKey((0.0, 0.0, 0.0, 0))(
          { case ((accF, accE, accTC, c), (f, e, tc)) => (accF+f, accE+e, accTC+tc, c+1) },
          { case ((accF1, accE1, accTC1, c1), (accF2, accE2, accTC2, c2)) => (accF1+accF2, accE1+accE2, accTC1+accTC2, c1+c2) }
        ).
        mapValues { case (accF, accE, accTC,c) => (accF/c, accE/c, accTC/c, c) } // ((k, class), (avgF, avgE, avgTC, c))
val result = job4.collect        

Here are the results of the various executions:

|N. of tasks |Join step shuffle data size|Execution time|
|------------|---------------------------|-------------|
|50|7.0 GB|13 min|
|10|1704.8 MB|12 min|
|6|---|7.5 min|
|5|1013.7 MB|7.5 min|

So the best number of partitions seems to be `6 or 5`.

So overall we obtain a speed-up of $S = \frac{20\ \text{min}}{7.5\ \text{min}} \approx 2.67$