In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,IntegerType,DateType
from pyspark.sql.functions import col,count,max
from datetime import datetime

# Start (or reuse) a Spark session named "app" on a local master with two threads.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[2]")
    .getOrCreate()
)

In [0]:
# Sample input: one row per accepted friend request.
# All three columns are declared non-nullable.
schema = StructType(
    [
        StructField("requester_id", IntegerType(), nullable=False),
        StructField("accepter_id", IntegerType(), nullable=False),
        StructField("accept_date", DateType(), nullable=False),
    ]
)

# (requester_id, accepter_id, accept_date)
data = [
    (1, 2, datetime(2016, 6, 3)),
    (1, 3, datetime(2016, 6, 8)),
    (2, 3, datetime(2016, 6, 8)),
    (3, 4, datetime(2016, 6, 9)),
]

request = spark.createDataFrame(data, schema)
request.show()

+------------+-----------+-----------+
|requester_id|accepter_id|accept_date|
+------------+-----------+-----------+
|           1|          2| 2016-06-03|
|           1|          3| 2016-06-08|
|           2|          3| 2016-06-08|
|           3|          4| 2016-06-09|
+------------+-----------+-----------+



In [0]:
# Task: find the person who has the most friends, together with that friend count.
# The test cases are generated so that exactly one person has the most friends.
#
# Each accepted request is a friendship for both sides, so stack the requester
# and accepter ids into a single column and count how often each id appears.
requester_ids = request.select("requester_id")
accepter_ids = request.select("accepter_id")

# unionAll resolves columns by position, so the stacked column keeps the
# first frame's name ("requester_id") even though it also holds accepter ids.
all_ids = requester_ids.unionAll(accepter_ids)

friend_counts = all_ids.groupBy(col("requester_id").alias("id")).agg(
    count("requester_id").alias("count")
)

# Highest count first; keep only the top row.
friend_counts.orderBy(col("count").desc()).limit(1).show()

+---+-----+
| id|count|
+---+-----+
|  3|    3|
+---+-----+



In [0]:
# Same answer via Spark SQL: expose the DataFrame as temp view "r", stack both
# id columns with UNION ALL, then count occurrences per id.
# In the final SELECT, "group by 1" refers to the id column and "order by 2"
# to the count column (positional references into the select list).
request.createOrReplaceTempView("r")

most_friends_sql = """with cte as 
          (select requester_id from r union all select accepter_id from r)
          select requester_id as `id`, count(*) as num from cte group by 1 order by 2 desc limit 1"""

spark.sql(most_friends_sql).show()

+---+---+
| id|num|
+---+---+
|  3|  3|
+---+---+



In [0]:
# Stop the Spark session started in the first cell now that both solutions have been shown.
spark.stop()