In [7]:
from graphframes import GraphFrame

In [8]:
# Input CSV files describing the graph: one for the vertexes, one for the edges
inputPathVertexes, inputPathEdges = (
    "data/Ex53/data/vertexes.csv",
    "data/Ex53/data/edges.csv",
)
# Folder in which the final result is stored
outputPath = "resOut_ex53/"

In [9]:
# Load vertexes.csv into a DataFrame.
# The first line of the file is the header; column types are inferred by Spark.
vDF = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load(inputPathVertexes)
)

In [10]:
#vDF.printSchema()
#vDF.show()

In [11]:
# Load edges.csv into a DataFrame.
# The first line of the file is the header; column types are inferred by Spark.
eDF = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load(inputPathEdges)
)

In [12]:
#eDF.printSchema()
#eDF.show()

In [13]:
# The analysis is about followers, so keep only the edges whose
# linktype is "follow" and drop every other edge before building the graph.
filteredEdfes = eDF.where("linktype='follow' ")

In [14]:
# Build the graph from all the vertexes and the "follow" edges only
g = GraphFrame(v=vDF, e=filteredEdfes)

In [15]:
# The number of followers of a user is the number of its in-links:
# take the in-degree of each vertex and expose it as "numFollowers".
userNumFollowersDF = (
    g.inDegrees
    .withColumnRenamed("inDegree", "numFollowers")
)

In [16]:
#userNumFollowersDF.printSchema()
#userNumFollowersDF.show()

In [17]:
# Select the user(s) with the maximum number of followers

In [18]:
# Aggregate over all users to obtain the highest "numFollowers" value.
# The aggregated column is named "max(numFollowers)"; rename it to "maxFollowers".
maxFollowersDF = (
    userNumFollowersDF
    .agg({"numFollowers": "max"})
    .withColumnRenamed("max(numFollowers)", "maxFollowers")
)

In [19]:
#maxFollowersDF.printSchema()
#maxFollowersDF.show()

In [20]:
# maxFollowersDF holds one record only: bring that Row to the driver
rowMaxNumFollowers = maxFollowersDF.head()

In [21]:
#type(rowMaxNumFollowers)

In [22]:
# Extract the value of the "maxFollowers" field from the Row
maxNumFollowers = rowMaxNumFollowers["maxFollowers"]

In [23]:
# Keep only the user(s) whose number of followers equals the maximum value
hasMaxFollowers = userNumFollowersDF["numFollowers"] == maxNumFollowers
selectedUsersDF = userNumFollowersDF.where(hasMaxFollowers)
# Equivalent alternative based on a SQL-like condition:
# selectedUsersDF = userNumFollowersDF.filter("numFollowers=" + str(maxNumFollowers))

In [24]:
# Inspect the result: print its schema, then display its rows
selectedUsersDF.printSchema()
selectedUsersDF.show()

root
 |-- id: string (nullable = true)
 |-- numFollowers: integer (nullable = false)

+---+------------+
| id|numFollowers|
+---+------------+
| u3|           2|
| u6|           2|
+---+------------+



In [63]:
# Save the result in the output folder as CSV, including a header line.
# mode="overwrite" replaces any previous content of outputPath: with the default
# save mode the write fails when the folder already exists, which prevents
# re-running this cell (or the whole notebook) a second time.
selectedUsersDF.write.csv(outputPath, header=True, mode="overwrite")