In [3]:
#****************************************************************************
# (C) Cloudera, Inc. 2020-2025
#  All rights reserved.
#
#  Applicable Open Source License: GNU Affero General Public License v3.0
#
#  NOTE: Cloudera open source products are modular software products
#  made up of hundreds of individual components, each of which was
#  individually copyrighted.  Each Cloudera open source product is a
#  collective work under U.S. Copyright Law. Your license to use the
#  collective work is as provided in your written agreement with
#  Cloudera.  Used apart from the collective work, this file is
#  licensed for your use pursuant to the open source license
#  identified above.
#
#  This code is provided to you pursuant a written agreement with
#  (i) Cloudera, Inc. or (ii) a third-party authorized to distribute
#  this code. If you do not have a written agreement with Cloudera nor
#  with an authorized and properly licensed third party, you do not
#  have any rights to access nor to use this code.
#
#  Absent a written agreement with Cloudera, Inc. (“Cloudera”) to the
#  contrary, A) CLOUDERA PROVIDES THIS CODE TO YOU WITHOUT WARRANTIES OF ANY
#  KIND; (B) CLOUDERA DISCLAIMS ANY AND ALL EXPRESS AND IMPLIED
#  WARRANTIES WITH RESPECT TO THIS CODE, INCLUDING BUT NOT LIMITED TO
#  IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY AND
#  FITNESS FOR A PARTICULAR PURPOSE; (C) CLOUDERA IS NOT LIABLE TO YOU,
#  AND WILL NOT DEFEND, INDEMNIFY, NOR HOLD YOU HARMLESS FOR ANY CLAIMS
#  ARISING FROM OR RELATED TO THE CODE; AND (D)WITH RESPECT TO YOUR EXERCISE
#  OF ANY RIGHTS GRANTED TO YOU FOR THE CODE, CLOUDERA IS NOT LIABLE FOR ANY
#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, PUNITIVE OR
#  CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, DAMAGES
#  RELATED TO LOST REVENUE, LOST PROFITS, LOSS OF INCOME, LOSS OF
#  BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
#  DATA.
#
# #  Author(s): Paul de Fusco
#***************************************************************************/

In [25]:
import cudf
import cugraph

# Create an edge list as a DataFrame
edgelist_df = cudf.DataFrame({
  "src": [0, 1, 2, 0, 1, 1, 1, 3],
  "dst": [1, 2, 3, 3, 1, 0, 0, 0],
  "weight": [1.0, 1.0, 1.0, 0.5, 0.5, 0.5, 0.5, 5.0]
})

In [26]:
# Create a cuGraph graph from the edge list
G = cugraph.Graph()
G.from_cudf_edgelist(edgelist_df, source="src", destination="dst", edge_attr="weight")

# Print the graph
print(G)

<cugraph.structure.graph_classes.Graph object at 0x7fd4ad348670>


In [27]:
# Compute personalized PageRank
pr = cugraph.pagerank(G)

# Print the personalized PageRank scores
print(pr)

   vertex  pagerank
0       1  0.209811
1       0  0.291183
2       2  0.187393
3       3  0.311613




### With Personalization

In [18]:
# Let's bump up some weights and see how that changes the results
personalization_vec = cudf.DataFrame()
personalization_vec['vertex'] = [100, 26]
personalization_vec['values'] = [0.25, 0.75]

In [19]:
personalization_vec

Unnamed: 0,vertex,values
0,100,0.25
1,26,0.75


In [20]:
ppr = cugraph.pagerank(G, alpha=0.85, personalization=personalization_vec, max_iter=100, tol=1.0e-5, nstart=None)



In [21]:
ppr.sort_values('pagerank', ascending=False).head(3)

Unnamed: 0,vertex,pagerank
0,0,1.2e-05
1,1,1.2e-05
2,2,1.2e-05
