<a href="https://colab.research.google.com/github/nishu204/DM_notebooks/blob/main/Page_Rank.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dash
!pip install dash_bootstrap_components

Collecting dash
  Downloading dash-2.14.2-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.9.1-py3-none-any.whl (17 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, ansi2html, dash
Successfully installed ansi2html-1.9.1 dash-2.14.2 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4
Collecting dash_bootstrap_components
  Downloading d

In [5]:
import dash
import dash_bootstrap_components as dbc
import networkx as nx
import numpy as np
import pandas as pd
from dash import dcc, html
from dash.dependencies import Input, Output

In [6]:
# Dash application instance, styled with the Bootstrap theme shipped by
# dash_bootstrap_components.
app = dash.Dash(
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

In [7]:
# Function to calculate PageRank from scratch
def calculate_pagerank(file_path, damping_factor=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank scores for a directed graph stored as an edge list.

    The file is read as tab-separated ``source<TAB>target`` integer pairs;
    lines starting with ``#`` are treated as comments. Repeated edges are
    counted once.

    Each iteration applies

        PR(v) = (1 - d) / N + d * (sum_{u -> v} PR(u) / outdeg(u) + D / N)

    where ``D`` is the total score currently held by dangling nodes (nodes
    without outgoing edges). Spreading ``D`` uniformly keeps the scores
    summing to 1; without that term, rank leaks out of the graph on every
    iteration.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.
        damping_factor: Probability ``d`` of following a link rather than
            jumping to a uniformly random node.
        max_iter: Upper bound on the number of power-iteration steps.
        tol: Iteration stops once every node's score changes by less than
            this amount between two consecutive steps.

    Returns:
        dict mapping each node id to its PageRank score, ordered by first
        appearance of the node in the edge list. Empty if the file contains
        no edges.
    """
    edges = pd.read_csv(
        file_path,
        sep='\t',
        comment='#',
        header=None,
        names=['source', 'target'],
        dtype=int,
    ).drop_duplicates()  # parallel edges count once, as in a simple digraph

    # Flatten to source0, target0, source1, target1, ... so that factorize
    # numbers the nodes 0..N-1 in order of first appearance.
    codes, nodes = pd.factorize(edges.to_numpy().ravel())
    num_nodes = len(nodes)
    if num_nodes == 0:
        return {}

    src = codes[0::2]
    dst = codes[1::2]

    out_degree = np.bincount(src, minlength=num_nodes)
    is_dangling = out_degree == 0
    # Loop-invariant: out-degree of the source endpoint of every edge
    # (always >= 1, so the division below is safe).
    src_out_degree = out_degree[src]

    teleport = (1 - damping_factor) / num_nodes
    rank = np.full(num_nodes, 1.0 / num_nodes)

    for _ in range(max_iter):
        # Score pushed along every edge, accumulated at the edge's target.
        incoming = np.bincount(
            dst, weights=rank[src] / src_out_degree, minlength=num_nodes
        )
        dangling_share = rank[is_dangling].sum() / num_nodes
        rank_new = teleport + damping_factor * (incoming + dangling_share)

        converged = np.abs(rank_new - rank).max() < tol
        # Keep the newest iterate even when converged, so the most accurate
        # estimate is the one returned.
        rank = rank_new
        if converged:
            break

    return dict(zip(nodes.tolist(), rank.tolist()))

In [8]:
# Function to create a Dash table from the results
def create_table(data, max_rows=10):
    """Render the highest-scoring pages as a Dash ``html.Table``.

    Args:
        data: Mapping of page id -> PageRank score.
        max_rows: Maximum number of body rows; rows are ordered by
            descending score.

    Returns:
        An ``html.Table`` whose first row is the header (``Page``,
        ``PageRank``) followed by at most ``max_rows`` data rows.
    """
    df = pd.DataFrame(list(data.items()), columns=['Page', 'PageRank'])
    df = df.sort_values(by='PageRank', ascending=False).head(max_rows)

    # Dash passes `style` straight to React, which expects camelCase CSS keys.
    # Spacing, collapse mode and width are table-level properties, so they are
    # set on the <table> itself rather than on each <tr>.
    table_style = {
        'borderSpacing': '10px',
        'borderCollapse': 'separate',
        'width': '100%',  # Adjust the width as needed
    }
    th_style = {'padding': '10px'}  # Adjust the padding as needed
    td_style = {'padding': '5px'}  # Adjust the padding as needed

    header_row = html.Tr([html.Th(col, style=th_style) for col in df.columns])

    # itertuples yields each cell with its own column's type. Selecting a whole
    # row via df.iloc[i] would upcast the integer page id to float ("123.0").
    body_rows = [
        html.Tr([html.Td(value, style=td_style) for value in row])
        for row in df.itertuples(index=False, name=None)
    ]

    return html.Table([header_row] + body_rows, style=table_style)


In [9]:
# Location of the web-Google edge list inside the Colab runtime
file_path = "/content/web-Google.txt"

In [10]:
# Score every page in the edge list, using the function's default damping
# factor, iteration cap and tolerance
pagerank_data = calculate_pagerank(file_path=file_path)

In [11]:
# Page layout: a title, a short note about the adjacency matrix, and the
# table of top-ranked pages
app.layout = html.Div([
    html.H1('PageRank Dashboard'),

    # Explanatory section; the matrix itself is never rendered
    html.Div([
        html.H2('Adjacency Matrix'),
        dcc.Markdown('''
            The adjacency matrix is not displayed here due to its large size.
            However, it is used internally for PageRank calculations.
        '''),
    ]),

    # Highest-scoring pages, built once from the precomputed results
    html.Div([
        html.H2('PageRank Results'),
        create_table(pagerank_data),
    ]),
])


In [13]:
# Start the Dash development server. `app.run` is the current entry point;
# `app.run_server` is its legacy alias and is not available in newer Dash
# releases, which an unpinned install may pull in.
if __name__ == '__main__':
    app.run(debug=True)

AssertionError: ignored

In [12]:
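# NOTE: everything below is a commented-out, standalone copy of the cells above
# (it adds dash.register_page and reads from a local Windows path); it is never executed.
# import dash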
# from dash import dcc, html
# from dash.dependencies import Input, Output
# import pandas as pd
# import networkx as nx

# dash.register_page(__name__,
#                    path='/page',
#                    name='PageRank',
#                    title='PageRank')

# # Function to calculate PageRank from scratch
# def calculate_pagerank(file_path, damping_factor=0.85, max_iter=100, tol=1e-6):
#     # Read the edge list from the file
#     edges = pd.read_csv(file_path, sep='\t', comment='#', header=None, names=['source', 'target'], dtype=int)

#     # Create a directed graph
#     G = nx.DiGraph()

#     # Add edges to the graph
#     G.add_edges_from(edges.values)

#     # Get the number of nodes
#     num_nodes = G.number_of_nodes()

#     # Initialize PageRank values
#     pagerank = {node: 1 / num_nodes for node in G.nodes}

#     # Iterate until convergence or maximum iterations
#     for _ in range(max_iter):
#         pagerank_new = {node: (1 - damping_factor) / num_nodes + damping_factor * sum(pagerank[neighbor] / G.out_degree(neighbor) for neighbor in G.predecessors(node)) for node in G.nodes}

#         # Check for convergence
#         if all(abs(pagerank_new[node] - pagerank[node]) < tol for node in G.nodes):
#             break

#         pagerank = pagerank_new

#     return pagerank

# # Function to create a Dash table from the results
# def create_table(data, max_rows=10):
#     df = pd.DataFrame(list(data.items()), columns=['Page', 'PageRank'])
#     df = df.sort_values(by='PageRank', ascending=False).head(max_rows)

#     # Define styles
#     table_style = {
#         'border-spacing': '10px',
#         'border-collapse': 'separate',
#         'width': '100%',  # Adjust the width as needed
#     }

#     th_style = {
#         'padding': '10px',  # Adjust the padding as needed
#     }

#     td_style = {
#         'padding': '5px',  # Adjust the padding as needed
#     }

#     # Create the HTML table
#     return html.Table(
#         # Header
#         [html.Tr([html.Th(col, style=th_style) for col in df.columns], style=table_style)] +
#         # Body
#         [html.Tr([html.Td(df.iloc[i][col], style=td_style) for col in df.columns], style=table_style) for i in range(min(len(df), max_rows))]
#     )

# # Define the Dash app
# app = dash.Dash(__name__)

# # Specify the path to the downloaded file
# file_path = r'C:\Users\nisha\Desktop\New folder\web-Google.txt'

# # Calculate PageRank
# pagerank_data = calculate_pagerank(file_path)

# # Define the layout of the app
# app.layout = html.Div(children=[
#     html.H1(children='PageRank Dashboard'),

#     # Display the adjacency matrix
#     html.Div(children=[
#         html.H2(children='Adjacency Matrix'),
#         dcc.Markdown(children='''
#             The adjacency matrix is not displayed here due to its large size.
#             However, it is used internally for PageRank calculations.
#         ''')
#     ]),

#     # Display the PageRank results in a table
#     html.Div(children=[
#         html.H2(children='PageRank Results'),
#         create_table(pagerank_data)
#     ])
# ])

# if __name__ == '__main__':
#     app.run_server(debug=True)
