<a href="https://colab.research.google.com/github/nishu204/DM_notebooks/blob/main/HITS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the dashboard dependencies into the environment of the running kernel.
# `%pip` (unlike `!pip`, which shells out) always targets the kernel's interpreter.
# NOTE(review): consider pinning versions here once a known-good pair is confirmed.
%pip install dash
%pip install dash_bootstrap_components

Collecting dash
  Downloading dash-2.14.2-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from dash)
  Downloading ansi2html-1.9.1-py3-none-any.whl (17 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, ansi2html, dash
Successfully installed ansi2html-1.9.1 dash-2.14.2 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4
Collecting dash_bootstrap_components
  Downloading d

In [4]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import networkx as nx



# Function to calculate HITS from scratch
def calculate_hits(file_path, max_iter=100, tol=1e-6):
    """Compute HITS hub and authority scores for a directed edge list.

    The file is parsed as tab-separated text with two integer columns
    (source, target); lines starting with '#' are skipped as comments.

    Each iteration updates both score sets from the *previous* iteration's
    values (authority = sum of predecessor hub scores, hub = sum of successor
    authority scores) and then divides both by one shared factor: the largest
    value found in either new score set.

    Args:
        file_path: Path (or anything ``pandas.read_csv`` accepts) of the edge list.
        max_iter: Maximum number of update rounds.
        tol: Per-node absolute change below which both score sets are
            considered converged.

    Returns:
        A ``(hub_scores, authority_scores)`` tuple of dicts keyed by node.
        Both dicts are empty when the edge list contains no edges.
    """
    # Read the edge list from the file
    edges = pd.read_csv(file_path, sep='\t', comment='#', header=None, names=['source', 'target'], dtype=int)

    # Create a directed graph and add the edges to it
    G = nx.DiGraph()
    G.add_edges_from(edges.values)

    # Nothing to score; also avoids max() raising ValueError on empty sequences below.
    if G.number_of_nodes() == 0:
        return {}, {}

    # Initialize Hub and Authority scores
    hub_scores = {node: 1 for node in G.nodes}
    authority_scores = {node: 1 for node in G.nodes}

    # Iterate until convergence or maximum iterations
    for _ in range(max_iter):
        # Update Authority scores
        new_authority_scores = {node: sum(hub_scores[neighbor] for neighbor in G.predecessors(node)) for node in G.nodes}

        # Update Hub scores
        new_hub_scores = {node: sum(authority_scores[neighbor] for neighbor in G.successors(node)) for node in G.nodes}

        # Normalize scores with a single factor shared by both score sets
        norm_factor = max(max(new_authority_scores.values()), max(new_hub_scores.values()))
        new_authority_scores = {node: score / norm_factor for node, score in new_authority_scores.items()}
        new_hub_scores = {node: score / norm_factor for node, score in new_hub_scores.items()}

        # Check for convergence against the previous iteration
        converged = all(
            abs(new_authority_scores[node] - authority_scores[node]) < tol and
            abs(new_hub_scores[node] - hub_scores[node]) < tol
            for node in G.nodes
        )

        # Always keep the freshest (normalized) scores, so that a converged run
        # returns the latest iteration rather than the one before it.
        authority_scores = new_authority_scores
        hub_scores = new_hub_scores

        if converged:
            break

    return hub_scores, authority_scores

# Function to create a Dash table from the results
def create_table(hub_data, authority_data, max_rows=10):
    """Build a Dash layout showing the top hub and authority scores side by side.

    Args:
        hub_data: Mapping of page -> hub score.
        authority_data: Mapping of page -> authority score.
        max_rows: Maximum number of rows shown in each table.

    Returns:
        An ``html.Div`` containing two tables ('Hub Scores' and
        'Authority Scores'), each sorted by score in descending order.
    """
    return html.Div(children=[
        html.H2(children=''),
        html.Div(children=[
            html.Div(children=[
                html.H3(children='Hub Scores'),
                _score_table(hub_data, 'Hub', max_rows),
            ], style={'width': '48%', 'float': 'left', 'marginRight': '2%'}),

            html.Div(children=[
                html.H3(children='Authority Scores'),
                _score_table(authority_data, 'Authority', max_rows),
            ], style={'width': '48%', 'float': 'left'}),
        ], style={'marginBottom': '20px'})
    ])


def _score_table(scores, score_label, max_rows):
    """Return an ``html.Table`` of the ``max_rows`` highest entries in ``scores``."""
    # Table-level properties belong on the <table> element; on <tr> they have no effect.
    # Dash style dictionaries use camelCased CSS property names.
    table_style = {
        'borderSpacing': '10px',
        'borderCollapse': 'separate',
        'width': '100%',  # Adjust the width as needed
    }
    th_style = {'padding': '10px'}  # Adjust the padding as needed
    td_style = {'padding': '5px'}  # Adjust the padding as needed

    top_scores = (
        pd.DataFrame(list(scores.items()), columns=['Page', score_label])
        .sort_values(by=score_label, ascending=False)
        .head(max_rows)
    )

    header = html.Tr([html.Th(col, style=th_style) for col in top_scores.columns])
    # Iterate column-wise tuples rather than `iloc[i][col]`, which would first
    # coerce every row to a single dtype (turning integer page ids into floats).
    body = [
        html.Tr([html.Td(value, style=td_style) for value in row])
        for row in top_scores.itertuples(index=False, name=None)
    ]
    return html.Table([header] + body, style=table_style)

# Define the Dash app
app = dash.Dash(__name__)

# Specify the path to the downloaded file
# NOTE(review): this is an absolute Colab path; the file must be uploaded there before running.
file_path = '/content/web-Google.txt'

# Calculate HITS; the result is a (hub_scores, authority_scores) tuple
hits_data = calculate_hits(file_path)

# Define the layout of the app
app.layout = html.Div(children=[
    html.H1(children='HITS Algorithm Dashboard'),

    # Explain why the adjacency matrix is not shown
    html.Div(children=[
        html.H2(children='Adjacency Matrix'),
        dcc.Markdown(children='''
            The adjacency matrix is not displayed here due to its large size.
            However, it is used internally for HITS calculations.
        ''')
    ]),

    # Display the HITS results in tables
    html.Div(children=[
        html.H2(children='HITS Results'),
        create_table(hits_data[0], hits_data[1])
    ])
])

if __name__ == '__main__':
    # `app.run` is the supported entry point; `app.run_server` is a deprecated
    # alias that no longer exists in Dash 3, which an unpinned install pulls in.
    app.run(debug=True)


<IPython.core.display.Javascript object>