In [15]:
import pandas as pd
import numpy as np
from tabulate import tabulate  # Make sure to install tabulate: pip install tabulate

# Read the review records from the CSV file into a DataFrame
file_path = "500ECE.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

# Calculate helpfulness score (b_ij)
def calculate_helpfulness(row):
    """Return the helpfulness score b_ij for one review row.

    The score is ``helpful_votes ** 2 / total_votes``, where the helpful
    vote count is read from ``row['helpful__001']`` and the total vote
    count from ``row['helpful__002']``.

    Args:
        row: Any mapping-like object indexable by column name (for example
            a pandas Series produced by ``DataFrame.apply(..., axis=1)``).

    Returns:
        The score, or 0 when the row cannot be scored: the total vote count
        is not positive, a column is missing, a value is non-numeric, or
        either value is NaN.
    """
    try:
        helpful_votes = row['helpful__001']
        total_votes = row['helpful__002']
        # ``not x > 0`` is also true for NaN, so a missing total yields 0.
        if not total_votes > 0:
            return 0
        score = (helpful_votes ** 2) / total_votes
    except (LookupError, TypeError, ValueError, ArithmeticError):
        # Only data problems are treated as "no score"; KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return 0
    # NaN is the only value that differs from itself; a NaN helpful-vote
    # count would otherwise leak a NaN score into the b_ij column.
    if score != score:
        return 0
    return score

# Score every row with calculate_helpfulness and store the result as b_ij
data['b_ij'] = data.apply(calculate_helpfulness, axis='columns')

# Normalize helpfulness scores (h_ij): each row's share of the summed b_ij.
# When the sum is not positive, the column falls back to a constant 0.
total_bij = data['b_ij'].sum()
if total_bij > 0:
    data['h_ij'] = data['b_ij'].div(total_bij)
else:
    data['h_ij'] = 0

# Calculate reliability scores
# NOTE(review): z_ij is one constant for every row, so most_ij comes out
# identical for all rows and does not affect the d_ij ordering - confirm
# that this is the intended formula.
data['z_ij'] = 1 / np.exp(2)
data['most_ij'] = data['z_ij'].div(data['z_ij'].sum())

# q_ij depends only on a row's index label (smaller label -> larger value);
# top_ij is q_ij rescaled by the column sum.
data['q_ij'] = (1 / (data.index + 1)) * data.shape[0]
data['top_ij'] = data['q_ij'].div(data['q_ij'].sum())

# Calculate combined reliability score (d_ij): alpha weights top_ij and
# (1 - alpha) weights most_ij
alpha = 0.5
data['d_ij'] = data['top_ij'].mul(alpha).add(data['most_ij'].mul(1 - alpha))

# Rank rows from highest to lowest on each score
sorted_by_helpfulness = data.sort_values("b_ij", ascending=False)
sorted_by_reliability = data.sort_values("d_ij", ascending=False)

# Keep the first five rows of each ranking, restricted to the display columns
helpfulness_table = sorted_by_helpfulness.head(5)[['reviewerName', 'b_ij', 'h_ij']]
reliability_table = sorted_by_reliability.head(5)[['reviewerName', 'd_ij', 'top_ij', 'most_ij']]

# Print tables using tabulate
def _print_grid(title, table, headers):
    """Print *title*, then *table* as a grid with floats shown to 4 decimals."""
    print(title)
    print(tabulate(table, headers=headers, tablefmt='grid', floatfmt='.4f'))


_print_grid(
    "Top 5 Reviewers by Helpfulness Score:",
    helpfulness_table,
    ['Reviewer Name', 'Helpfulness Score', 'Normalized Score'],
)
_print_grid(
    "\nTop 5 Reviewers by Reliability Score:",
    reliability_table,
    ['Reviewer Name', 'Reliability Score', 'Top Ranking Score', 'Most Recent Score'],
)


Top 5 Reviewers by Helpfulness Score:
+-----+------------------+---------------------+--------------------+
|     | Reviewer Name    |   Helpfulness Score |   Normalized Score |
+=====+==================+=====================+====================+
| 481 | cacalazani       |            489.6171 |             0.1721 |
+-----+------------------+---------------------+--------------------+
| 411 | J. Peplinski     |            373.2545 |             0.1312 |
+-----+------------------+---------------------+--------------------+
| 357 | suzatm           |            252.1364 |             0.0886 |
+-----+------------------+---------------------+--------------------+
| 300 | Alan  Houston    |            221.1082 |             0.0777 |
+-----+------------------+---------------------+--------------------+
| 352 | Rob "riverstone" |            151.5848 |             0.0533 |
+-----+------------------+---------------------+--------------------+

Top 5 Reviewers by Reliability Score:
+----+--------------------------+---------------------+---------------------+------