In [3]:
import json
import pandas as pd

def load_json(file_path):
    """Read a file holding one JSON document per line and return the parsed values.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A list with one parsed value per line that decoded successfully,
        in file order. Lines that are not valid JSON (including blank
        lines) are skipped without any report.
    """
    parsed = []
    with open(file_path, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            try:
                entry = json.loads(raw_line)
            except json.JSONDecodeError:
                # Best-effort loading: an undecodable line is dropped, not fatal.
                pass
            else:
                parsed.append(entry)
    return parsed

def compute_scores(records):
    """Build one (user, item, b_ij) tuple per review record.

    b_ij is ``helpful_votes ** 2 / total_votes`` when ``total_votes`` is
    positive and ``0`` otherwise, where the two vote counts are read from
    the record's ``"helpful"`` pair.

    Args:
        records: Iterable of dict-like review records. The keys read are
            ``"reviewerID"``, ``"asin"`` and ``"helpful"``.

    Returns:
        A list of ``(user_id, item_id, b_ij)`` tuples in input order.
        ``user_id`` / ``item_id`` default to ``""`` when the key is absent.
    """
    user_item_scores = []
    for record in records:
        user_id = record.get("reviewerID", "")
        item_id = record.get("asin", "")
        try:
            helpful_votes, total_votes = record.get("helpful", (0, 0))
        except (TypeError, ValueError):
            # "helpful" is present but unusable (e.g. JSON null or a list that
            # is not exactly two items long): score it like a record with no
            # votes instead of aborting the whole run.
            helpful_votes, total_votes = 0, 0
        b_ij = (helpful_votes ** 2) / total_votes if total_votes > 0 else 0
        user_item_scores.append((user_id, item_id, b_ij))
    return user_item_scores

def _group_share(values, keys):
    """Divide each value by the sum of its group; the share is 0.0 where that sum is 0."""
    totals = values.groupby(keys).transform("sum")
    # A zero group total would give 0/0 = NaN, which then propagates into
    # every score derived from it, so define the share as 0 in that case.
    return values.div(totals).where(totals != 0, 0.0)


def normalize_scores(user_item_scores, alpha=0.5):
    """Turn raw (user, item, b_ij) tuples into per-item normalized scores.

    Columns computed (all normalizations are within one "Item ID" group):
      * ``h_ij``      - b_ij divided by the item's total b_ij.
      * ``z_ij``      - sum of 1/e**2 for e = 1 .. r+1, where r is the row's
                        descending position within its item (last row has r = 0).
      * ``most_ij``   - z_ij divided by the item's total z_ij.
      * ``q_ij``      - r divided by (the row's position within its user + 1).
      * ``top_ij``    - q_ij divided by the item's total q_ij.
      * ``d_ij``      - ``alpha * top_ij + (1 - alpha) * most_ij``.
      * ``avg_score`` - mean of h_ij and d_ij, clipped to [0, 1].

    Whenever an item's total is 0 (e.g. every b_ij of the item is 0, or the
    item has a single row so its only q_ij is 0) the corresponding share is
    0.0 rather than NaN.

    Args:
        user_item_scores: Sequence of ``(user_id, item_id, b_ij)`` tuples.
        alpha: Weight of ``top_ij`` in ``d_ij``; ``most_ij`` gets ``1 - alpha``.

    Returns:
        DataFrame with columns "User ID", "Item ID", "h_ij", "d_ij", "avg_score".
    """
    df = pd.DataFrame(user_item_scores, columns=["User ID", "Item ID", "b_ij"])
    item_ids = df["Item ID"]

    df["h_ij"] = _group_share(df["b_ij"].astype("float64"), item_ids)

    # Descending position of each row inside its item group (last row -> 0).
    rank_desc = df.groupby("Item ID").cumcount(ascending=False)

    # Partial sums of 1/e**2 computed once for every rank that occurs, then
    # looked up per row, instead of re-summing the series in Python per row.
    max_rank = int(rank_desc.max()) if len(rank_desc) else 0
    partial_sums = (1.0 / pd.Series(range(1, max_rank + 2), dtype="float64") ** 2).cumsum()
    df["z_ij"] = rank_desc.map(partial_sums)
    df["most_ij"] = _group_share(df["z_ij"], item_ids)

    df["q_ij"] = (1 / (df.groupby("User ID").cumcount() + 1)) * rank_desc
    df["top_ij"] = _group_share(df["q_ij"], item_ids)

    df["d_ij"] = alpha * df["top_ij"] + (1 - alpha) * df["most_ij"]
    df["avg_score"] = ((df["h_ij"] + df["d_ij"]) / 2).clip(0, 1)
    return df[["User ID", "Item ID", "h_ij", "d_ij", "avg_score"]]

def handle_missing_values(df):
    """Impute missing ``h_ij`` and ``avg_score`` entries with the column mean.

    An entry counts as missing when it is 0 or NaN. Each such entry is
    replaced by the mean of the column's remaining (non-zero, non-NaN)
    entries. A column that has no such remaining entry is left unchanged,
    because there is no mean to impute with.

    Args:
        df: DataFrame containing "h_ij" and "avg_score" columns. It is not
            modified; the work is done on a copy.

    Returns:
        A new DataFrame with the two columns imputed and all other columns
        untouched.
    """
    # Copy first: the input is typically a column-subset of another frame, and
    # assigning into it would mutate the caller's data and can trigger
    # pandas' SettingWithCopyWarning.
    result = df.copy()

    for column in ("h_ij", "avg_score"):
        values = result[column]
        missing = values.isna() | (values == 0)
        if missing.all():
            # No valid entry to average; imputing would turn zeros into NaN.
            continue
        result[column] = values.mask(missing, values[~missing].mean())

    return result
# Run the pipeline end to end: load the reviews, compute b_ij per review,
# normalize the scores, impute missing entries, and export the result as CSV.
# NOTE(review): the input path is absolute and machine-specific, so this cell
# only runs where that exact file exists — consider a configurable data dir.
file_path = r"D:\6TH SEMESTER\miniProject\Electronics_5.json\Electronics_5.json"
records = load_json(file_path)
user_item_scores = compute_scores(records)
normalized_scores = normalize_scores(user_item_scores)

# Rebinds the same name to the frame returned by the imputation step.
normalized_scores = handle_missing_values(normalized_scores)


# Relative path: the CSV is written to the current working directory.
# index=False keeps the DataFrame index out of the file.
output_csv = "amazon_electronics_scores.csv"
normalized_scores.to_csv(output_csv, index=False)

print(f"Data saved to {output_csv}")

Data saved to amazon_electronics_scores.csv


In [None]:
import shutil

# Move the notebook file from the current working directory to the project folder.
# NOTE(review): shutil.move raises if old_path does not exist (for example when
# the file has already been moved), so confirm this cell is meant to run once.
old_path = "amazon_electronics_scores.ipynb"  # Ensure this is the correct current path
new_path = r"D:\6TH SEMESTER\miniProject\amazon_electronics_scores.ipynb"

shutil.move(old_path, new_path)
print(f"Notebook moved to {new_path}")
