In [2]:
import gdown
import pandas as pd
from sklearn.metrics import mean_absolute_error

In [3]:
# Cap DataFrame rendering at 100 columns and 10 rows so wide frames stay
# readable while long frames are truncated in the output.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 10

# Evaluation

In [4]:
# Download the train and test splits from Google Drive into the working
# directory, then load both parquet files into DataFrames.
for source_url, local_name in (
    (
        "https://drive.google.com/uc?id=195ltI0Jgg9zsWatxUP4UlPf5sgsY9Rzw",
        "df_train.parquet",
    ),
    (
        "https://drive.google.com/uc?id=1-65WSGzYhFToxureZ-Rhg9POZNzfJSWJ",
        "df_test.parquet",
    ),
):
    gdown.download(source_url, local_name)

df_train, df_test = (
    pd.read_parquet(parquet_name)
    for parquet_name in ("df_train.parquet", "df_test.parquet")
)

Downloading...
From: https://drive.google.com/uc?id=195ltI0Jgg9zsWatxUP4UlPf5sgsY9Rzw
To: /home/terrabot/bk-imp/math/assignment/df_train.parquet
100%|██████████| 12.3M/12.3M [00:01<00:00, 11.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-65WSGzYhFToxureZ-Rhg9POZNzfJSWJ
To: /home/terrabot/bk-imp/math/assignment/df_test.parquet
100%|██████████| 1.42M/1.42M [00:00<00:00, 4.68MB/s]


In [5]:
# Keep a reproducible 30% random subset of each split to speed up computation.
# NOTE: the names are rebound to the sampled frames, so re-running this cell
# without reloading the data samples 30% of the already-reduced frames.
df_train, df_test = (
    frame.sample(frac=0.3, random_state=42) for frame in (df_train, df_test)
)

In [6]:
# Algorithm name -> Google Drive file id of that algorithm's stored result
# parquet (the id is interpolated into a Drive download URL during evaluation).
results = dict(
    louvain="1xP1HlCeHquUp497Cu5yFsB4hgH2_O7be",
    label_prop="1b34ZM0zr0JwtAfA2pFrhugIgxiAI9EZe",
    greedy_modularity="16cGBsC9Vh3vhG4TI5JVYE3nB7v-Ufixp",
)

In [7]:
# For every algorithm listed in `results`: download its stored predictions,
# join them with the (sampled) test set, and print the mean absolute error
# between the true `overall` rating and `predicted_overall`.
for algorithm, drive_file_id in results.items():
    # Every result is saved under the same local file name, so each download
    # overwrites the previous algorithm's file.
    gdown.download(
        f"https://drive.google.com/uc?id={drive_file_id}",
        "algorithm_result_df.parquet",
    )

    predict_result_df = pd.read_parquet("algorithm_result_df.parquet")
    # Keep only the rows that actually carry a prediction.
    predict_result_df = predict_result_df[
        predict_result_df["predicted_overall"].notna()
    ]

    # Inner join: only (reviewerID, asin) pairs present in both the predictions
    # and df_test contribute to the metric.
    df_evaluate = pd.merge(
        predict_result_df, df_test, on=["reviewerID", "asin"], how="inner"
    )
    if df_evaluate.empty:
        # mean_absolute_error raises on empty input; report the situation and
        # keep evaluating the remaining algorithms instead of aborting.
        print(
            f"No overlapping (reviewerID, asin) pairs for algorithm "
            f"{algorithm}; MAE not computed"
        )
        continue

    # NOTE(review): astype(int) truncates any fractional part rather than
    # rounding to the nearest rating. Presumably predictions are meant to be
    # whole-number ratings -- confirm whether rounding was intended.
    df_evaluate["predicted_overall"] = df_evaluate["predicted_overall"].astype(
        int
    )

    ground_truth = df_evaluate["overall"].values
    predicted_values = df_evaluate["predicted_overall"].values

    mae = mean_absolute_error(ground_truth, predicted_values)

    print(f"Mean Absolute Error (MAE) from algorithm {algorithm}: {mae}")

Downloading...
From: https://drive.google.com/uc?id=1xP1HlCeHquUp497Cu5yFsB4hgH2_O7be
To: /home/terrabot/bk-imp/math/assignment/algorithm_result_df.parquet
100%|██████████| 29.7k/29.7k [00:00<00:00, 502kB/s]


Mean Absolute Error (MAE) from algorithm louvain: 1.3076923076923077


Downloading...
From: https://drive.google.com/uc?id=1b34ZM0zr0JwtAfA2pFrhugIgxiAI9EZe
To: /home/terrabot/bk-imp/math/assignment/algorithm_result_df.parquet
100%|██████████| 29.5k/29.5k [00:00<00:00, 469kB/s]


Mean Absolute Error (MAE) from algorithm label_prop: 1.75


Downloading...
From: https://drive.google.com/uc?id=16cGBsC9Vh3vhG4TI5JVYE3nB7v-Ufixp
To: /home/terrabot/bk-imp/math/assignment/algorithm_result_df.parquet
100%|██████████| 30.1k/30.1k [00:00<00:00, 489kB/s]

Mean Absolute Error (MAE) from algorithm greedy_modularity: 1.125984251968504



