In [None]:
from google.colab import files
import os
import pandas as pd
import json
from datetime import datetime
import matplotlib.pyplot as plt

Summarize all metadata files

In [None]:
def summarize_metadata(metadata_path):
    """Summarize calibration activity for every metadata JSON file in a folder.

    Each ``*.json`` file in ``metadata_path`` is expected to contain the keys
    ``user_id``, ``video_id``, ``calibration_started`` and
    ``back_from_calibration``; the last two are lists of ISO-8601 timestamp
    strings. Start and end timestamps are paired positionally, and one summary
    row is produced per file.

    Args:
        metadata_path: Directory that holds the metadata JSON files. Files
            without a ``.json`` suffix are ignored.

    Returns:
        pandas.DataFrame with the columns ``user_id``, ``video_id``,
        ``count_calibrations``, ``total_calibration_time`` (seconds) and
        ``mean_calibration_time`` (seconds). The columns are present even when
        the folder contains no JSON files, so callers can still reference them.

    Raises:
        KeyError: If a JSON file lacks one of the expected keys.
        ValueError: If a timestamp is not a valid ISO-8601 string.
    """
    summary_columns = [
        "user_id",
        "video_id",
        "count_calibrations",
        "total_calibration_time",
        "mean_calibration_time",
    ]
    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order reproducible between runs.
    for file in sorted(os.listdir(metadata_path)):
        if not file.endswith(".json"):
            continue
        with open(os.path.join(metadata_path, file), "r", encoding="utf-8") as f:
            data = json.load(f)
        # The "Z" designator is stripped, so the parsed datetimes are naive.
        starts = [datetime.fromisoformat(t.replace("Z", "")) for t in data["calibration_started"]]
        ends = [datetime.fromisoformat(t.replace("Z", "")) for t in data["back_from_calibration"]]
        # zip pairs the i-th start with the i-th end and drops any unmatched
        # trailing entries when the two lists differ in length.
        durations = [(e - s).total_seconds() for s, e in zip(starts, ends)]
        total_time = sum(durations)
        # A file without any calibration reports a mean of 0 rather than NaN.
        mean_time = total_time / len(durations) if durations else 0
        rows.append({
            "user_id": data["user_id"],
            "video_id": data["video_id"],
            "count_calibrations": len(durations),
            "total_calibration_time": total_time,
            "mean_calibration_time": mean_time
        })
    # Explicit columns keep the schema stable when no rows were collected.
    return pd.DataFrame(rows, columns=summary_columns)

Upload metadata JSON files

In [None]:
# Ask for the metadata files; the upload result is iterated as
# (file name, raw bytes) pairs below.
print("Please upload ALL metadata JSON files (one or more):")
uploaded_metadata = files.upload()

# Copy every upload into a dedicated folder. The folder is reused if it already
# exists, so files left there by an earlier run are summarized again as well.
metadata_dir = "uploaded_metadata"
os.makedirs(metadata_dir, exist_ok=True)
for filename, content in uploaded_metadata.items():
    destination = os.path.join(metadata_dir, filename)
    with open(destination, "wb") as f:
        f.write(content)

# One summary row per metadata JSON file found in the folder.
metadata_summary = summarize_metadata(metadata_dir)

Upload performance CSV file

In [None]:
# Ask for the performance table.
print("\nPlease upload the performance CSV file:")
uploaded_performance = files.upload()

# Only the first uploaded file is used; any further uploads are ignored.
performance_filename, performance_bytes = list(uploaded_performance.items())[0]

# Write the bytes next to the notebook and load them from there.
performance_path = os.path.join(".", performance_filename)
with open(performance_path, "wb") as f:
    f.write(performance_bytes)

performance = pd.read_csv(performance_path)

Reshape performance data

In [None]:
# Wide -> long: one row per (participant, video) pair. The "<video>_mean"
# column names become video ids once the "_mean" suffix is removed, and
# "participant" is renamed to match the metadata key "user_id".
melted = (
    performance
    .melt(
        id_vars=["participant"],
        value_vars=["Merav1_mean", "Merav2_mean", "Merav3_mean"],
        var_name="video_id",
        value_name="performance_score",
    )
    .assign(video_id=lambda frame: frame["video_id"].str.replace("_mean", ""))
    .rename(columns={"participant": "user_id"})
)


Merge performance + metadata

In [None]:
# Left join: every performance row is kept; rows without a matching metadata
# summary get NaN in the calibration columns.
merged = pd.merge(
    melted,
    metadata_summary,
    how="left",
    on=["user_id", "video_id"],
)


Correlation analysis

In [None]:
# Pairwise Pearson correlations between the score and the calibration measures.
correlation_columns = [
    "performance_score",
    "count_calibrations",
    "total_calibration_time",
    "mean_calibration_time",
]
corrs = merged[correlation_columns].corr(method="pearson")
print("\nCorrelation matrix:\n", corrs)


 Save and visualize

In [None]:
# Write the merged table to disk and offer it as a browser download.
results_csv = "correlation_results.csv"
merged.to_csv(results_csv, index=False)
print("\nSaved merged results as 'correlation_results.csv'. You can download it below:")

files.download(results_csv)

# Pearson r between total calibration time and performance, read from the
# correlation matrix; it is shown in the plot title.
corr_value = corrs.loc["total_calibration_time", "performance_score"]
time_plot_title = f"Performance vs Calibration Time (Correlation: {corr_value:.2f})"

plt.scatter(merged["total_calibration_time"], merged["performance_score"])
plt.xlabel("Total Calibration Time (s)")
plt.ylabel("Performance Score")
plt.title(time_plot_title)
plt.show()

In [None]:
print("\nCorrelation with number of calibrations only:")

# 2x2 Pearson matrix restricted to the score and the calibration count.
count_columns = ["performance_score", "count_calibrations"]
corr_count = merged[count_columns].corr(method="pearson")
print(corr_count)

# Off-diagonal entry = correlation between the two columns.
corr_value_count = corr_count.loc["performance_score", "count_calibrations"]
count_plot_title = f"Performance vs Number of Calibrations (Correlation: {corr_value_count:.2f})"

plt.scatter(merged["count_calibrations"], merged["performance_score"])
plt.xlabel("Number of Calibrations")
plt.ylabel("Performance Score")
plt.title(count_plot_title)
plt.show()

Survey data summary


In [None]:
# === Step 1: Upload all survey JSON files ===
print("Please upload ALL survey_*.json files:")
uploaded_files = files.upload()

# === Step 2: Save uploaded files temporarily ===
# The folder is reused if it already exists, so files from an earlier run
# remain in it and are parsed again in Step 3.
survey_dir = "uploaded_surveys"
os.makedirs(survey_dir, exist_ok=True)

for filename, content in uploaded_files.items():
    with open(os.path.join(survey_dir, filename), "wb") as f:
        f.write(content)

# === Step 3: Parse all JSONs and build a summary ===
# Fields copied from each survey file; a field missing from a file becomes None.
survey_columns = ["user_id", "video_id", "EnjoymentRating", "FutureActivityRating"]
rows = []
for file in os.listdir(survey_dir):
    if not file.endswith(".json"):
        continue
    with open(os.path.join(survey_dir, file), "r", encoding="utf-8") as f:
        data = json.load(f)
    rows.append({column: data.get(column) for column in survey_columns})

# === Step 4: Create and sort DataFrame ===
# Declaring the columns explicitly keeps the schema when no survey file was
# found; without it the frame has no columns and sort_values raises KeyError.
df = pd.DataFrame(rows, columns=survey_columns).sort_values(by=["user_id", "video_id"])
print("\nCombined survey table:\n")
print(df)

# === Step 5: Save as CSV and download ===
output_file = "survey_summary.csv"
df.to_csv(output_file, index=False)
print(f"\nSaved summary as {output_file}")
files.download(output_file)

Survey and performance correlation analysis

In [None]:
# Attach the survey ratings to the performance/metadata table. Left join: rows
# without a matching survey keep NaN in the rating columns.
merged_with_survey = pd.merge(merged, df, how="left", on=["user_id", "video_id"])

# Pearson correlations between the score and the two survey ratings.
survey_corr_columns = ["performance_score", "EnjoymentRating", "FutureActivityRating"]
corrs_with_survey = merged_with_survey[survey_corr_columns].corr(method="pearson")

print("\nCorrelation matrix between performance and survey ratings:\n", corrs_with_survey)

# Individual coefficients, shown in the plot titles below.
corr_enjoyment = corrs_with_survey.loc["performance_score", "EnjoymentRating"]
corr_future_activity = corrs_with_survey.loc["performance_score", "FutureActivityRating"]

# One scatter plot per rating: (column, x-axis label, title).
survey_plots = (
    (
        "EnjoymentRating",
        "Enjoyment Rating",
        f"Performance vs Enjoyment Rating (Correlation: {corr_enjoyment:.2f})",
    ),
    (
        "FutureActivityRating",
        "Future Activity Rating",
        f"Performance vs Future Activity Rating (Correlation: {corr_future_activity:.2f})",
    ),
)
for rating_column, rating_label, plot_title in survey_plots:
    plt.scatter(merged_with_survey[rating_column], merged_with_survey["performance_score"])
    plt.xlabel(rating_label)
    plt.ylabel("Performance Score")
    plt.title(plot_title)
    plt.show()