# Survivor vote-off reactions, by season
> How did contestants in different seasons respond after their torches were snuffed? This notebook calculates each season's average score for how often voted-off castaways acknowledged their tribemates by looking, smiling, gesturing or speaking.

#### Load Python tools and Jupyter config

In [1]:
import os
import json
import boto3
import pandas as pd
import jupyter_black

In [2]:
# Turn on Black auto-formatting for the cells in this notebook
jupyter_black.load()

# Raise pandas display limits so wide and long frames are not truncated
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)

---

## Fetch

#### Read season summary from survivoR2py repo

In [3]:
# Read the season summary table (season/episode requested as strings), keep
# only rows for the US version, and trim to the columns used later on.
season_summary_url = "https://raw.githubusercontent.com/stiles/survivoR2py/main/data/raw/csv/season_summary.csv"
season_summary_cols = ["season", "n_cast", "tribe_setup", "location", "country"]

season_summary_all = pd.read_csv(
    season_summary_url,
    dtype={"season": str, "episode": str},
)
season_summary_df = season_summary_all.query('version == "US"')[season_summary_cols]

In [4]:
# Strip a trailing ".0" from season labels (e.g. "1.0" -> "1") so they match
# the season keys in the vote-off log.
# The pattern is escaped, anchored to the end of the string, and passed with an
# explicit regex=True: the default for `regex` differs between pandas versions,
# and an unescaped ".0" interpreted as a regex matches ANY character followed
# by "0", which would blank out labels such as "10" or "20".
season_summary_df["season"] = season_summary_df["season"].str.replace(
    r"\.0$", "", regex=True
)

#### Read vote off log processed in `scripts/01_vote_off_reaction.py`

In [5]:
# Load the vote-off reaction log; the identifier-like fields are requested as
# strings so `season` can be joined against the season summary.
voteoff_url = "https://stilesdata.com/survivor/survivor_vote_off_reactions.json"
voteoff_dtypes = {"season": str, "vote": str, "episode": str}

voteoff_src = pd.read_json(voteoff_url, dtype=voteoff_dtypes)

#### Add season summary details to vote offs

In [6]:
# Attach the season-level columns to each vote-off row. This is an inner join
# on `season`, so rows without a matching season on both sides are dropped.
voteoff_df = voteoff_src.merge(season_summary_df, how="inner", on="season")

---

## Aggregate

#### Mean score over the life of the series

In [8]:
# Series-wide mean acknowledgement score, as a plain Python float rounded to
# two decimals.
overall_mean_ack = voteoff_df["ack_score"].mean()
series_score = round(float(overall_mean_ack), 2)
series_score

1.69

#### Group by gender and get mean score

In [7]:
# Per-gender row count (non-null castaway_id values) and mean acknowledgement
# score, rounded to two decimals, with `gender` returned as a regular column.
gender_scores = (
    voteoff_df.groupby("gender")
    .agg(
        count=("castaway_id", "count"),
        mean_score=("ack_score", "mean"),
    )
    .round(2)
    .reset_index()
)
gender_scores

Unnamed: 0,gender,count,mean_score
0,Female,353,1.7
1,Male,339,1.68
2,Non-binary,1,3.0


#### Mean score by season

In [9]:
# One row per season: the descriptive season columns are used as grouping keys
# so they are carried through next to the mean acknowledgement score.
season_group_cols = ["season", "season_name", "n_cast", "location", "country"]

season_mean_ack = voteoff_df.groupby(season_group_cols)["ack_score"].mean()
season_scores = season_mean_ack.round(2).reset_index(name="mean_ack_score")

In [10]:
# Repeat the series-wide mean on every season row so each season can be
# compared against it.
season_scores = season_scores.assign(series_score=series_score)

#### Negative score = less acknowledgement in a season than the series average

In [11]:
# Difference between each season's mean score and the series-wide mean.
# Both inputs are already rounded to two decimals, but subtracting floats can
# leave binary noise (values like 1.2400000000000002) that would end up in the
# exported CSV/JSON, so the result is rounded back to two decimals.
season_scores["season_score_diff"] = (
    season_scores["mean_ack_score"] - season_scores["series_score"]
).round(2)

#### Highest acknowledgement

In [12]:
# Five seasons furthest above the series average (largest difference first)
season_scores.sort_values(by="season_score_diff", ascending=False).head(5)

Unnamed: 0,season,season_name,n_cast,location,country,mean_ack_score,series_score,season_score_diff
7,16,Survivor: Micronesia,20,"Koror, Palau",Palau,2.93,1.69,1.24
31,38,Survivor: Edge of Extinction,18,"Mamanuca Islands, Fiji",Fiji,2.88,1.69,1.19
36,42,Survivor: 42,18,"Mamanuca Islands, Fiji",Fiji,2.71,1.69,1.02
41,5,Survivor: Thailand,16,"Ko Tarutao, Satun Province, Thailand",Thailand,2.71,1.69,1.02
38,44,Survivor: 44,18,"Mamanuca Islands, Fiji",Fiji,2.69,1.69,1.0


#### Lowest acknowledgement

In [13]:
# Five seasons furthest below the series average; the frame is still sorted
# largest-first, so the lowest season is the last row shown.
season_scores.sort_values(by="season_score_diff", ascending=False).tail(5)

Unnamed: 0,season,season_name,n_cast,location,country,mean_ack_score,series_score,season_score_diff
14,22,Survivor: Redemption Island,18,"San Juan del Sur, Rivas, Nicaragua",Nicaragua,0.65,1.69,-1.04
43,7,Survivor: Pearl Islands,16,"Pearl Islands, Panama",Panama,0.62,1.69,-1.07
6,15,Survivor: China,16,"Zhelin, Jiujiang, Jiangxi, China",China,0.54,1.69,-1.15
11,2,Survivor: The Australian Outback,16,"Herbert River at Goshen Station, Queensland, A...",Australia,0.0,1.69,-1.69
22,3,Survivor: Africa,16,"Shaba National Reserve, Kenya",Kenya,0.0,1.69,-1.69


---

## Export

In [14]:
# Local destinations for the per-season scores (CSV and JSON)
csv_output_path, json_output_path = (
    "../data/processed/survivor_voteoff_ack_scores_seasons.csv",
    "../data/processed/survivor_voteoff_ack_scores_seasons.json",
)

In [15]:
# Write the season scores to the CSV output path, leaving out the row index
season_scores.to_csv(path_or_buf=csv_output_path, index=False)

In [16]:
# Write the season scores to the JSON output path as a list of row records,
# indented by four spaces
season_scores.to_json(path_or_buf=json_output_path, orient="records", indent=4)

In [17]:
# S3 destination for the exported CSV and JSON files
s3_bucket = "stilesdata.com"
s3_csv_key = "survivor/survivor_voteoff_ack_scores_seasons.csv"
s3_json_key = "survivor/survivor_voteoff_ack_scores_seasons.json"

# Credentials are read from environment variables; os.getenv returns None for
# any variable that is not set, and that None is passed through to boto3.
aws_credentials = {
    "aws_access_key_id": os.getenv("MY_AWS_ACCESS_KEY_ID"),
    "aws_secret_access_key": os.getenv("MY_AWS_SECRET_ACCESS_KEY"),
    "aws_session_token": os.getenv("MY_AWS_SESSION_TOKEN"),
}
s3_client = boto3.client("s3", **aws_credentials)

In [18]:
# Send the local CSV export to the bucket, then report where it landed
s3_client.upload_file(
    Filename=str(csv_output_path),
    Bucket=s3_bucket,
    Key=s3_csv_key,
)
print(f"CSV file uploaded to s3://{s3_bucket}/{s3_csv_key}")

CSV file uploaded to s3://stilesdata.com/survivor/survivor_voteoff_ack_scores_seasons.csv


In [19]:
# Send the local JSON export to the bucket, then report where it landed
s3_client.upload_file(
    Filename=str(json_output_path),
    Bucket=s3_bucket,
    Key=s3_json_key,
)
print(f"JSON file uploaded to s3://{s3_bucket}/{s3_json_key}")

JSON file uploaded to s3://stilesdata.com/survivor/survivor_voteoff_ack_scores_seasons.json
