In [None]:
!pip install pyreadr

In [None]:
!sudo apt-get install python3-dev graphviz libgraphviz-dev pkg-config

In [None]:
!pip install pygraphviz

In [None]:
import pyreadr
import networkx as nx
import graphviz
from dataclasses import dataclass

In [None]:
# Fetch the survivoR castaways dataset (an R .rda file) from GitHub into the
# current working directory.
castaways_url = (
    "https://github.com/doehm/survivoR/blob/master/data/castaways.rda?raw=true"
)
castaways_file = "castaways.rda"
pyreadr.download_file(castaways_url, castaways_file)

In [None]:
# Parse the downloaded .rda file. The result is indexed by R object name; the
# next cell reads its "castaways" entry.
result = pyreadr.read_r("castaways.rda")

In [None]:
# Take the castaways table out of the loaded R objects and keep only the rows
# whose "version" column equals "US".
castaways_all = result["castaways"]
is_us_version = castaways_all["version"] == "US"
df = castaways_all[is_us_version]

In [None]:
# df[df["version"]=="US"][["castaway_id", "state"]].drop_duplicates()["state"].value_counts()
# df[["castaway_id", "full_name"]].value_counts().head(20)

In [None]:
# One row per season, with the season number stored as a plain integer.
seasons = (
    df[["version_season", "season_name", "season"]]
    .drop_duplicates()
    .astype({"season": int})
)

# Directed graph with one node per season, keyed by season number. Each node
# carries a display label such as "S<number>: <name>", with the "Survivor:"
# prefix stripped from the season name.
g = nx.DiGraph()
for season_row in seasons.itertuples():
    short_name = season_row.season_name.replace("Survivor:", "").strip()
    g.add_node(season_row.season, label=f"S{season_row.season}: {short_name}")

In [None]:
@dataclass
class Castaway:
    """One appearance of a castaway: who played, and in which season."""

    # Castaway identifier (populated from the castaway_id column).
    id: str
    # Castaway's full name (populated from the full_name column).
    name: str
    # Season number of this appearance.
    season: int


# The source table can list the same castaway several times within one season
# (some were brought back during the same season), so collapse exact repeats
# to get one row per castaway appearance.
appearance_cols = ["castaway_id", "full_name", "version_season", "season_name", "season"]
survivors = df[appearance_cols].drop_duplicates()
survivors["season"] = survivors["season"].astype(int)

# Walk the appearances ordered by castaway and then season. Whenever two
# neighbouring rows belong to the same castaway, add an edge from the earlier
# season to the later one.
prev_survivor = None
for row in survivors.sort_values(["castaway_id", "season"]).itertuples():
    curr_survivor = Castaway(id=row.castaway_id, name=row.full_name, season=row.season)
    same_castaway = prev_survivor is not None and prev_survivor.id == curr_survivor.id
    if same_castaway:
        g.add_edge(prev_survivor.season, curr_survivor.season)
    prev_survivor = curr_survivor

In [None]:
# Write the season graph to the file survivor.dot, which the next cell reads
# back as DOT text for rendering.
# NOTE(review): nx_agraph presumably relies on pygraphviz (installed above) — confirm.
nx.nx_agraph.write_dot(g, "survivor.dot")

In [None]:
# Read back the DOT text written by the previous cell and echo it for inspection.
with open("survivor.dot") as dot_file:
    dot_txt = dot_file.read()
print(dot_txt)

# Wrap the DOT text in a graphviz Source configured for PNG output, then render
# and open it.
# NOTE(review): view() presumably launches an external viewer, which may show
# nothing on a headless kernel — confirm this is the intended display path.
dot = graphviz.Source(dot_txt, format="png")
dot.view()

In [None]:
# Build a table listing, for each season, the earlier seasons its returnees
# came from. The edge list has one row per graph edge, with the season numbers
# (the node keys) in the "source" and "target" columns.
edgelist = nx.to_pandas_edgelist(g)
labels = nx.get_node_attributes(g, "label")

# Order the edges numerically *before* replacing season numbers with their text
# labels. Sorting on the labels themselves is lexicographic ("S11: ..." sorts
# ahead of "S8: ..."), which scrambles the season order of the final table.
edgelist = edgelist.sort_values(["target", "source"])

edgelist["source"] = edgelist["source"].map(labels)
edgelist["target"] = edgelist["target"].map(labels)
edgelist = edgelist.rename(columns={"source": "Returnees From", "target": "Season"})

# sort=False keeps the groups in order of first appearance, i.e. the numeric
# season order established above; each group's list is likewise in numeric
# source-season order.
out_table = edgelist.groupby("Season", sort=False).agg(list)
out_table["Returnees From"] = out_table["Returnees From"].str.join(", ")
out_table

In [None]:
# Print the table as Markdown text so it can be pasted elsewhere.
# NOTE(review): DataFrame.to_markdown presumably needs the optional `tabulate`
# package, which no install cell in this notebook adds — confirm it is available.
print(out_table.to_markdown())