# Survivor seasons and episodes: data preparation

#### Load Python tools and Jupyter config

In [173]:
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
from tqdm.notebook import tqdm

In [174]:
# Turn on jupyter_black for this session.
jupyter_black.load()

# Widen pandas' display limits so wide/long frames and long strings
# (such as full wiki URLs) are shown without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [175]:
# Current local date as a compact YYYYMMDD string (e.g. for file names).
today = f"{pd.Timestamp.now():%Y%m%d}"

---

## Read data

#### Season URLs

In [20]:
# Root of every season page on the Survivor fandom wiki.
WIKI_ROOT = "https://survivor.fandom.com/wiki/"

# Season titles in the order they are listed below (Borneo first, 46 last).
season_titles = [
    "Survivor: Borneo",
    "Survivor: The Australian Outback",
    "Survivor: Africa",
    "Survivor: Marquesas",
    "Survivor: Thailand",
    "Survivor: The Amazon",
    "Survivor: Pearl Islands",
    "Survivor: All-Stars",
    "Survivor: Vanuatu",
    "Survivor: Palau",
    "Survivor: Guatemala",
    "Survivor: Panama",
    "Survivor: Cook Islands",
    "Survivor: Fiji",
    "Survivor: China",
    "Survivor: Micronesia",
    "Survivor: Gabon",
    "Survivor: Tocantins",
    "Survivor: Samoa",
    "Survivor: Heroes vs. Villains",
    "Survivor: Nicaragua",
    "Survivor: Redemption Island",
    "Survivor: South Pacific",
    "Survivor: One World",
    "Survivor: Philippines",
    "Survivor: Caramoan",
    "Survivor: Blood vs. Water",
    "Survivor: Cagayan",
    "Survivor: San Juan del Sur",
    "Survivor: Worlds Apart",
    "Survivor: Cambodia",
    "Survivor: Kaôh Rōng",
    "Survivor: Millennials vs. Gen X",
    "Survivor: Game Changers",
    "Survivor: Heroes vs. Healers vs. Hustlers",
    "Survivor: Ghost Island",
    "Survivor: David vs. Goliath",
    "Survivor: Edge of Extinction",
    "Survivor: Island of the Idols",
    "Survivor: Winners at War",
    "Survivor 41",
    "Survivor 42",
    "Survivor 43",
    "Survivor 44",
    "Survivor 45",
    "Survivor 46",
]

# Map each season title to its wiki page. The page slug is the title with
# spaces swapped for underscores; punctuation and accents are kept as-is.
season_urls = {title: WIKI_ROOT + title.replace(" ", "_") for title in season_titles}

In [179]:
# Two-column lookup table: one row per season, in dict insertion order.
seasons_df = pd.DataFrame(
    {
        "season": list(season_urls.keys()),
        "url": list(season_urls.values()),
    }
)

In [None]:
# Plain Python list of the season page URLs, in table order.
urls = list(seasons_df["url"])

In [185]:
# Echo the URL list as the cell output for a quick visual check.
urls

['https://survivor.fandom.com/wiki/Survivor:_Borneo',
 'https://survivor.fandom.com/wiki/Survivor:_The_Australian_Outback',
 'https://survivor.fandom.com/wiki/Survivor:_Africa',
 'https://survivor.fandom.com/wiki/Survivor:_Marquesas',
 'https://survivor.fandom.com/wiki/Survivor:_Thailand',
 'https://survivor.fandom.com/wiki/Survivor:_The_Amazon',
 'https://survivor.fandom.com/wiki/Survivor:_Pearl_Islands',
 'https://survivor.fandom.com/wiki/Survivor:_All-Stars',
 'https://survivor.fandom.com/wiki/Survivor:_Vanuatu',
 'https://survivor.fandom.com/wiki/Survivor:_Palau',
 'https://survivor.fandom.com/wiki/Survivor:_Guatemala',
 'https://survivor.fandom.com/wiki/Survivor:_Panama',
 'https://survivor.fandom.com/wiki/Survivor:_Cook_Islands',
 'https://survivor.fandom.com/wiki/Survivor:_Fiji',
 'https://survivor.fandom.com/wiki/Survivor:_China',
 'https://survivor.fandom.com/wiki/Survivor:_Micronesia',
 'https://survivor.fandom.com/wiki/Survivor:_Gabon',
 'https://survivor.fandom.com/wiki/Sur

In [None]:
# Remove every literal double-quote character from the episode titles.
# NOTE(review): `src_all` is not defined in the visible cells — confirm it is
# created earlier in the notebook.
episode_titles = src_all['episode_name']
src_all['episode_name'] = episode_titles.str.replace('"', '')

In [145]:
# Drop any run of digits at the very end of the challenge-winner text
# (presumably wiki footnote markers — TODO confirm) and turn missing
# values into empty strings. Same treatment for both columns.
for challenge_col in ('reward', 'immunity'):
    without_trailing_digits = src_all[challenge_col].str.replace(r'\d+$', '', regex=True)
    src_all[challenge_col] = without_trailing_digits.fillna('')

In [146]:
# Parse the air dates once, then store both the datetime column and the
# calendar year derived from it.
air_dates = pd.to_datetime(src_all['air_date'])
src_all['air_date'] = air_dates
src_all['year'] = air_dates.dt.year

In [147]:
# Split the combined text on "(" after removing every ")": the part before
# the "(" becomes `eliminated`, the part after it becomes `vote`.
# NOTE(review): the two-column assignment requires the split to yield exactly
# two columns, i.e. no value may contain more than one "(" — confirm for new data.
eliminated_vote_text = src_all['eliminated_vote'].str.replace(')', '')
eliminated_vote_parts = eliminated_vote_text.str.split('(', expand=True)
src_all[['eliminated', 'vote']] = eliminated_vote_parts

In [148]:
# Strip the " Voted Out" text, then split on "Day " so the placement stays in
# `finish` and the text after "Day " goes to `day`.
# NOTE(review): this overwrites `finish` in place, so re-running the cell on
# the already-split column will not produce two parts again.
finish_text = src_all['finish'].str.replace(' Voted Out', '')
finish_day_parts = finish_text.str.split('Day ', expand=True)
src_all[['finish', 'day']] = finish_day_parts

In [149]:
# Keep only the text before the first "[" in the viewers column
# (presumably dropping wiki citation markers — TODO confirm).
viewers_text = src_all['viewers_millions'].str.split('[', expand=True)
src_all['viewers_millions'] = viewers_text[0]

# Same "[" trim for ratings; keep the trimmed text as `ratings_src`, then
# split it on "/" once: first piece -> `ratings`, second piece -> `share`.
src_all['ratings_src'] = src_all['ratings'].str.split('[', expand=True)[0]
rating_share_parts = src_all['ratings_src'].str.split('/', expand=True)
src_all['ratings'] = rating_share_parts[0]
src_all['share'] = rating_share_parts[1]

In [168]:
# Derive a short season name from the wiki URL.
#
# Steps, all as literal (non-regex) replacements so the "." characters in the
# base URL are never treated as wildcards:
#   1. drop the wiki base URL,
#   2. drop the "Survivor:" prefix (numbered seasons such as "Survivor_41"
#      have no colon and therefore keep the word "Survivor"),
#   3. turn underscores into spaces,
#   4. trim surrounding whitespace.
# Step 4 matters: for "Survivor:_Borneo" the underscore right after the colon
# becomes a space, which previously left a leading space (" Borneo").
src_all['season_name'] = (
    src_all['wiki_url']
    .str.replace('https://survivor.fandom.com/wiki/', '', regex=False)
    .str.replace('Survivor:', '', regex=False)
    .str.replace('_', ' ', regex=False)
    .str.strip()
)

In [176]:
# Columns kept for the output frame, in their final order.
output_columns = [
    "episode_no",
    "episode_name",
    "air_date",
    "season_name",
    "reward",
    "immunity",
    "finish",
    "viewers_millions",
    "ratings",
    "year",
    "eliminated",
    "vote",
    "day",
    "share",
    "wiki_url",
]

# Select just those columns from the working frame.
df = src_all[output_columns]

In [177]:
# Preview the first rows of the selected columns.
df.head()

Unnamed: 0,episode_no,episode_name,air_date,season_name,reward,immunity,finish,viewers_millions,ratings,year,eliminated,vote,day,share,wiki_url
0,1,May the Best Generation Win,2016-09-21,Borneo,Vanua,Vanua,1st,9.46,2.3,2016,Rachel,5-3-1-1,4,8,https://survivor.fandom.com/wiki/Survivor:_Borneo
1,2,Love Goggles,2016-09-28,Borneo,Takali,Takali,2nd,9.16,2.1,2016,Mari,7-3,7,8,https://survivor.fandom.com/wiki/Survivor:_Borneo
2,3,Your Job Is Recon,2016-10-05,Borneo,Vanua,Vanua,3rd,8.52,1.8,2016,Paul,6-3,10,7,https://survivor.fandom.com/wiki/Survivor:_Borneo
3,4,Who's the Sucker at the Table?,2016-10-12,Borneo,Takali,Vanua,4th,9.06,2.0,2016,Lucy,2-1-02,12,8,https://survivor.fandom.com/wiki/Survivor:_Borneo
4,5,Idol Search Party,2016-10-19,Borneo,,Ikabula,5th,8.59,1.9,2016,CeCe,4-1,15,7,https://survivor.fandom.com/wiki/Survivor:_Borneo


## Exports

#### JSON

In [17]:
# df.to_json(
#     f"data/processed/NAME.json",
#     indent=4,
#     orient="records",
# )

#### CSV

In [None]:
# df.to_csv(
#     f"data/processed/NAME.csv", index=False
# )

In [None]:
# Save the season title -> URL lookup table; the row index is not written.
season_urls_csv = 'data/raw/season_names_urls.csv'
seasons_df.to_csv(season_urls_csv, index=False)

#### GeoJSON

In [19]:
# gdf.to_file(
#     f"data/processed/NAME.geojson",
#     driver="GeoJSON",
# )