# Parse Season and Episode Info

---

In [250]:
import json

In [155]:
INPUT_FILE = 'season_info.txt'

# Load the raw text in a single pass: trim surrounding whitespace from each
# line and keep only the lines that still contain something afterwards.
with open(INPUT_FILE, 'r') as f:
    lines = [text for text in (raw.strip() for raw in f) if text != '']

---

## Database Key Constants

In [156]:
# Base values for each family of database keys. create_db_id() overlays an
# item number onto the trailing digits of one of these bases.
SEASON_ID = 10_000
EPISODE_ID = 20_000
QUOTE_ID = 30_000
CHARACTER_ID = 40_000
QUOTEE_ID = 50_000

---

## Functions

### Database Key Creation

In [183]:
def create_db_id(BASE_ID, item_no):
    """Build a database key by overlaying an item number onto a base id.

    The trailing digits of ``BASE_ID`` are replaced by the digits of
    ``item_no``, so the key keeps the width of ``BASE_ID``. For example,
    base ``20000`` and item ``180`` give ``'20180'``.

    Args:
        BASE_ID: Base value whose leading digits identify the key family.
        item_no: Non-negative whole number placed in the trailing digits.

    Returns:
        The key as a string with as many characters as ``str(BASE_ID)``.

    Raises:
        ValueError: If ``item_no`` is not a non-negative whole number, or if
            it has at least as many digits as ``BASE_ID``. In that case the
            base prefix would be dropped or mangled.
    """
    base_digits = str(BASE_ID)
    item_digits = str(item_no)

    # Rejects negatives ('-5') and non-integers ('1.5'), which would otherwise
    # be embedded verbatim in the key.
    if not item_digits.isdigit():
        raise ValueError(
            f'item_no must be a non-negative whole number, got {item_no!r}'
        )

    # Number of leading base digits that survive the overlay. Zero or fewer
    # means the item number would swallow the whole family prefix.
    prefix_len = len(base_digits) - len(item_digits)
    if prefix_len <= 0:
        raise ValueError(
            f'item_no {item_no!r} does not fit inside base id {BASE_ID!r}'
        )

    return f'{base_digits[:prefix_len]}{item_digits}'

# Sanity check: item number 180 under the episode base should give '20180'
create_db_id(EPISODE_ID, 180)

'20180'

### Parse and Format Dates

#### Season Year

In [198]:
def _expand_year(year, century):
    """Return ``year`` unchanged if it has four characters, else prefix ``century``."""
    return year if len(year) == 4 else f'{century}{year}'


def parse_season_date_year(date_year, century='19'):
    """Turn a season's year field into a list of four-digit year strings.

    Accepted forms are a single year (``'89'`` or ``'1989'``) and a
    ``start-end`` range (``'97-98'`` or ``'1997-1998'``).

    Args:
        date_year: The year text, without any surrounding brackets.
        century: Two-character prefix applied to abbreviated years.
            Defaults to ``'19'``.

    Returns:
        ``[year]`` for a single year, ``[start, end]`` for a range, or an
        empty list when a single year is neither two nor four characters long.
    """
    # Single year: only two- and four-character values are recognised
    if '-' not in date_year:
        if len(date_year) in (2, 4):
            return [_expand_year(date_year, century)]
        return []

    # Range: expand both ends, leaving years that are already four digits alone
    years = date_year.split('-')
    start_year = _expand_year(years[0], century)
    end_year = _expand_year(years[1], century)

    # An abbreviated end year that lands before the start year means the range
    # crossed a century boundary (e.g. '99-00'), so move it forward 100 years.
    crossed_century = (
        len(years[1]) == 2
        and start_year.isdigit()
        and end_year.isdigit()
        and int(end_year) < int(start_year)
    )
    if crossed_century:
        end_year = str(int(end_year) + 100)

    return [start_year, end_year]

# Example: parse_season_date_year('97-98') returns ['1997', '1998']

#### Episode Full Date

In [199]:
# Parse date format for episodes
def parse_episode_air_date(episode_air_date):
    """Normalise a ``month/day/year`` string to ``MM/DD/YYYY``.

    The month and day get a leading ``0`` unless they are already two
    characters long. The year gets a ``19`` prefix unless it is already four
    characters long.

    Args:
        episode_air_date: Slash-separated date text such as ``'7/5/89'``.

    Returns:
        The padded date string, e.g. ``'07/05/1989'``.

    Raises:
        IndexError: If the text has fewer than three slash-separated parts.
    """
    parts = episode_air_date.split('/')
    month, day, year = parts[0], parts[1], parts[2]

    # Pad whatever is not already at its full width
    month = month if len(month) == 2 else f'0{month}'
    day = day if len(day) == 2 else f'0{day}'
    year = year if len(year) == 4 else f'19{year}'

    return f'{month}/{day}/{year}'

---

## Separate Season and Episode Info

### Season Lines

In [200]:
# Positions of the season header lines within `lines`
# (also used later to find where each season's episode lines start and end)
season_line_indices = [idx for idx, text in enumerate(lines) if 'Season' in text]

# The season header lines themselves, in file order
season_lines = [lines[idx] for idx in season_line_indices]

### Episode Lines

In [239]:
# Group the episode lines by season.
# Each season's episodes sit between its own header line and the next header.
# Appending len(lines) as a final boundary lets the last season be handled like
# every other one, and yields an empty result (rather than an IndexError) when
# no season header was found at all.
season_boundaries = season_line_indices + [len(lines)]

episode_lines = [
    lines[header_idx + 1:next_header_idx]
    for header_idx, next_header_idx in zip(season_boundaries, season_boundaries[1:])
]

# episode_lines

---

## JSON

### EPISODE INFO

In [240]:
episode_info_json = []

# Seasons and the episodes inside each season are both numbered from 1
for season_no, season_episodes in enumerate(episode_lines, 1):
    for episode_no, episode_line in enumerate(season_episodes, 1):
        tokens = episode_line.split()

        # First token: the episode's chronological number, which also forms
        # the trailing digits of its DB id
        chrono_no = int(tokens[0])
        episode_id = create_db_id(EPISODE_ID, chrono_no)

        # Last token: the air date with one wrapper character on each side
        # (presumably brackets; confirm against the input file)
        raw_air_date = tokens[-1][1:-1]
        air_date = parse_episode_air_date(raw_air_date)

        # Everything between the number and the date is the title
        title = ' '.join(tokens[1:-1])

        episode_info_json.append({
            'episode_id': episode_id,
            'episode_no': episode_no,
            'season_no': season_no,
            'title': title,
            'air_date': air_date,
            'script_link': '',
        })

# len(episode_info_json)

### SEASON INFO

In [241]:
season_info_json = []

# Seasons are numbered by their order of appearance, starting at 1
for season_no, season_line in enumerate(season_lines, 1):
    season_id = create_db_id(SEASON_ID, season_no)

    # The last token holds the air year(s) with one wrapper character on each
    # side (presumably brackets; confirm against the input file)
    year_token = season_line.split()[-1]
    air_dates = parse_season_date_year(year_token[1:-1])

    # 'episodes' starts empty and is filled in once the episodes are attached
    season_info_json.append({
        'season_id': season_id,
        'season_no': season_no,
        'air_dates': air_dates,
        'episodes': [],
    })

# for s in season_info_json:
#     print(s)

#### Add Episodes to Season JSON

In [243]:
# Attach every episode to its season's 'episodes' list.
# Season numbers start at 1 while list positions start at 0, hence the -1.
for episode in episode_info_json:
    season_index = episode['season_no'] - 1
    target_season = season_info_json[season_index]
    target_season['episodes'].append(episode)

# season_info_json[0]['episodes']

---

## Write JSON to File

In [246]:
SEASON_JSON_FILE = 'seasons.json'
# NOTE(review): nothing in the visible code writes to EPISODE_JSON_FILE;
# confirm whether a separate episodes dump is intended.
EPISODE_JSON_FILE = 'episodes.json'

# Write the season list (with its nested episodes) as JSON text
with open(SEASON_JSON_FILE, 'w') as f:
    f.write(json.dumps(season_info_json))

