In [280]:
import calendar
import itertools
import re
import time

from dateutil.parser import parse as dateparse
from dateutil.relativedelta import relativedelta
from pytz import timezone

# pytz timezone object for Melbourne; used below to localize the parsed screening start times
melbourne = timezone('Australia/Melbourne')

# month names in calendar order; index 0 of calendar.month_name is an empty
# placeholder, so the range starts at 1
month_names = [calendar.month_name[month] for month in range(1, 13)]
# weekday names in the order the calendar module reports them (7 entries)
day_names = [calendar.day_name[weekday] for weekday in range(7)]

In [281]:
def is_time(x):
    """Return True if ``x`` is text shaped like a clock time, e.g. ``"6:30pm"``.

    Surrounding whitespace is ignored. The stripped text must match the
    ``strptime`` format ``"%H:%M%p"``; only whether it parses matters here,
    the parsed value is discarded.

    Anything that cannot be parsed (including ``None`` or other non-text
    values) yields False. Only parsing-related errors are treated as
    "not a time": interrupts such as ``KeyboardInterrupt`` are no longer
    swallowed.
    """
    try:
        time.strptime(x.strip(), "%H:%M%p")
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: x is not text; ValueError: wrong format
        return False
    return True

In [282]:
class Program:
    """A calendar of film screenings split up into themed Seasons

    Attributes:
        year: the year the program runs in (``None`` until set).
        seasons: list of Season objects; every Program gets its own list.
    """
    # class-level default kept so ``Program.year`` still resolves as before
    year = None

    def __init__(self, year=None, seasons=None):
        """Create a program, optionally pre-populated.

        Both arguments are optional so the existing ``Program()`` call style
        keeps working. ``seasons`` is copied into a new list so that two
        programs never share (and accidentally append to) the same list.
        """
        self.year = year
        self.seasons = [] if seasons is None else list(seasons)

    def __repr__(self):
        return '<Program year="{}" seasons={!r}>'.format(self.year, self.seasons)

class Season:
    """A series of film screenings as part of a Program

    Attributes:
        dates: the season's date text as it appears in the calendar file.
        start: start of the season (filled in after parsing; ``None`` before).
        end: end of the season (filled in after parsing; ``None`` before).
        title: season title text.
        desc: season description text.
        screenings: list of Screening objects; every Season gets its own list.
    """
    dates = None
    start = None
    end = None
    title = None
    desc = None

    def __init__(self):
        # per-instance list: a class-level list would be shared by all seasons
        self.screenings = []

    def __repr__(self):
        # format() rather than "+" so a half-populated season (fields still
        # None) can be displayed instead of raising TypeError
        return ('<Season dates="{}" start="{}" end="{}" title="{}" desc="{}"'
                ' screenings={!r}>').format(
                    self.dates, self.start, self.end,
                    self.title, self.desc, self.screenings)
    
class Screening:
    """A film screening as part of a Season"""
    # day/month text taken from the screening-night heading
    date = None
    # timezone-aware start and end of the screening, computed after parsing
    start_time = None
    end_time = None
    # start time text as written in the calendar, e.g. "6:30pm"
    time = None
    title = None
    # raw info line; the running time in minutes is parsed out of it
    info = None
    desc = None
    # not populated by the parsing code in this notebook
    director = None
    # running time in minutes, computed after parsing
    minutes = None
    # not populated by the parsing code in this notebook
    rating = None

    def __repr__(self):
        # format() rather than "+" so a screening whose fields are still None
        # can be displayed instead of raising TypeError
        return ('<Screening date="{}" time="{}" title="{}" info="{}"'
                ' desc="{}">').format(
                    self.date, self.time, self.title, self.info, self.desc)

In [283]:
def _chunks_starting_at(lines, starts_chunk):
    """Yield lists of consecutive lines, each beginning at a line for which
    ``starts_chunk(line)`` is true. Lines before the first such line are dropped."""
    chunk = None
    for line in lines:
        if starts_chunk(line):
            if chunk is not None:
                yield chunk
            chunk = [line]
        elif chunk is not None:
            chunk.append(line)
    if chunk is not None:
        yield chunk


def process_season(lines):
    """Process a Melbourne Cinematheque season text into a Season object

    Expected layout of ``lines``:

    * line 0: season dates, line 1: season title, line 2: season description
    * then screening nights, each introduced by a line whose first word is a
      day name (see ``day_names``)
    * within a night, each screening starts at a line accepted by ``is_time``
      and is followed by its title, info and description lines, in that order

    Lines that appear before the first day heading, or between a day heading
    and the first time line, are ignored. A screening with fewer than four
    lines gets empty strings for the missing trailing fields.

    Args:
        lines: list of text lines for one season.

    Returns:
        A Season with ``dates``, ``title``, ``desc`` and ``screenings`` set.
        ``start``/``end`` and the screenings' datetime fields are not set here.

    Raises:
        IndexError: if ``lines`` has fewer than three header lines.
    """
    season_dates = lines[0].strip()
    season_title = lines[1].strip()
    season_desc = lines[2].strip()

    def starts_night(line):
        # a screening night begins with a line whose first word is a day name
        words = line.split()
        return bool(words) and words[0] in day_names

    screenings = []

    # Start right after the three header lines: a separator line there is
    # dropped by the chunker anyway, and a day heading placed directly after
    # the description is still picked up.
    for night_lines in _chunks_starting_at(lines[3:], starts_night):
        # first line of a screen night is the night of screening
        screening_day_month_date = night_lines[0].strip()

        for screen_lines in _chunks_starting_at(night_lines[1:], is_time):
            # positional fields: time, title, info, description
            fields = [text.strip() for text in screen_lines[:4]]
            fields.extend([''] * (4 - len(fields)))

            screening = Screening()
            screening.date = screening_day_month_date
            (screening.time, screening.title,
             screening.info, screening.desc) = fields
            screenings.append(screening)

    # create season
    season = Season()
    season.dates = season_dates
    season.title = season_title
    season.desc = season_desc
    season.screenings = screenings

    return season

In [284]:
def get_screening_minutes(screening):
    """Return integer minutes running time for a screening

    The running time is read from ``screening.info``. The primary rule takes
    the text between the first ``)`` and the first en dash, removes ``mins``
    and converts what is left to an int. When that rule fails (for example a
    plain hyphen instead of an en dash, singular "min", or an extra
    parenthesised remark earlier in the line), the first number directly
    followed by "min" anywhere in the info text is used instead.

    Args:
        screening: object with an ``info`` text attribute.

    Returns:
        The running time in minutes as an int.

    Raises:
        IndexError or ValueError: the error raised by the primary rule, when
            no running time can be found by either rule.
    """
    info = screening.info
    try:
        return int(info.split(')')[1].split('–')[0].replace('mins', '').strip())
    except (IndexError, ValueError):
        found = re.search(r'(\d+)\s*min', info, flags=re.IGNORECASE)
        if found is None:
            # nothing usable: surface the primary rule's original error
            raise
        return int(found.group(1))

In [285]:
def fix_dates_in_program(program):
    """Fill in the datetime fields of every screening and season in ``program``.

    For each screening, ``start_time`` is parsed from its ``date`` and ``time``
    text plus ``program.year`` and localized to the ``melbourne`` timezone;
    ``minutes`` comes from ``get_screening_minutes`` and ``end_time`` is
    ``start_time`` plus that many minutes.

    For each season, ``start`` is the first screening's ``start_time`` and
    ``end`` is the last screening's ``end_time``. A season without screenings
    gets ``None`` for both instead of raising IndexError.

    Args:
        program: object with ``year`` and ``seasons``; modified in place.
    """
    for season in program.seasons:
        # fix screening datetimes
        for screening in season.screenings:
            screening.start_time = melbourne.localize(dateparse(
                screening.date + " " +
                screening.time + " " +
                str(program.year)))
            screening.minutes = get_screening_minutes(screening)
            # pytz: normalize() after arithmetic keeps the UTC offset correct
            # should the interval ever cross a daylight-saving change
            screening.end_time = melbourne.normalize(
                screening.start_time + relativedelta(minutes=screening.minutes))

        # fix season start, end date
        if season.screenings:
            season.start = season.screenings[0].start_time
            season.end = season.screenings[-1].end_time
        else:
            season.start = None
            season.end = None

In [286]:
# split up contents by movie season
def is_new_season(x):
    """groupby key function: return how many season headings have been seen so far.

    A line counts as a season heading when its first word is a month name
    (see ``month_names``). The running count is stored on the function itself
    as ``is_new_season.count``, so it must be reset before each pass.
    """
    if len(x.strip()) > 0 and x.split()[0] in month_names:
        is_new_season.count+=1
    return is_new_season.count

# reset on every run of this cell so counting always starts from zero
is_new_season.count=0

program = Program()
program.year = 2020
# start from a fresh list so re-running this cell never appends to seasons
# left over from a previous run
program.seasons = []

# process the Melbourne Cinematheque calendar text
# (explicit encoding: the text contains non-ASCII punctuation and the default
# encoding of open() depends on the platform)
with open('calendar.txt', encoding='utf-8') as f:

    for key,grp in itertools.groupby(f,is_new_season):

        if key == 0:
            # lines before the first season heading are not a season
            continue

        season = process_season(list(grp))
        program.seasons.append(season)

# the file is no longer needed once every season has been parsed
fix_dates_in_program(program)

program

<Program year="2020" seasons=[<Season dates="February 5" start="2020-02-05 18:30:00+11:00" end="2020-02-05 22:45:00+11:00" title="2020 OPENING NIGHT" desc="The opening night of our 2020 program features recent restorations of key works by two of the towering figures of post-World War II European cinema: Bernardo Bertolucci and Jacques Rivette. Both filmmakers emerged as key figures in the 1960s, each betraying and confirming their strong affinity with the history of cinema and other art forms. This program profiles two of their less widely seen but central works from this seminal period, opening with Bertolucci’s appropriately labyrinthine and mercurial adaptation of a short story by Jorge Luis Borges. Released within months of The Conformist, The Spider’s Stratagem demonstrates both the full range of Bertolucci’s work and his characteristic preoccupation with the legacies of history, place and identity. The exquisite recent restoration of Rivette’s La religieuse profiles an extraordin

In [287]:
from ics import Calendar, Event

output_file = "melbourne_cinematheque.ics"

# every event in this calendar takes place at the same venue
venue = 'The Capitol, Swanston St, Melbourne'

# create new calendar
cal = Calendar()

for season in program.seasons:

    # add event for season to calendar (skipped when the season has no dates)
    if season.start is not None and season.end is not None:
        event = Event()
        event.name = "SEASON: " + season.title
        # season.start is already timezone-aware (it is copied from a localized
        # screening start time); pytz's localize() only accepts naive datetimes
        # and raises ValueError otherwise, so it is used as-is
        event.begin = season.start
        event.end = season.end
        # make this an all day event so it's clearer it's a season
        event.make_all_day()
        event.description = season.desc
        event.location = venue
        cal.events.add(event)

    for screening in season.screenings:
        # add event for screening to calendar
        event = Event()
        event.name = "SCREENING: " + screening.title
        event.begin = screening.start_time
        event.end = screening.end_time
        event.description = screening.desc
        event.location = venue
        cal.events.add(event)

# write to file
# (explicit UTF-8: titles and descriptions contain non-ASCII punctuation)
# NOTE(review): newer ics releases appear to deprecate str(calendar) as the
# serialization in favour of an explicit serialize call - confirm against the
# installed version
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(str(cal))

In [289]:
import pandas as pd

# the column names double as the Screening attribute names to tabulate
cols = ['date', 'time', 'start_time', 'end_time','minutes', 'title']

# one tuple per screening, in program order
data = []
for season in program.seasons:
    for screening in season.screenings:
        data.append(tuple(getattr(screening, col) for col in cols))

df = pd.DataFrame(data=data, columns=cols)
df

Unnamed: 0,date,time,start_time,end_time,minutes,title
0,Wednesday February 5,6:30pm,2020-02-05 18:30:00+11:00,2020-02-05 20:10:00+11:00,100,THE SPIDER’S STRATAGEM
1,Wednesday February 5,8:25pm,2020-02-05 20:25:00+11:00,2020-02-05 22:45:00+11:00,140,LA RELIGIEUSE
2,Wednesday February 12,6:30pm,2020-02-12 18:30:00+11:00,2020-02-12 19:59:00+11:00,89,BICYCLE THIEVES
3,Wednesday February 12,8:10pm,2020-02-12 20:10:00+11:00,2020-02-12 21:47:00+11:00,97,MIRACLE IN MILAN
4,Wednesday February 19,6:30pm,2020-02-19 18:30:00+11:00,2020-02-19 20:04:00+11:00,94,THE GARDEN OF THE FINZI-CONTINIS
...,...,...,...,...,...,...
85,Wednesday December 2,7:00pm,2020-12-02 19:00:00+11:00,2020-12-02 20:14:00+11:00,74,CAIRO STATION
86,Wednesday December 2,8:25pm,2020-12-02 20:25:00+11:00,2020-12-02 22:35:00+11:00,130,THE LAND
87,Wednesday December 9,7:00pm,2020-12-09 19:00:00+11:00,2020-12-09 21:13:00+11:00,133,ALEXANDRIA… WHY?
88,Wednesday December 9,9:25pm,2020-12-09 21:25:00+11:00,2020-12-09 23:15:00+11:00,110,MY ONE AND ONLY LOVE
