In [None]:
# Import libraries
import pandas as pd
import numpy as np
#from datetime import time

In [None]:
# Load the exam timetable workbook into a DataFrame
# (read_excel reads the first sheet unless told otherwise)
df = pd.read_excel(io='./data/2026-exam-timetable-excel-version.xlsx')

# Drop the first column. Reassigning (instead of inplace=True) keeps this a
# plain transformation of the frame rather than a mutation of it.
df = df.drop(columns=df.columns[0])

# Rename the remaining four columns to simple letters
df.columns = ['A', 'B', 'C', 'D']

# Drop the repeated header rows ('Course', 'Morning', 'Afternoon' in column A)
# with a single mask rather than three successive filters.
df = df[~df['A'].isin(['Course', 'Morning', 'Afternoon'])]

# Create a new column with the date in datetime format.
# errors='coerce' turns every value of A that is not a dd-mm-YYYY date into NaT.
df['DATE'] = pd.to_datetime(df['A'], format='%d-%m-%Y', errors='coerce')

# Fill each NaT in DATE with the value from the previous row, so every row
# carries the most recent date that appeared above it.
df['DATE'] = df['DATE'].ffill()

# Keep only the rows that have at least 3 non-NaN values
df = df.dropna(thresh=3)

# Strip the trailing markers from column D (exam time):
# first '* (approx)', then a bare '*'
df['D'] = df['D'].str.removesuffix('* (approx)').str.removesuffix('*')

# Split D on the first ' - ' into start time (T1) and end time (T2)
df[['T1', 'T2']] = df['D'].str.split(' - ', n=1, expand=True)

# Combine the date text with each time text and parse the result, giving the
# start (DT1) and finish (DT2) of each exam as datetimes
date_text = df['DATE'].astype(str)
df['DT1'] = pd.to_datetime(date_text + ' ' + df['T1'].astype(str))
df['DT2'] = pd.to_datetime(date_text + ' ' + df['T2'].astype(str))

# Length of the exam as a Timedelta
df['L'] = df['DT2'] - df['DT1']

def _format_hhmm(delta):
    """Render a Timedelta as a zero-padded 'HH:MM' string.

    Only the hours and minutes fields of ``delta.components`` are used; the
    days and seconds fields do not appear in the result.
    """
    parts = delta.components
    return f'{parts.hours:02d}:{parts.minutes:02d}'


# Add in columns for the duration of the exams with 25% and 50% extra time
df['L%25'] = df['L'] * 1.25
df['L%50'] = df['L'] * 1.50

# Add in columns with the datetime of the end of these extended exams
df['DT2%25'] = df['DT1'] + df['L%25']
df['DT2%50'] = df['DT1'] + df['L%50']

# Add in columns with the figures above, but in the format HH:MM
# NOTE(review): '%H:%M' drops any seconds rather than rounding them —
# confirm that truncation is the intended treatment for extra-time end times.
df['T2%25'] = df['DT2%25'].dt.strftime('%H:%M')
df['T2%50'] = df['DT2%50'].dt.strftime('%H:%M')

# Convert each Timedelta duration into HH:MM text (Timedelta has no strftime),
# using one shared formatter instead of three copies of the same lambda
df['L1'] = df['L'].apply(_format_hhmm)
df['L1%25'] = df['L%25'].apply(_format_hhmm)
df['L1%50'] = df['L%50'].apply(_format_hhmm)

# Base label: column A and column B joined with " - "
df['A1'] = df['A'] + " - " + df['B']

# Extra-time labels: the base label followed by the extra-time suffix
for label_column, label_suffix in (('A2', " + 25%"), ('A3', " + 50%")):
    df[label_column] = df['A1'] + label_suffix

df.head()

In [None]:
# Map each column of the final table to the column of df that supplies it.
# The order of this mapping is the column order of dfe. 'Start Date' appears
# once here (it was previously assigned twice with the same value).
export_columns = {
    # Key columns
    'Subject': 'A',
    'Level': 'B',
    'Label 1': 'A1',
    'Label 2': 'A2',
    'Label 3': 'A3',
    'Start Datetime': 'DT1',
    'End Datetime': 'DT2',
    'Start Date': 'DATE',
    'Start Time': 'T1',
    'End Date': 'DATE',
    'End Time': 'T2',
    'Duration': 'L1',
    'Description': 'C',
    # Extra time columns
    'Duration (+25%)': 'L1%25',
    'End Time (+25%)': 'T2%25',
    'End Datetime (+25%)': 'DT2%25',
    'Duration (+50%)': 'L1%50',
    'End Time (+50%)': 'T2%50',
    'End Datetime (+50%)': 'DT2%50',
}

# Create new df for final values
dfe = pd.DataFrame({name: df[source] for name, source in export_columns.items()})

dfe.head()

In [None]:
# Write the final table to CSV, leaving the DataFrame index out of the file
dfe.to_csv(path_or_buf='./csvs/sqa_exam_dates.csv', index=False)