In [1]:
library(dplyr)
library(ggplot2)
library(tidyr)
library(openai)

# Read the four Oriel training-year CSV exports from the working directory.
# Each file is kept as its own raw data frame (oldest file suffix first).
OD2223 <- read.csv(file = "oriel-training-year-2223.csv")
OD2324 <- read.csv(file = "oriel-training-year-2324.csv")
OD2425 <- read.csv(file = "oriel-training-year-2425.csv")
OD2526 <- read.csv(file = "oriel-training-year-2526.csv")


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# Clean OD2526: rename the raw columns, keep the relevant ones, and expand
# the table to one row per available place.

# Show the raw column names so the mapping below can be checked against them.
colnames(OD2526)

# Rename every raw column (new name = "raw name"). The rename fails if any of
# the listed raw columns is missing from the export.
OrielDataFor2526 <- rename(
  OD2526,
  all_of(c(
    Region = "Region",
    SubRegion = "Sub.Region",
    Sector = "Sector",
    ProgramPreference = "Programme.Preference",
    ProgramTitle = "Programme.Title",
    ProgramDescription = "Programme.Description",
    AvailablePlaces = "Places.Available",
    EmployerType = "Employer.Type",
    EmployerName = "Employer.Name",
    OrgSizePrimaryCareOnly = "Size.of.organisation..Primary.care.only.",
    HospitalSpecialties = "Specialties..Hospital.only.",
    SkilledWorkerVisa = "Skilled.worker.visa",
    LicenseCount = "Number.of.Licences",
    Salary = "Salary....",
    TrainingProvider = "Training.Provider",
    WeeklyHours = "Hours.per.Week",
    ICBName = "ICB.Name",
    PlacementRotationCount = "Number.of.Placements.Rotations",
    RotationSetting1 = "Practice.Placement.Rotation.Setting.1",
    RotationSetting2 = "Practice.Placement.Rotation.Setting.2",
    RotationSetting3 = "Practice.Placement.Rotation.Setting.3",
    RotationSetting4 = "Practice.Placement.Rotation.Setting.4"
  ))
)

# Columns carried forward, in output order. The other renamed columns are
# either irrelevant or redundant.
KeepColumns2526 <- c(
  "Region", "ProgramTitle", "ProgramDescription", "AvailablePlaces",
  "EmployerType", "EmployerName", "OrgSizePrimaryCareOnly",
  "SkilledWorkerVisa", "Salary", "TrainingProvider", "WeeklyHours",
  "ICBName", "PlacementRotationCount",
  "RotationSetting1", "RotationSetting2", "RotationSetting3",
  "RotationSetting4"
)
OrielDataFor2526 <- select(OrielDataFor2526, all_of(KeepColumns2526))

# One row per available place: each programme row is repeated
# AvailablePlaces times, and uncount() then drops the AvailablePlaces column.
ExpandedOrielDataFor2526 <- uncount(
  OrielDataFor2526,
  weights = AvailablePlaces
)
str(ExpandedOrielDataFor2526)

# NOTE(review): row.names is left at its default, so the row names are
# written as an extra leading column in the CSV - confirm this is wanted.
write.csv(ExpandedOrielDataFor2526, file = "ExpandedOrielDataFor2526.csv")


'data.frame':	4757 obs. of  16 variables:
 $ Region                : chr  "North West" "Yorkshire and the Humber" "East Midlands" "Yorkshire and the Humber" ...
 $ ProgramTitle          : chr  "(Imaan & MENTAL HEALTH HOSPITAL) Lower House Pharmacy + MerseyCare Trust/L11 2SF + L31 1HW" "(Imaan & MENTAL HEALTH) Ridge Pharmacy + Bradford District Care NHS Trust/BD7 3JX+BD9 6DP" "(Imaan & MENTAL HEALTH) Tupton Pharmacy+ Hartington Unit Chesterfield Hosp/S42 6BH+S44 5BL" "(Imaan HC & DRUG/ALCOHOL) Chelmsford Rd Pharmacy + New Vision Unity House/BD7 3JX+BD1 3DN" ...
 $ ProgramDescription    : chr  "This programme consists of a hospital rotation (13 weeks) & Community (39 weeks). Mersey Care offer specialist "| __truncated__ "This programme consists of a Mental Health Hospital rotation (13 weeks) & Community (39 weeks). Trainees will h"| __truncated__ "This programme consists of a hospital rotation (26 weeks) & Community (26 weeks). Experience an exciting, multi"| __truncated__ "This programm

In [3]:
# Clean OD2425: rename the raw columns, keep the relevant ones, and expand
# the table to one row per available place.

# Show the raw column names so the mapping below can be checked against them.
colnames(OD2425)

# Rename every raw column (new name = "raw name"). The rename fails if any of
# the listed raw columns is missing from the export.
OrielDataFor2425 <- rename(
  OD2425,
  all_of(c(
    Region = "Region",
    SubRegion = "Sub.Region",
    Sector = "Sector",
    ProgramPreference = "Programme.Preference",
    ProgramTitle = "Programme.Title",
    ProgramDescription = "Programme.Description",
    AvailablePlaces = "Places.Available",
    EmployerType = "Employer.Type",
    EmployerName = "Employer.Name",
    OrgSizePrimaryCareOnly = "Size.of.organisation..Primary.care.only.",
    HospitalSpecialties = "Specialties..Hospital.only.",
    SkilledWorkerVisa = "Skilled.worker.visa",
    LicenseCount = "Number.of.Licences",
    Salary = "Salary....",
    TrainingProvider = "Training.Provider",
    WeeklyHours = "Hours.per.Week",
    # Formerly called ICS in this export; named ICB here for consistency.
    ICBName = "ICS.Name",
    PlacementRotationCount = "Number.of.Placements..3.months.or.more.",
    RotationSetting1 = "Practice.Placement.Setting.1",
    RotationSetting2 = "Practice.Placement.Setting.2",
    RotationSetting3 = "Practice.Placement.Setting.3",
    RotationSetting4 = "Practice.Placement.Setting.4"
  ))
)

# Columns carried forward, in output order.
KeepColumns2425 <- c(
  "Region", "ProgramTitle", "ProgramDescription", "AvailablePlaces",
  "EmployerType", "EmployerName", "OrgSizePrimaryCareOnly",
  "SkilledWorkerVisa", "Salary", "TrainingProvider", "WeeklyHours",
  "ICBName", "PlacementRotationCount",
  "RotationSetting1", "RotationSetting2", "RotationSetting3",
  "RotationSetting4"
)
OrielDataFor2425 <- select(OrielDataFor2425, all_of(KeepColumns2425))

# One row per available place: each programme row is repeated
# AvailablePlaces times, and uncount() then drops the AvailablePlaces column.
ExpandedOrielDataFor2425 <- uncount(
  OrielDataFor2425,
  weights = AvailablePlaces
)

str(ExpandedOrielDataFor2425)
# NOTE(review): row.names is left at its default, so the row names are
# written as an extra leading column in the CSV - confirm this is wanted.
write.csv(ExpandedOrielDataFor2425, file = "ExpandedOrielDataFor2425.csv")

'data.frame':	3930 obs. of  16 variables:
 $ Region                : chr  "Health Education England Kent, Surrey and Sussex" "Health Education England Kent, Surrey and Sussex" "Health Education England Yorkshire and the Humber" "Health Education England East Midlands" ...
 $ ProgramTitle          : chr  "(Anna Pharmacy) Nima Chemist/Stoneleigh/KT17 2HS" "(Anna Pharmacy) Patsons Chemist/Stoneleigh/KT17 2HS" "(Imaan HC & GP) Abrar Rehman Pharmacy & Ashwell Medical Centre GP /HEEGP/Bradford/ BD8 9DP" "(Imaan HC & GP) Allestree Pharmacy & Balance Street GP /HEEGP/Derby/DE22 2DL & ST14 8JG" ...
 $ ProgramDescription    : chr  "At Anna Pharmacy Group Anna Pharmacy group is a fast-growing regional pharmacy based in the south. We currently"| __truncated__ "At Anna Pharmacy Group Anna Pharmacy group is a fast-growing regional pharmacy based in the south. We currently"| __truncated__ "GP Practice clinical placement (6 months) & Community Pharmacy (6 months) make up this popular, exciting, natio"

In [5]:
# Clean OD2324: rename the raw columns, keep the relevant ones, and expand
# the table to one row per available place.
#
# The raw column names in this export differ from the later years, and no
# ICB/ICS column is mapped here.
OrielDataFor2324 <- OD2324 %>%
  rename(
    Region = Region,
    Sector = Area.Sector.,
    ProgramTitle = Programme.Title,
    ProgramDescription = Programme.Description,
    AvailablePlaces = Places.Available,
    EmployerType = Employer.Type,
    EmployerName = Employer.Name,
    OrgSizePrimaryCareOnly = Size.of.Organisation,
    HospitalSpecialties = Specialties,
    SkilledWorkerVisa = Skilled.worker.visa,
    LicenseCount = No.of.Licences,
    Salary = Salary,
    TrainingProvider = Training.Provider,
    WeeklyHours = Hours.Per.Week,
    PlacementRotationCount = Number.of.Placements,
    RotationSetting1 = Setting.1,
    RotationSetting2 = Setting.2,
    RotationSetting3 = Setting.3,
    RotationSetting4 = Setting.4
  )

# Show the column names after renaming.
colnames(OrielDataFor2324)

# Columns carried forward, in output order. "EmployerName" is listed exactly
# once: listing it twice made the subset return a second, duplicated
# employer-name column (renamed to keep the names unique), which then
# propagated into the expanded data.
KeepColumns2324 <- c(
  "Region", "Sector", "ProgramTitle", "ProgramDescription",
  "AvailablePlaces", "EmployerType", "EmployerName",
  "OrgSizePrimaryCareOnly", "SkilledWorkerVisa", "Salary",
  "TrainingProvider", "WeeklyHours", "PlacementRotationCount",
  "RotationSetting1", "RotationSetting2", "RotationSetting3",
  "RotationSetting4"
)
OrielDataFor2324 <- OrielDataFor2324 %>%
  select(all_of(KeepColumns2324))

# One row per available place: each programme row is repeated
# AvailablePlaces times, and uncount() then drops the AvailablePlaces column.
ExpandedOrielDataFor2324 <- OrielDataFor2324 %>%
  uncount(AvailablePlaces)

# List the distinct employer types present in this year's data.
unique(OrielDataFor2324$EmployerType)