In [2]:
# For subject listing in schools

import pandas 
import webbrowser
import os

# Read the dataset into a data table using Pandas
df = pandas.read_csv("subjects-offered.csv")

# Remove all primary school names if they contain the word 'Primary'
# While some schools are still left as names do not contain 'Primary' eg. 'AI TONG SCHOOL'
# They will be further removed in Excel when we merge them with the main dataset
df = df[~df['school_name'].isin(['PRIMARY'])]

# Combine all subjects from the same school into one row
df = df.groupby(["school_name"])['subject_desc'].apply(lambda x: ', '.join(x)).reset_index()

# Save combined data to a new file
df.to_csv("subjects-offered-clean.csv")

In [3]:
# For MOE programmes in schools

import pandas 
import webbrowser
import os

# Read the dataset into a data table using Pandas
df = pandas.read_csv("moe-programmes.csv")

# Remove all primary school names if they contain the word 'Primary'
# While some schools are still left as names do not contain 'Primary' eg. 'AI TONG SCHOOL'
# They will be further removed in Excel when we merge them with the main dataset
df = df[~df['school_name'].isin(['PRIMARY'])]

# Combine all programmes from the same school into one row
df = df.groupby(["school_name"])['moe_programme_desc'].apply(lambda x: ', '.join(x)).reset_index()

# Save combined data to a new file
df.to_csv("moe-programmes-clean.csv")

                           school_name  \
0       AHMAD IBRAHIM SECONDARY SCHOOL   
1            ANDERSON SECONDARY SCHOOL   
2                 ANGLICAN HIGH SCHOOL   
3         ANGLO-CHINESE JUNIOR COLLEGE   
4   ANGLO-CHINESE SCHOOL (INDEPENDENT)   
5      BUKIT PANJANG GOVT. HIGH SCHOOL   
6                 CATHOLIC HIGH SCHOOL   
7              CATHOLIC JUNIOR COLLEGE   
8        CEDAR GIRLS' SECONDARY SCHOOL   
9                  CHIJ KATONG CONVENT   
10          CHIJ SECONDARY (TOA PAYOH)   
11     CHIJ ST. NICHOLAS GIRLS' SCHOOL   
12      CHUNG CHENG HIGH SCHOOL (MAIN)   
13       COMMONWEALTH SECONDARY SCHOOL   
14              CRESCENT GIRLS' SCHOOL   
15                  DUNMAN HIGH SCHOOL   
16               EUNOIA JUNIOR COLLEGE   
17               HWA CHONG INSTITUTION   
18       JURONG PIONEER JUNIOR COLLEGE   
19        JURONG WEST SECONDARY SCHOOL   

                                   moe_programme_desc  
0                            ENHANCED MUSIC PROGRAMME  
1   E

In [9]:
# For CCAs in schools

import pandas 
import webbrowser
import os

# Read the dataset into a data table using Pandas
df = pandas.read_csv("co-curricular-activities-ccas.csv")

# Remove all primary school names if they contain the word 'Primary'
# While some schools are still left as names do not contain 'Primary' eg. 'AI TONG SCHOOL'
# They will be further removed in Excel when we merge them with the main dataset
df = df[~df['school_name'].isin(['PRIMARY'])]

# Combine all CCAs of the same type from the same school into one row
df = df.groupby(["school_name", 'cca_grouping_desc'])['cca_generic_name'].apply(lambda x: ', '.join(x)).reset_index()

# Pivot the table such that each type of CCA is a column in itself
#              | CLUBS & SOCIETIES | PHYSICAL SPORTS | UNIFORMED GROUPS | PERFORMING ARTS | OTHERS
# School Name  |     A, B, C       |   D, E, F       |     G, H, I      |    J, K, L      |  M, N
df = df.pivot(index='school_name', columns='cca_grouping_desc', values='cca_generic_name')

# Save combined data to a new file
df.to_csv("co-curricular-activities-ccas-clean.csv")

cca_grouping_desc                                                 CLUBS AND SOCIETIES  \
school_name                                                                             
ADMIRALTY PRIMARY SCHOOL            ENGLISH LANGUAGE, DRAMA AND DEBATING, INFOCOMM...   
ADMIRALTY SECONDARY SCHOOL                                                   ROBOTICS   
AHMAD IBRAHIM PRIMARY SCHOOL                                                 ROBOTICS   
AHMAD IBRAHIM SECONDARY SCHOOL                                             GREEN CLUB   
AI TONG SCHOOL                                                               ROBOTICS   
ALEXANDRA PRIMARY SCHOOL            CHINESE CULTURE AND LANGUAGE, ROBOTICS, INFOCO...   
ANCHOR GREEN PRIMARY SCHOOL         ROBOTICS, INFOCOMM TECHNOLOGY (MEDIA PRODUCTIO...   
ANDERSON PRIMARY SCHOOL              ROBOTICS, INFOCOMM TECHNOLOGY (MEDIA PRODUCTION)   
ANDERSON SECONDARY SCHOOL           INFOCOMM TECHNOLOGY (MEDIA PRODUCTION), BIOLOG...   
ANDERSON SERANGOON JU