In [1]:
import json
import pandas as pd
from sodapy import Socrata

In [2]:
# Import the tokens to access the API.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed, instead of being left open for the kernel's lifetime.
with open("./secret/tokens.json", "r") as token_file:
    tokens = json.load(token_file)
app_token = tokens["app_token"]
secret_token = tokens["secret_token"]

In [3]:
def pull_data(source_url: str, identifier: str):
    """
    Pulls every record of a dataset using the Socrata API.

    Records are downloaded page by page. Each page request asks for a fixed
    sort order (``:id``) so that consecutive ``offset``/``limit`` windows do
    not overlap or skip rows.

    Parameters:
        source_url (str): The URL of the source.
        identifier (str): The identifier of the dataset.

    Returns:
        tuple: ``(records, total_records)`` where ``records`` is the list of
        rows returned by the API and ``total_records`` is the row count the
        API reported before the download started. The two can differ if the
        API stops returning rows early, so callers should compare them.
    """
    # Calling Socrata (reads the module-level ``app_token``)
    client = Socrata(source_url, app_token=app_token)
    client.timeout = 60

    # Save Records
    records = []
    size = 10**5
    try:
        total_records = int(client.get(dataset_identifier=identifier, select="COUNT(*)")[0]["COUNT"])
        # `<` instead of `!=`: the loop also ends if more rows than expected arrive.
        while len(records) < total_records:
            page = client.get(
                dataset_identifier=identifier,
                order=":id",
                offset=len(records),
                limit=size,
            )
            if not page:
                # An empty page means the API has nothing more to give;
                # stop rather than requesting the same window forever.
                break
            records.extend(page)
    finally:
        # Release the underlying HTTP session even if a request fails.
        client.close()
    return (records, total_records)

In [4]:
# Pull data: download the ELA and Math datasets from the same portal
source_url = "data.cityofnewyork.us"
ela_identifier, math_identifier = "iebs-5yhr", "74kb-55u9"

ela_data, ela_total_records = pull_data(source_url, ela_identifier)
math_data, math_total_records = pull_data(source_url, math_identifier)

In [5]:
# Double Checking: the number of downloaded rows should equal the reported total
for pulled, expected_total in ((ela_data, ela_total_records), (math_data, math_total_records)):
    print(len(pulled) == expected_total)

True
True


In [6]:
def fix_data(data: list):
    """
    Builds a DataFrame from raw records and keeps only the rows of interest.

    Parameters:
        data (list): A list of records.

    Returns:
        pd.DataFrame: The rows whose ``report_category`` is "School", whose
        ``grade`` is not "All Grades" and whose ``mean_scale_score`` is not "s",
        re-indexed from zero and without the ``report_category`` column.
    """
    frame = pd.DataFrame(data)

    # One boolean mask per filter, combined below
    is_school = frame["report_category"].eq("School")
    is_single_grade = frame["grade"].ne("All Grades")
    has_score = frame["mean_scale_score"].ne("s")

    kept = frame.loc[is_school & is_single_grade & has_score]
    return kept.reset_index(drop=True).drop(columns="report_category")

In [7]:
# Filter both raw record lists down to school-level, per-grade rows
ela_df, math_df = fix_data(ela_data), fix_data(math_data)

In [8]:
ela_df.head()

Unnamed: 0,geographic_subdivision,grade,year,category,number_tested,mean_scale_score,level_1,level_1_1,level_2,level_2_1,level_3,level_3_1,level_4,level_4_1,level_3_4,level_3_4_1,school_name
0,01M015,3,2023,All Students,24,455,4,16.7,5,20.8,11,45.8,4,16.7,15,62.5,P.S. 015 ROBERTO CLEMENTE
1,01M015,4,2023,All Students,17,454,1,5.9,6,35.3,8,47.1,2,11.8,10,58.8,P.S. 015 ROBERTO CLEMENTE
2,01M015,5,2023,All Students,30,441,10,33.3,11,36.7,7,23.3,2,6.7,9,30.0,P.S. 015 ROBERTO CLEMENTE
3,01M015,3,2022,All Students,21,594,4,19.0,12,57.1,4,19.0,1,4.8,5,23.8,P.S. 015 ROBERTO CLEMENTE
4,01M015,4,2022,All Students,30,596,6,20.0,14,46.7,5,16.7,5,16.7,10,33.3,P.S. 015 ROBERTO CLEMENTE


In [9]:
math_df.head()

Unnamed: 0,geographic_division,grade,year,student_category,number_tested,mean_scale_score,num_level_1,pct_level_1,num_level_2,pct_level_2,num_level_3,pct_level_3,num_level_4,pct_level_4,num_level_3_and_4,pct_level_3_and_4,school_name
0,01M015,3,2023,All Students,27,447,6,22.2,9,33.3,7,25.9,5,18.5,12,44.4,P.S. 015 ROBERTO CLEMENTE
1,01M015,4,2023,All Students,23,445,7,30.4,3,13.0,12,52.2,1,4.3,13,56.5,P.S. 015 ROBERTO CLEMENTE
2,01M015,5,2023,All Students,30,432,14,46.7,11,36.7,5,16.7,0,0.0,5,16.7,P.S. 015 ROBERTO CLEMENTE
3,01M015,3,2022,All Students,21,583,12,57.1,4,19.0,5,23.8,0,0.0,5,23.8,P.S. 015 ROBERTO CLEMENTE
4,01M015,4,2022,All Students,30,591,10,33.3,13,43.3,5,16.7,2,6.7,7,23.3,P.S. 015 ROBERTO CLEMENTE


In [10]:
# Fix the column names for ELA: (raw name, new name) pairs.
# In the raw ELA columns "level_N" is mapped to a count and "level_N_1" to a percentage.
new_columns_ela = dict([
    ("geographic_subdivision", "school_code"),
    ("category", "student_type"),
    ("number_tested", "ela_number_tested"),
    ("mean_scale_score", "ela_avg_of_total_students_tested"),
    ("level_1", "ela_lvl_one_count"),
    ("level_1_1", "ela_lvl_one_pct"),
    ("level_2", "ela_lvl_two_count"),
    ("level_2_1", "ela_lvl_two_pct"),
    ("level_3", "ela_lvl_three_count"),
    ("level_3_1", "ela_lvl_three_pct"),
    ("level_4", "ela_lvl_four_count"),
    ("level_4_1", "ela_lvl_four_pct"),
    ("level_3_4", "ela_lvl_three_and_four_count"),
    ("level_3_4_1", "ela_lvl_three_and_four_pct"),
])

ela_df = ela_df.rename(columns=new_columns_ela)
ela_df.head()

Unnamed: 0,school_code,grade,year,student_type,ela_number_tested,ela_avg_of_total_students_tested,ela_lvl_one_count,ela_lvl_one_pct,ela_lvl_two_count,ela_lvl_two_pct,ela_lvl_three_count,ela_lvl_three_pct,ela_lvl_four_count,ela_lvl_four_pct,ela_lvl_three_and_four_count,ela_lvl_three_and_four_pct,school_name
0,01M015,3,2023,All Students,24,455,4,16.7,5,20.8,11,45.8,4,16.7,15,62.5,P.S. 015 ROBERTO CLEMENTE
1,01M015,4,2023,All Students,17,454,1,5.9,6,35.3,8,47.1,2,11.8,10,58.8,P.S. 015 ROBERTO CLEMENTE
2,01M015,5,2023,All Students,30,441,10,33.3,11,36.7,7,23.3,2,6.7,9,30.0,P.S. 015 ROBERTO CLEMENTE
3,01M015,3,2022,All Students,21,594,4,19.0,12,57.1,4,19.0,1,4.8,5,23.8,P.S. 015 ROBERTO CLEMENTE
4,01M015,4,2022,All Students,30,596,6,20.0,14,46.7,5,16.7,5,16.7,10,33.3,P.S. 015 ROBERTO CLEMENTE


In [11]:
# Fix the column names for Math: (raw name, new name) pairs.
# In the raw Math columns "num_*" is mapped to a count and "pct_*" to a percentage.
new_columns_math = dict([
    ("geographic_division", "school_code"),
    ("student_category", "student_type"),
    ("number_tested", "math_number_tested"),
    ("mean_scale_score", "math_avg_of_total_students_tested"),
    ("num_level_1", "math_lvl_one_count"),
    ("pct_level_1", "math_lvl_one_pct"),
    ("num_level_2", "math_lvl_two_count"),
    ("pct_level_2", "math_lvl_two_pct"),
    ("num_level_3", "math_lvl_three_count"),
    ("pct_level_3", "math_lvl_three_pct"),
    ("num_level_4", "math_lvl_four_count"),
    ("pct_level_4", "math_lvl_four_pct"),
    ("num_level_3_and_4", "math_lvl_three_and_four_count"),
    ("pct_level_3_and_4", "math_lvl_three_and_four_pct"),
])

math_df = math_df.rename(columns=new_columns_math)
math_df.head()

Unnamed: 0,school_code,grade,year,student_type,math_number_tested,math_avg_of_total_students_tested,math_lvl_one_count,math_lvl_one_pct,math_lvl_two_count,math_lvl_two_pct,math_lvl_three_count,math_lvl_three_pct,math_lvl_four_count,math_lvl_four_pct,math_lvl_three_and_four_count,math_lvl_three_and_four_pct,school_name
0,01M015,3,2023,All Students,27,447,6,22.2,9,33.3,7,25.9,5,18.5,12,44.4,P.S. 015 ROBERTO CLEMENTE
1,01M015,4,2023,All Students,23,445,7,30.4,3,13.0,12,52.2,1,4.3,13,56.5,P.S. 015 ROBERTO CLEMENTE
2,01M015,5,2023,All Students,30,432,14,46.7,11,36.7,5,16.7,0,0.0,5,16.7,P.S. 015 ROBERTO CLEMENTE
3,01M015,3,2022,All Students,21,583,12,57.1,4,19.0,5,23.8,0,0.0,5,23.8,P.S. 015 ROBERTO CLEMENTE
4,01M015,4,2022,All Students,30,591,10,33.3,13,43.3,5,16.7,2,6.7,7,23.3,P.S. 015 ROBERTO CLEMENTE


In [12]:
def fix_dtype(df: pd.DataFrame, mode: bool):
    """
    Converts the score columns of a test-result DataFrame to numeric/datetime types.

    The DataFrame is modified in place and also returned.

    Parameters:
        df (pd.DataFrame): The DataFrame to fix.
        mode (bool): If True, the "ela_"-prefixed columns are converted;
            otherwise the "math_"-prefixed columns are converted.

    Returns:
        pd.DataFrame: The same ``df`` object, with integer count columns, float
        percentage columns and a datetime ``year`` column.
    """
    code = "ela" if mode else "math"
    levels = ("one", "two", "three", "four", "three_and_four")

    # Converting object values (whole-number columns)
    int_columns = ["grade", f"{code}_number_tested", f"{code}_avg_of_total_students_tested"]
    int_columns.extend(f"{code}_lvl_{level}_count" for level in levels)
    for column in int_columns:
        df[column] = df[column].astype("int")

    # Converting percent values
    for level in levels:
        pct_column = f"{code}_lvl_{level}_pct"
        df[pct_column] = df[pct_column].astype("float")

    # Fix the year column (a four-digit year becomes January 1st of that year)
    df["year"] = pd.to_datetime(df["year"], format="%Y")
    return df

In [13]:
# Convert column types: mode=True handles the ELA columns, mode=False the Math columns
ela_df = fix_dtype(df=ela_df, mode=True)
math_df = fix_dtype(df=math_df, mode=False)

In [14]:
# Dataframe Information
ela_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 321389 entries, 0 to 321388
Data columns (total 17 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   school_code                       321389 non-null  object        
 1   grade                             321389 non-null  int32         
 2   year                              321389 non-null  datetime64[ns]
 3   student_type                      321389 non-null  object        
 4   ela_number_tested                 321389 non-null  int32         
 5   ela_avg_of_total_students_tested  321389 non-null  int32         
 6   ela_lvl_one_count                 321389 non-null  int32         
 7   ela_lvl_one_pct                   321389 non-null  float64       
 8   ela_lvl_two_count                 321389 non-null  int32         
 9   ela_lvl_two_pct                   321389 non-null  float64       
 10  ela_lvl_three_count             

In [15]:
math_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 317385 entries, 0 to 317384
Data columns (total 17 columns):
 #   Column                             Non-Null Count   Dtype         
---  ------                             --------------   -----         
 0   school_code                        317385 non-null  object        
 1   grade                              317385 non-null  int32         
 2   year                               317385 non-null  datetime64[ns]
 3   student_type                       317385 non-null  object        
 4   math_number_tested                 317385 non-null  int32         
 5   math_avg_of_total_students_tested  317385 non-null  int32         
 6   math_lvl_one_count                 317385 non-null  int32         
 7   math_lvl_one_pct                   317385 non-null  float64       
 8   math_lvl_two_count                 317385 non-null  int32         
 9   math_lvl_two_pct                   317385 non-null  float64       
 10  math_lvl_three_count

In [16]:
def get_school_borough(code: str):
    """
    Takes a school code and returns the borough.

    The borough is read from the letter that follows the two-digit district
    number (e.g. the "M" in "01M015"). Using the letter means codes whose
    district number is not a geographic district (e.g. "75K004") are still
    assigned to the right borough instead of defaulting to Staten Island.
    If the letter is missing or not recognised, the district number is used.

    Parameters:
        code (str): The school code, e.g. "01M015".

    Returns:
        str: One of "Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island".

    Raises:
        ValueError: If the borough letter is not recognised and the first two
            characters are not a number.
    """
    borough_by_letter = {
        "M": "Manhattan",
        "X": "Bronx",
        "K": "Brooklyn",
        "Q": "Queens",
        "R": "Staten Island",
    }
    # Slicing (not indexing) so a too-short code falls through to the fallback
    letter = code[2:3].upper()
    if letter in borough_by_letter:
        return borough_by_letter[letter]

    # Fallback: derive the borough from the district number
    district = int(code[:2])
    if district in range(1, 7):
        return "Manhattan"
    elif district in range(7, 13):
        return "Bronx"
    elif district in range(13, 24) or district == 32:
        return "Brooklyn"
    elif district in range(24, 31):
        return "Queens"
    else:
        return "Staten Island"

In [17]:
# Get the Boroughs: add a "borough" column to each frame from its school code
for scores_df in (ela_df, math_df):
    scores_df["borough"] = scores_df["school_code"].apply(get_school_borough)

In [18]:
ela_df.head()

Unnamed: 0,school_code,grade,year,student_type,ela_number_tested,ela_avg_of_total_students_tested,ela_lvl_one_count,ela_lvl_one_pct,ela_lvl_two_count,ela_lvl_two_pct,ela_lvl_three_count,ela_lvl_three_pct,ela_lvl_four_count,ela_lvl_four_pct,ela_lvl_three_and_four_count,ela_lvl_three_and_four_pct,school_name,borough
0,01M015,3,2023-01-01,All Students,24,455,4,16.7,5,20.8,11,45.8,4,16.7,15,62.5,P.S. 015 ROBERTO CLEMENTE,Manhattan
1,01M015,4,2023-01-01,All Students,17,454,1,5.9,6,35.3,8,47.1,2,11.8,10,58.8,P.S. 015 ROBERTO CLEMENTE,Manhattan
2,01M015,5,2023-01-01,All Students,30,441,10,33.3,11,36.7,7,23.3,2,6.7,9,30.0,P.S. 015 ROBERTO CLEMENTE,Manhattan
3,01M015,3,2022-01-01,All Students,21,594,4,19.0,12,57.1,4,19.0,1,4.8,5,23.8,P.S. 015 ROBERTO CLEMENTE,Manhattan
4,01M015,4,2022-01-01,All Students,30,596,6,20.0,14,46.7,5,16.7,5,16.7,10,33.3,P.S. 015 ROBERTO CLEMENTE,Manhattan


In [19]:
math_df.head()

Unnamed: 0,school_code,grade,year,student_type,math_number_tested,math_avg_of_total_students_tested,math_lvl_one_count,math_lvl_one_pct,math_lvl_two_count,math_lvl_two_pct,math_lvl_three_count,math_lvl_three_pct,math_lvl_four_count,math_lvl_four_pct,math_lvl_three_and_four_count,math_lvl_three_and_four_pct,school_name,borough
0,01M015,3,2023-01-01,All Students,27,447,6,22.2,9,33.3,7,25.9,5,18.5,12,44.4,P.S. 015 ROBERTO CLEMENTE,Manhattan
1,01M015,4,2023-01-01,All Students,23,445,7,30.4,3,13.0,12,52.2,1,4.3,13,56.5,P.S. 015 ROBERTO CLEMENTE,Manhattan
2,01M015,5,2023-01-01,All Students,30,432,14,46.7,11,36.7,5,16.7,0,0.0,5,16.7,P.S. 015 ROBERTO CLEMENTE,Manhattan
3,01M015,3,2022-01-01,All Students,21,583,12,57.1,4,19.0,5,23.8,0,0.0,5,23.8,P.S. 015 ROBERTO CLEMENTE,Manhattan
4,01M015,4,2022-01-01,All Students,30,591,10,33.3,13,43.3,5,16.7,2,6.7,7,23.3,P.S. 015 ROBERTO CLEMENTE,Manhattan


In [20]:
# Combine ELA and Math results; only rows present in both frames are kept (inner join)
shared_keys = ["school_code", "grade", "year", "student_type", "school_name", "borough"]
full_df = pd.merge(ela_df, math_df, how="inner", on=shared_keys)

In [21]:
ela_df.describe()

Unnamed: 0,grade,year,ela_number_tested,ela_avg_of_total_students_tested,ela_lvl_one_count,ela_lvl_one_pct,ela_lvl_two_count,ela_lvl_two_pct,ela_lvl_three_count,ela_lvl_three_pct,ela_lvl_four_count,ela_lvl_four_pct,ela_lvl_three_and_four_count,ela_lvl_three_and_four_pct
count,321389.0,321389,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0,321389.0
mean,5.183118,2017-08-01 21:15:01.834225664,55.633833,420.66123,14.356711,29.226244,18.166701,32.716335,14.329093,24.607326,8.781327,13.452951,23.110421,38.059645
min,3.0,2013-01-01 00:00:00,6.0,193.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,2015-01-01 00:00:00,19.0,297.0,3.0,11.4,5.0,23.5,3.0,13.2,0.0,0.0,5.0,17.2
50%,5.0,2017-01-01 00:00:00,39.0,329.0,9.0,25.0,12.0,33.3,9.0,23.6,3.0,7.7,12.0,34.8
75%,7.0,2019-01-01 00:00:00,70.0,593.0,19.0,42.9,23.0,41.9,18.0,34.1,9.0,19.6,29.0,56.1
max,8.0,2023-01-01 00:00:00,804.0,650.0,365.0,100.0,359.0,100.0,250.0,100.0,421.0,100.0,494.0,100.0
std,1.675131,,59.448583,139.76929,17.633543,22.122043,21.345369,14.025319,18.290194,15.400899,18.286441,16.316747,33.431862,25.283138


In [22]:
# Match the coordinates: load the school information file that holds them
temp = pd.read_csv(filepath_or_buffer="./data/school_info.csv")
temp.head(n=5)

Unnamed: 0,fiscal_year,system_code,location_code,location_name,BEDS,Managed_by_name,location_type_description,Location_Category_Description,Grades_text,Grades_final_text,...,Administrative_District_Name,community_school_sup_name,Tier_3_Support_Location_Name,Tier_3_Support_Leader_Name,Tier_2_Support_Location_Name,HighSchool_Network_Location_Code,HighSchool_Network_Name,HighSchool_Network_Superintendent,Community_district 1,Police_precinct
0,2020,15K001,K001,P.S. 001 The Bergen,331500010001,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 15,"SKOP, ANITA",NYCDOE Borough Office - Brooklyn North,,School Support Team 5- Brooklyn North,,,,307.0,72.0
1,2020,17K002,K002,Parkside Preparatory Academy,331700010002,DOE,General Academic,Junior High-Intermediate-Middle,"06,07,08,SE",060708,...,COMMUNITY SCHOOL DISTRICT 17,"ELLIS, CLARENCE",NYCDOE Borough Office - Brooklyn South,Mauriciere de Govia,School Support Team 2- Brooklyn South,,,,309.0,71.0
2,2020,13K003,K003,P.S. 003 The Bedford Village,331300010003,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 13,"SAMUELS, KAMAR",NYCDOE Borough Office - Brooklyn North,,School Support Team 3- Brooklyn North,,,,303.0,79.0
3,2020,75K004,K004,P.S. K004,307500013004,DOE,Special Education,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05,06,07,SE",...,CITYWIDE SPECIAL EDUCATION,"LOUISSAINT, KETLER",D75 CITYWIDE BCO,Tillman Roberto,Children First Network 752,,,,305.0,75.0
4,2020,16K005,K005,P.S. 005 Dr. Ronald McNair,331600010005,DOE,General Academic,Elementary,"PK,0K,01,02,03,04,05,SE","PK,0K,01,02,03,04,05",...,COMMUNITY SCHOOL DISTRICT 16,"MARTIN, YOLANDA",NYCDOE Borough Office - Brooklyn North,,School Support Team 4- Brooklyn North,,,,303.0,81.0


In [23]:
# Lower-case the column names (only longitude and latitude are used later)
temp.columns = [column.lower() for column in temp.columns]
temp.columns

Index(['fiscal_year', 'system_code', 'location_code', 'location_name', 'beds',
       'managed_by_name', 'location_type_description',
       'location_category_description', 'grades_text', 'grades_final_text',
       'open_date', 'status_descriptions', 'primary_building_code',
       'primary_address_line_1', 'state_code', 'x_coordinate', 'y_coordinate',
       'longitude', 'latitude', 'community_district', 'council-district',
       'census_tract', 'borough_block_lot', 'nta', 'nta_name',
       'principal_name', 'principal_title', 'principal_phone_number',
       'fax_number', 'geographical_district_code',
       'administrative_district_code', 'administrative_district_name',
       'community_school_sup_name', 'tier_3_support_location_name',
       'tier_3_support_leader_name', 'tier_2_support_location_name',
       'highschool_network_location_code', 'highschool_network_name',
       'highschool_network_superintendent', 'community_district 1',
       'police_precinct'],
      dtype=

In [24]:
# Merge the data by school code: a left join keeps every score row, and the
# join key from the lookup side is dropped afterwards
df_merged = (
    full_df
    .merge(
        temp[["system_code", "longitude", "latitude"]],
        how="left",
        left_on="school_code",
        right_on="system_code",
    )
    .drop(columns="system_code")
)
df_merged.head()

Unnamed: 0,school_code,grade,year,student_type,ela_number_tested,ela_avg_of_total_students_tested,ela_lvl_one_count,ela_lvl_one_pct,ela_lvl_two_count,ela_lvl_two_pct,...,math_lvl_two_count,math_lvl_two_pct,math_lvl_three_count,math_lvl_three_pct,math_lvl_four_count,math_lvl_four_pct,math_lvl_three_and_four_count,math_lvl_three_and_four_pct,longitude,latitude
0,01M015,3,2023-01-01,All Students,24,455,4,16.7,5,20.8,...,9,33.3,7,25.9,5,18.5,12,44.4,-73.978747,40.722075
1,01M015,4,2023-01-01,All Students,17,454,1,5.9,6,35.3,...,3,13.0,12,52.2,1,4.3,13,56.5,-73.978747,40.722075
2,01M015,5,2023-01-01,All Students,30,441,10,33.3,11,36.7,...,11,36.7,5,16.7,0,0.0,5,16.7,-73.978747,40.722075
3,01M015,3,2022-01-01,All Students,21,594,4,19.0,12,57.1,...,4,19.0,5,23.8,0,0.0,5,23.8,-73.978747,40.722075
4,01M015,4,2022-01-01,All Students,30,596,6,20.0,14,46.7,...,13,43.3,5,16.7,2,6.7,7,23.3,-73.978747,40.722075


In [25]:
# Check for NULL values: which school codes found no coordinates in the merge?
df_merged.loc[df_merged["longitude"].isna(), "school_code"].unique()

array(['05M371', '19K935', '20K936', '24Q419', '31R085'], dtype=object)

In [26]:
# Locate these schools manually
school_code_match = [
    {"school_code": "05M371", "longitude": -73.952170, "latitude": 40.814970},
    {"school_code": "19K935", "longitude": -73.876400, "latitude": 40.681190},
    {"school_code": "20K936", "longitude": -74.020290, "latitude": 40.642790},
    {"school_code": "24Q419", "longitude": -73.858920, "latitude": 40.759470},
    {"school_code": "31R085", "longitude": -74.0850979, "latitude": 40.6183021},
]
# Match: look up the manual coordinate for each row's school code and use it
# only where the merge left the coordinate empty
manual_longitude = {entry["school_code"]: entry["longitude"] for entry in school_code_match}
manual_latitude = {entry["school_code"]: entry["latitude"] for entry in school_code_match}
df_merged["longitude"] = df_merged["longitude"].fillna(df_merged["school_code"].map(manual_longitude))
df_merged["latitude"] = df_merged["latitude"].fillna(df_merged["school_code"].map(manual_latitude))

In [27]:
# Check for NULL values again
df_merged[pd.isnull(df_merged["longitude"])].school_code.unique()

array([], dtype=object)

In [28]:
# Check for NULLs
df_merged.isnull().sum()

school_code                          0
grade                                0
year                                 0
student_type                         0
ela_number_tested                    0
ela_avg_of_total_students_tested     0
ela_lvl_one_count                    0
ela_lvl_one_pct                      0
ela_lvl_two_count                    0
ela_lvl_two_pct                      0
ela_lvl_three_count                  0
ela_lvl_three_pct                    0
ela_lvl_four_count                   0
ela_lvl_four_pct                     0
ela_lvl_three_and_four_count         0
ela_lvl_three_and_four_pct           0
school_name                          0
borough                              0
math_number_tested                   0
math_avg_of_total_students_tested    0
math_lvl_one_count                   0
math_lvl_one_pct                     0
math_lvl_two_count                   0
math_lvl_two_pct                     0
math_lvl_three_count                 0
math_lvl_three_pct       

In [29]:
# There are duplicates
df_merged.duplicated().sum()

9101

In [30]:
# Drop duplicates: keep the first occurrence of each fully identical row
df_merged = df_merged.drop_duplicates()

In [31]:
# Double check for no duplicates
df_merged.duplicated().sum()

0

In [32]:
df_merged["school_name"].unique()

array(['P.S. 015 ROBERTO CLEMENTE', 'P.S. 020 ANNA SILVER',
       'P.S. 034 FRANKLIN D. ROOSEVELT', ...,
       'P.S. /I.S. 384 FRANCES E. CARTER',
       'ALL CITY LEADERSHIP SECONDARY SCHOOL',
       'EVERGREEN MIDDLE SCHOOL FOR URBAN EXPLORATION'], dtype=object)

In [33]:
# Export the data to an Excel workbook, without writing the row index
df_merged.to_excel(excel_writer="data/nyc_schools_test_result.xlsx", index=False)