In [1]:
import requests
import json
import pandas as pd

In [2]:
response = requests.get('https://www.forbes.com/forbesapi/org/top-colleges/2021/position/true.json?limit=1000&fields=organizationName,academics,state,financialAid,rank,medianBaseSalary,campusSetting,studentPopulation,organization,description')

data = response.text

responseObject = json.loads(data)

responseData = responseObject['organizationList']['organizationsLists']

data_main = pd.DataFrame(responseData)

#### Adding 'organization' to main table

In [3]:
# add organization
organization = [ x['organization'] for x in responseData]
data_main = pd.concat([data_main, pd.DataFrame(organization)], axis=1)

#### Add single 'Primary_Key' column to main table

In [4]:
# add naturalId column
data_main['Primary_Key'] = [ x['organization']['naturalId'].split('/')[-1] for x in responseData]

#### Adding 'organization/geoLocation' to main table

In [5]:

# add geoLocation columns
rows = []
for organizationsLists in responseData:
    row = {}    
    # row['Primary_Key'] = organizationsLists['organization']['naturalId'].split('/')[-1]
    try:
        row['latutide'] = organizationsLists['organization']['geoLocation']['latitude']
    except:
        pass
    try:
        row['longitude']= organizationsLists['organization']['geoLocation']['longitude']
    except:
        pass
    rows.append(row)
data_main = pd.concat([data_main, pd.DataFrame(rows)], axis=1)

#### Adding 'Academics' to main table

In [6]:
academics = [ # items in the 'academics' key to be unpacked
    'attendanceStatus',
    'firstToSecondYearRetention',
    'overallGraduationRates',
    'enrollmentByGender',
    'graduationRateByGender',
    'enrollmentByRace',
    'graduationRateByRace']

def restructure(list_of_dictionaries,name): # change the shape of the json
    row = {}
    for item in list_of_dictionaries:
        key_value = [value for key,value in item.items()]
        key_name = name + '_' + key_value[0]
        row[key_name] = key_value[1]
    return row

def dataFrame_of_Item(item): # build a data frame from all json's 
    list_of_rows = []
    for organization in responseData:
        # create reshaped json
        graduationRateByRace = restructure(organization['academics'][item],item)
        # add primary_key number
        # graduationRateByRace['PrimaryKey'] = organization['organization']['naturalId'].split('/')[-1]
        # append to list
        list_of_rows.append(graduationRateByRace)
    return pd.DataFrame(list_of_rows)

# add date frames to main data
df_ls = []

for item in academics:
    df_ls.append(dataFrame_of_Item(item))

academics_unpacked = pd.concat(df_ls, axis=1)

data_main = pd.concat([data_main,academics_unpacked], axis=1)



#### Adding 'organization/socialNetworks' to main table

In [7]:
def restructure(list_of_dictionaries): # input: list of dictionaries | output: structured dictionary
    row = {}
    for item in list_of_dictionaries:
        key_value = [value for key,value in item.items()]
        key_name =key_value[0]
        row[key_name] = key_value[1]
    return row

def dataFrame_of_Item(col_name,item,TF): # input: name of dict to unpack | output: data frame
    list_of_rows = []
    for organization in responseData:
        # create json
        dictionary = {}
        try:
            # create reshaped json
            dictionary = dictionary | restructure(organization[col_name][item])
        except:
            pass
        # add primary_key number
        if TF:
            dictionary['Primary_Key'] = organization['organization']['naturalId'].split('/')[-1]
        # append to list
        list_of_rows.append(dictionary)
    return pd.DataFrame(list_of_rows)

df_socialNetworks = dataFrame_of_Item('organization','socialNetworks',True)

data_main = data_main.merge(df_socialNetworks, on='Primary_Key')

#### Adding 'financialAid' to main table

In [8]:
financialAid = [
    'grantAidByType',
    'avgGrantAidByType',
    'loansByType',
    'avgLoansByType']

def restructure(list_of_dictionaries): # input: list of dictionaries | output: structured dictionary
    row = {}
    for item in list_of_dictionaries:
        key_value = [value for key,value in item.items()]
        key_name =key_value[0]
        row[key_name] = key_value[1]
    return row

def dataFrame_of_Item(col_name,item,TF): # input: name of dict to unpack | output: data frame
    list_of_rows = []
    for organization in responseData:
        # create json
        dictionary = {}
        try:
            # create reshaped json
            dictionary = dictionary | restructure(organization[col_name][item])
        except:
            pass
        # add primary_key number
        if TF:
            dictionary['Primary_Key'] = organization['organization']['naturalId'].split('/')[-1]
        # append to list
        list_of_rows.append(dictionary)
    return pd.DataFrame(list_of_rows)

# add date frames to main data
df_ls = []

for item in financialAid:
    df_ls.append(dataFrame_of_Item('financialAid',item,True))

financialAid_unpacked = pd.concat(df_ls, axis=1)

financialAid_unpacked = financialAid_unpacked.loc[:, ~financialAid_unpacked.columns.duplicated()]

data_main = data_main.merge(financialAid_unpacked)

data_main = data_main.loc[:, ~data_main.columns.duplicated()]

# ------------------ TEST AREA ------------------

# WHAT TO DO:
* ~~unpack 'organization'~~
    * ~~unpack 'geoLocation'~~
    * ~~unpack 'socialNetworks'~~
* ~~unpack 'academics'~~
    * ~~unpack 'attendanceStatus'~~
    * ~~unpack 'firstToSecondYearRetention'~~
    * ~~unpack 'overallGraduationRates'~~
    * ~~unpack 'enrollmentByGender'~~
    * ~~unpack 'graduationRateByGender'~~
    * ~~unpack 'enrollmentByRace'~~
    * ~~unpack 'graduationRateByRace'~~
* ~~unpack 'financialAid'~~
    * ~~grantAidByType~~
    * ~~avgGrantAidByType~~
    * ~~loansByType~~
    * ~~avgLoansByType~~
* ~~drop unnecessary columns~~

In [9]:
data_main.shape


(600, 91)

In [11]:
col_to_drop = [
       'organization',
       'academics',
       'financialAid',
       'listImages',
       'visible',
       'relatedVisible',
       'imageExists',
       'recentContentCount',
       'country',
       'collegeMedia',
       'landscapeImage',
       'industries',
       'embargo',
       'image',
       'industry',
       'ceoName',
       'ceoTitle',
       'premiumProfile',
       'employees',
       'portraitImage',
       'naturalId',
       'geoLocation',
       'uri',
       'uris',
       'socialNetworks',
       'placeUri'
       ]

data_main.drop(col_to_drop, axis=1, inplace=True)

data_main.to_csv('University_Data.csv')

In [15]:
pd.options.display.max_columns = None
data_main

Unnamed: 0,description,rank,organizationName,state,studentPopulation,campusSetting,medianBaseSalary,name,webSite,phoneNumber,shortUri,squareImage,city,region,yearFounded,stateCode,Primary_Key,latutide,longitude,attendanceStatus_partTime,attendanceStatus_fullTime,firstToSecondYearRetention_fullTime,firstToSecondYearRetention_partTime,overallGraduationRates_4,overallGraduationRates_6,enrollmentByGender_enrollmentMale,enrollmentByGender_enrollmentFemale,enrollmentByGender_AgeUnder18,enrollmentByGender_Age18to24,enrollmentByGender_Age25to64,enrollmentByGender_Age65andOver,graduationRateByGender_graduationMale,graduationRateByGender_graduationFemale,enrollmentByRace_americanIndian,enrollmentByRace_asian,enrollmentByRace_hawaiianPacific,enrollmentByRace_africanAmerican,enrollmentByRace_hispanic,enrollmentByRace_white,enrollmentByRace_twoRaces,enrollmentByRace_unknown,enrollmentByRace_alien,graduationRateByRace_americanIndian,graduationRateByRace_asian,graduationRateByRace_hawaiianPacific,graduationRateByRace_africanAmerican,graduationRateByRace_hispanic,graduationRateByRace_white,graduationRateByRace_twoRaces,graduationRateByRace_unknown,graduationRateByRace_alien,Twitter,Facebook,LinkedIn,Instagram,YouTube,federalGrant,pellGrant,otherFederalGrant,stateLocalGrant,institutionalGrant,anyGrant,anyLoan,federalLoan,nonFederalLoan
0,One of the top public universities in the coun...,1,"University of California, Berkeley",CA,43185,Urban,138800.0,"University of California, Berkeley",http://www.berkeley.edu,(510) 642-6000,http://onforb.es/MvI8Zf,//specials-images.forbesimg.com/imageserve/5d5...,Berkeley,West,1868.0,CA,64,37.869236,-122.258393,7.0,93.0,97.0,72.0,76.0,93.0,48.0,52.0,782.0,28810.0,1752.0,4.0,91.0,94.0,0.2,30.2,0.1,2.1,14.1,26.7,5.4,4.0,17.2,100.0,97.0,75.0,76.0,84.0,92.0,92.0,96.0,92.0,https://twitter.com/UCBerkeley,https://www.facebook.com/UCBerkeley,https://www.linkedin.com/edu/university-of-cal...,http://instagram.com/ucberkeleyofficial/,http://www.berkeley.edu/tour/,23.0,27.0,13.0,31.0,51.0,19126.0,23.0,23.0,12.0
1,"The second oldest Ivy League institution, Yale...",2,Yale University,CT,13609,Urban,141300.0,Yale University,http://www.yale.edu,203-432-4771,http://onforb.es/NirARu,https://specials-images.forbesimg.com/imageser...,New Haven,Northeast,1701.0,CT,10,41.314042,-72.923425,1.0,99.0,99.0,0.0,88.0,97.0,48.0,52.0,114.0,5947.0,30.0,1.0,97.0,98.0,0.3,15.1,0.1,6.0,10.4,41.2,5.1,1.1,20.7,100.0,97.0,0.0,97.0,97.0,98.0,96.0,100.0,95.0,https://twitter.com/yale,https://www.facebook.com/YaleUniversity,https://www.linkedin.com/edu/school?id=18043&t...,http://instagram.com/yale,http://admissions.yale.edu/virtual-tour,20.0,18.0,9.0,,56.0,55827.0,6.0,6.0,
2,Princeton is a leading private research univer...,3,Princeton University,NJ,8419,Suburban,150500.0,Princeton University,http://www.princeton.edu,609-258-3000,http://onforb.es/NirwkP,http://specials-images.forbesimg.com/imageserv...,Princeton,Northeast,1746.0,NJ,2,40.349855,-74.659119,1.0,99.0,98.0,0.0,90.0,98.0,54.0,46.0,145.0,5239.0,38.0,0.0,97.0,99.0,0.1,17.1,0.1,5.9,9.0,37.1,4.3,3.3,23.1,100.0,99.0,100.0,97.0,97.0,98.0,92.0,96.0,98.0,https://twitter.com/princeton,https://www.facebook.com/PrincetonU,http://www.linkedin.com/edu/princeton-universi...,http://instagram.com/Princeton_University,http://www.princeton.edu/~oktour/virtualtour/,21.0,19.0,4.0,2.0,62.0,52188.0,10.0,4.0,8.0
3,"Located just outside of Palo Alto, CA, Stanfor...",4,Stanford University,CA,17249,Suburban,147100.0,Stanford University,http://www.stanford.edu,650-723-2091,http://onforb.es/NiryZI,//specials-images.forbesimg.com/imageserve/5d5...,Stanford,West,1891.0,CA,6,37.431370,-122.168924,6.0,94.0,99.0,0.0,73.0,94.0,55.0,45.0,13.0,6867.0,116.0,0.0,94.0,95.0,0.4,18.1,0.2,4.2,11.2,32.4,6.1,2.1,25.3,100.0,93.0,100.0,94.0,93.0,96.0,93.0,83.0,95.0,https://twitter.com/stanford,https://www.facebook.com/stanford,http://www.linkedin.com/edu/stanford-universit...,http://instagram.com/stanford,,18.0,17.0,8.0,5.0,56.0,54808.0,7.0,7.0,1.0
4,Columbia University in the City of New York is...,5,Columbia University,NY,31456,Urban,132100.0,Columbia University,http://www.columbia.edu,212-854-1754,http://onforb.es/NiryZG,https://specials-images.forbesimg.com/imageser...,New York,Northeast,1754.0,NY,13,40.806515,-73.961288,16.0,84.0,99.0,95.0,86.0,95.0,47.0,53.0,90.0,6964.0,1162.0,5.0,95.0,96.0,0.2,12.3,0.1,4.7,7.8,30.1,2.7,4.6,37.5,67.0,98.0,0.0,98.0,94.0,95.0,92.0,100.0,97.0,https://twitter.com/columbia,https://www.facebook.com/columbia,http://www.linkedin.com/company/columbia-unive...,,,18.0,25.0,10.0,6.0,52.0,55071.0,10.0,8.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,"Affiliated with the Presbyterian Church, Whitw...",596,Whitworth University,WA,2814,Suburban,89100.0,Whitworth University,http://www.whitworth.edu/,509-777-1000,http://onforb.es/MvI5wD,//specials-images.forbesimg.com/imageserve/598...,Spokane,West,1890.0,WA,208,47.750994,-117.415201,11.0,89.0,84.0,0.0,63.0,75.0,40.0,60.0,31.0,2303.0,83.0,0.0,73.0,77.0,0.7,4.4,1.1,2.1,11.3,66.6,9.1,0.8,3.9,60.0,70.0,40.0,90.0,64.0,78.0,68.0,100.0,60.0,http://twitter.com/whitworth,https://www.facebook.com/whitworthuniversity,https://www.linkedin.com/edu/school?id=19665&t...,http://instagram.com/whitworthuniversity,https://www.whitworth.edu/GeneralInformation/V...,38.0,30.0,33.0,27.0,97.0,32340.0,59.0,59.0,6.0
596,"Augustana University in Sioux Falls, SD, one o...",597,Augustana University,SD,2113,Urban,91900.0,Augustana University,http://www.augie.edu/,(605) 274-0770,http://onforb.es/NirTf5,//specials-images.forbesimg.com/imageserve/56e...,Sioux Falls,Midwest,1860.0,SD,307,43.527114,-96.736267,16.0,84.0,82.0,0.0,69.0,71.0,34.0,66.0,22.0,1730.0,64.0,2.0,67.0,74.0,0.8,1.6,0.0,2.0,3.9,83.9,1.5,0.7,5.6,0.0,0.0,0.0,50.0,67.0,74.0,44.0,0.0,59.0,https://twitter.com/augustanasd,https://www.facebook.com/augustanasd,http://www.linkedin.com/company/227550,http://instagram.com/augustanasd,www.augie.edu/admission/visiting-campus,20.0,19.0,20.0,29.0,100.0,22281.0,66.0,64.0,22.0
597,"A small, Lutheran liberal arts college in the ...",598,Luther College,IA,1951,Rural,83800.0,Luther College,http://www.luther.edu/,(563) 387-2000,http://onforb.es/NiqjKf,https://specials-images.forbesimg.com/imageser...,Decorah,Midwest,1861.0,IA,121,43.313226,-91.799646,1.0,99.0,84.0,0.0,67.0,75.0,43.0,57.0,18.0,1915.0,18.0,0.0,70.0,79.0,0.3,2.1,0.4,2.3,5.6,77.6,2.2,0.4,9.1,100.0,73.0,0.0,50.0,54.0,76.0,83.0,0.0,79.0,https://twitter.com/luthercollege,https://www.facebook.com/luthercollege?fref=ts,https://www.linkedin.com/edu/luther-college-20110,http://instagram.com/luthercollege,http://www.luther.edu/campus/virtualtour/,20.0,18.0,20.0,11.0,100.0,31966.0,66.0,66.0,45.0
598,Coe College is a liberal arts school in Cedar ...,599,Coe College,IA,1430,Urban,88300.0,Coe College,http://www.coe.edu/,319.399.8500,http://onforb.es/NirwBf,https://specials-images.forbesimg.com/imageser...,Cedar Rapids,Midwest,1851.0,IA,224,41.985318,-91.657250,3.0,97.0,80.0,0.0,64.0,66.0,45.0,55.0,6.0,1392.0,20.0,2.0,62.0,68.0,0.4,4.6,0.1,7.1,11.3,65.7,2.5,6.5,1.8,0.0,60.0,0.0,56.0,69.0,67.0,80.0,29.0,60.0,https://twitter.com/coecollege,https://www.facebook.com/CoeCollege,,,,42.0,35.0,18.0,25.0,100.0,35805.0,77.0,77.0,21.0
