In [1]:
# Import libraries
import requests, json
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
# The bundled 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6;
# use whichever name this Matplotlib installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
import difflib

In [2]:
def import_json(url, timeout=30):
    """Download a ranking JSON feed and return its first 200 entries.

    Parameters
    ----------
    url : str
        Address of a JSON document whose top-level ``'data'`` key holds the
        list of per-university records.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The first 200 records found under ``'data'``, in the order served.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    r.raise_for_status()
    universityJson = json.loads(r.text)
    return pd.DataFrame(universityJson['data']).head(200)

def _qs_count(soup, css_class):
    """Return the integer shown in the first ``div`` whose class is *css_class*, or 0.

    The number is read from the nested ``div.number`` element; the first and
    last characters of its text are discarded (presumably surrounding
    whitespace -- TODO confirm against a live page) and thousands separators
    are removed before conversion.
    """
    try:
        text = soup.find_all('div', class_=css_class)[0].find('div', class_='number').text
        return int(text[1:-1].replace(',', ''))
    except (IndexError, AttributeError, ValueError):
        # Block missing from the page, no nested number, or non-numeric text.
        return 0


def import_details_qs(top200): #requires special handling as international stud/fac data not available
    """Scrape faculty and student head-counts from each university's QS page.

    Parameters
    ----------
    top200 : pandas.DataFrame
        Ranking table with an ``'nid'`` column; each value is appended to
        ``https://www.topuniversities.com/node/`` to build the page address.

    Returns
    -------
    pandas.DataFrame
        One row per page that answered successfully, with the columns
        ``nid``, ``total_faculty_members``, ``international_faculty_members``,
        ``total_students`` and ``international_students``. A figure that
        cannot be found or parsed is stored as 0. Pages whose response is not
        OK are skipped. The columns are present even when no page succeeded,
        so a later merge on ``'nid'`` still works.
    """
    columns = ['nid', 'total_faculty_members', 'international_faculty_members',
               'total_students', 'international_students']
    records = []
    for nid in top200['nid']:
        rIntl = requests.get('https://www.topuniversities.com/node/' + str(nid))
        if not rIntl.ok:
            continue

        soup = BeautifulSoup(rIntl.text, 'html.parser')
        records.append({
            'nid': nid,
            'total_faculty_members': _qs_count(soup, 'total faculty'),
            'international_faculty_members': _qs_count(soup, 'inter faculty'),
            'total_students': _qs_count(soup, 'total student'),
            'international_students': _qs_count(soup, 'total inter'),
        })
    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(records, columns=columns)

def import_details_times(top200):
    """Download the Times Higher Education map-marker data for each university.

    Parameters
    ----------
    top200 : pandas.DataFrame
        Ranking table with an ``'nid'`` column; each value selects the
        ``gmap_<nid>.json`` marker file to fetch.

    Returns
    -------
    pandas.DataFrame
        The rows of every marker file that was fetched successfully, stacked
        in request order with a fresh index. Empty when nothing could be
        fetched. Files whose response is not OK are skipped.
    """
    frames = []
    for nid in top200['nid']:
        locationLink = ('https://www.timeshighereducation.com/sites/default/files/'
                        'institution_markers/gmap_' + str(nid) + '.json')
        rLocation = requests.get(locationLink)
        if rLocation.ok:
            frames.append(pd.DataFrame(json.loads(rLocation.text)))

    if not frames:
        return pd.DataFrame()
    # Concatenate once; DataFrame.append in a loop is quadratic and was
    # removed in pandas 2.x.
    return pd.concat(frames, ignore_index=True)

## QS International Rankings

In [156]:
%%time
# Download the QS ranking feed (first 200 rows), then request each university's
# node page to collect its faculty/student head-counts.
top200_qs = import_json('https://www.topuniversities.com/sites/default/files/qs-rankings-data/357051.txt?_=1508492427994')
data_qs = import_details_qs(top200_qs)

CPU times: user 57.3 s, sys: 716 ms, total: 58 s
Wall time: 1min 45s


In [157]:
# Attach the scraped head-counts to the ranking rows through the shared node id.
# The former `rename(columns={'title': 'name'})` line was dropped: its result was
# never assigned, so it had no effect, and a later cell renames 'title' itself.
top200_qs_full = pd.merge(top200_qs, data_qs, on='nid')
top200_qs_full.head()

Unnamed: 0,cc,core_id,country,guide,logo,nid,rank_display,region,score,stars,title,url,international_faculty_members,international_students,total_faculty_members,total_students
0,US,410,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294850,1,North America,100.0,6,Massachusetts Institute of Technology (MIT),/universities/massachusetts-institute-technolo...,1679,3717,2982,11067
1,US,573,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",297282,2,North America,98.7,5,Stanford University,/universities/stanford-university,2042,3611,4285,15878
2,US,253,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294270,3,North America,98.4,5,Harvard University,/universities/harvard-university,1311,5266,4350,22429
3,US,94,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294562,4,North America,97.7,5,California Institute of Technology (Caltech),/universities/california-institute-technology-...,350,647,953,2255
4,GB,95,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294561,5,Europe,95.6,5,University of Cambridge,/universities/university-cambridge,2278,6699,5490,18770


## Times Higher Education International Rankings

In [155]:
%%time
# Download the Times Higher Education ranking feed (first 200 rows), then fetch
# the map-marker JSON of each university.
top200_times = import_json('https://www.timeshighereducation.com/sites/default/files/the_data_rankings/world_university_rankings_2018_limit0_369a9045a203e176392b9fb8f8c1cb2a.json')
locdata_times = import_details_times(top200_times)

CPU times: user 5.47 s, sys: 231 ms, total: 5.7 s
Wall time: 23.5 s


In [159]:
# Give both Times tables the same key column name, then join them on it.
locdata_times = locdata_times.rename(columns=dict(title='TimesName'))
top200_times = top200_times.rename(columns=dict(name='TimesName'))
top200_times_full = top200_times.merge(locdata_times, on='TimesName')

In [160]:
top200_times_full.head()

Unnamed: 0,aliases,location,member_level,TimesName,nid,rank,rank_order,record_type,scores_citations,scores_citations_rank,scores_industry_income,scores_industry_income_rank,scores_international_outlook,scores_international_outlook_rank,scores_overall,scores_overall_rank,scores_research,scores_research_rank,scores_teaching,scores_teaching_rank,stats_female_male_ratio,stats_number_students,stats_pc_intl_students,stats_student_staff_ratio,subjects_offered,url,country,lat,lng,locality,path,postal_code,thoroughfare
0,University of Oxford,United Kingdom,0,University of Oxford,468,1,10,master_account,99.1,15,63.7,169,95.0,24,94.3,10,99.5,1,86.7,5,46 : 54,20409,38%,11.2,"Archaeology,Art, Performing Arts & Design,Biol...",/world-university-rankings/university-oxford,United Kingdom,51.7577018738,-1.2639590502,Oxford,node/468,OX1 2JD,University Offices
1,University of Cambridge,United Kingdom,0,University of Cambridge,470,2,20,master_account,97.5,29,51.5,260,93.0,35,93.2,20,97.8,3,87.8,3,45 : 55,18389,35%,10.9,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/university-cambridge,United Kingdom,52.2049598694,0.1160930023,Cambridge,node/470,CB2 1TN,The Old Schools
2,California Institute of Technology caltech,United States,0,California Institute of Technology,128779,=3,30,private,99.5,10,92.6,51,59.7,322,93.0,30,97.5,4,90.3,1,31 : 69,2209,27%,6.5,"Architecture,Biological Sciences,Business & Ma...",/world-university-rankings/california-institut...,United States,34.1359519959,-118.1256332397,Pasadena,node/128779,91125,1200 East California Boulevard
3,Stanford University,United States,11,Stanford University,467,=3,40,private,99.9,4,60.5,189,77.6,162,93.0,40,96.7,5,89.1,2,42 : 58,15845,22%,7.5,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/stanford-university,United States,37.4276580811,-122.1700668335,Stanford,node/467,94305–2004,450 Serra Mall
4,Massachusetts Institute of Technology,United States,0,Massachusetts Institute of Technology,471,5,50,private,100.0,1,88.4,63,87.6,81,92.5,50,91.9,9,87.3,4,37 : 63,11177,34%,8.7,"Architecture,Art, Performing Arts & Design,Bio...",/world-university-rankings/massachusetts-insti...,United States,42.360091,-71.09416,Cambridge,node/471,02139-4307,77 Massachusetts Avenue


## Merging both datasets by name

In [162]:
# Rename the QS name column to 'QSname' so it stays distinct from the Times
# 'TimesName' column when the two rankings are matched and merged.
top200_qs_full = top200_qs_full.rename(columns={'title':'QSname'})
top200_qs_full.head()

Unnamed: 0,cc,core_id,country,guide,logo,nid,rank_display,region,score,stars,QSname,url,international_faculty_members,international_students,total_faculty_members,total_students
0,US,410,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294850,1,North America,100.0,6,Massachusetts Institute of Technology (MIT),/universities/massachusetts-institute-technolo...,1679,3717,2982,11067
1,US,573,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",297282,2,North America,98.7,5,Stanford University,/universities/stanford-university,2042,3611,4285,15878
2,US,253,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294270,3,North America,98.4,5,Harvard University,/universities/harvard-university,1311,5266,4350,22429
3,US,94,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294562,4,North America,97.7,5,California Institute of Technology (Caltech),/universities/california-institute-technology-...,350,647,953,2255
4,GB,95,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294561,5,Europe,95.6,5,University of Cambridge,/universities/university-cambridge,2278,6699,5490,18770


In [168]:
top200_qs_full.QSname.values

array(['Massachusetts Institute of Technology (MIT)',
       'Stanford University', 'Harvard University',
       'California Institute of Technology (Caltech)',
       'University of Cambridge', 'University of Oxford',
       'UCL (University College London)', 'Imperial College London',
       'University of Chicago',
       'ETH Zurich - Swiss Federal Institute of Technology',
       'Nanyang Technological University, Singapore (NTU)',
       'Ecole Polytechnique Fédérale de Lausanne (EPFL)',
       'Princeton University', 'Cornell University',
       'National University of Singapore (NUS)', 'Yale University',
       'Johns Hopkins University', 'Columbia University',
       'University of Pennsylvania', 'The Australian National University',
       'University of Michigan', 'Duke University',
       'The University of Edinburgh', "King's College London",
       'Tsinghua University', 'The University of Hong Kong',
       'University of California, Berkeley (UCB)',
       'The Universi

In [163]:
top200_times_full.head()

Unnamed: 0,aliases,location,member_level,TimesName,nid,rank,rank_order,record_type,scores_citations,scores_citations_rank,scores_industry_income,scores_industry_income_rank,scores_international_outlook,scores_international_outlook_rank,scores_overall,scores_overall_rank,scores_research,scores_research_rank,scores_teaching,scores_teaching_rank,stats_female_male_ratio,stats_number_students,stats_pc_intl_students,stats_student_staff_ratio,subjects_offered,url,country,lat,lng,locality,path,postal_code,thoroughfare
0,University of Oxford,United Kingdom,0,University of Oxford,468,1,10,master_account,99.1,15,63.7,169,95.0,24,94.3,10,99.5,1,86.7,5,46 : 54,20409,38%,11.2,"Archaeology,Art, Performing Arts & Design,Biol...",/world-university-rankings/university-oxford,United Kingdom,51.7577018738,-1.2639590502,Oxford,node/468,OX1 2JD,University Offices
1,University of Cambridge,United Kingdom,0,University of Cambridge,470,2,20,master_account,97.5,29,51.5,260,93.0,35,93.2,20,97.8,3,87.8,3,45 : 55,18389,35%,10.9,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/university-cambridge,United Kingdom,52.2049598694,0.1160930023,Cambridge,node/470,CB2 1TN,The Old Schools
2,California Institute of Technology caltech,United States,0,California Institute of Technology,128779,=3,30,private,99.5,10,92.6,51,59.7,322,93.0,30,97.5,4,90.3,1,31 : 69,2209,27%,6.5,"Architecture,Biological Sciences,Business & Ma...",/world-university-rankings/california-institut...,United States,34.1359519959,-118.1256332397,Pasadena,node/128779,91125,1200 East California Boulevard
3,Stanford University,United States,11,Stanford University,467,=3,40,private,99.9,4,60.5,189,77.6,162,93.0,40,96.7,5,89.1,2,42 : 58,15845,22%,7.5,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/stanford-university,United States,37.4276580811,-122.1700668335,Stanford,node/467,94305–2004,450 Serra Mall
4,Massachusetts Institute of Technology,United States,0,Massachusetts Institute of Technology,471,5,50,private,100.0,1,88.4,63,87.6,81,92.5,50,91.9,9,87.3,4,37 : 63,11177,34%,8.7,"Architecture,Art, Performing Arts & Design,Bio...",/world-university-rankings/massachusetts-insti...,United States,42.360091,-71.09416,Cambridge,node/471,02139-4307,77 Massachusetts Avenue


In [164]:
import re
import unidecode
def makeStringSimple(s):
    """Normalise a university name for fuzzy comparison.

    Lower-cases *s*, transliterates it with ``unidecode`` and removes every
    bracketed segment -- ``(...)`` or ``[...]`` -- brackets included.
    Whitespace that surrounded a removed segment is left in place.

    Parameters
    ----------
    s : str
        The name to normalise.

    Returns
    -------
    str
        The simplified name.
    """
    s2 = s.lower()
    s2 = unidecode.unidecode(s2)
    # Raw string: the same pattern as before, without relying on invalid
    # escape sequences such as "\(" in a normal string literal.
    s2 = re.sub(r"[\(\[].*?[\)\]]", "", s2)
    return s2

In [170]:
import difflib
# Fuzzy-match every Times name against all QS names and keep the closest one
# when it is similar enough.
MATCH_THRESHOLD = 0.9  # similarity ratio a candidate must exceed to count as a match

qs_nameList = list(top200_qs_full.QSname.values)
# Normalise the QS names once instead of once per Times row.
qs_simpleNames = [makeStringSimple(qsName) for qsName in qs_nameList]

matches = []
highestScores = []
for timesName in top200_times_full['TimesName']:
    name = makeStringSimple(timesName)
    # Argument order is kept as (Times, QS): SequenceMatcher.ratio() is not
    # guaranteed to be symmetric.
    scores = [difflib.SequenceMatcher(None, name, qsSimple).ratio() for qsSimple in qs_simpleNames]

    bestScore = max(scores, default=0)  # default guards against an empty QS list
    if bestScore > MATCH_THRESHOLD:
        highestScores.append(bestScore)
        # index() returns the first QS name reaching the best score.
        matches.append(qs_nameList[scores.index(bestScore)])
    else:
        highestScores.append(0)
        matches.append(None)

In [176]:
# Let pandas display up to 500 rows so the whole table can be inspected.
pd.set_option('display.max_rows', 500)
# One row per Times entry: its name, the matched QS name (None when nothing
# passed the threshold) and the similarity score of that match (0 when unmatched).
correspondence = pd.DataFrame({'TimesName':list(top200_times_full['TimesName']),'QSname':matches, 'matchingScore': highestScores})

In [177]:
# Attach the matched QS name to every Times row, then pull in the QS columns
# through that matched name.
tempMerge = top200_times_full.merge(correspondence, on='TimesName')
MERGEDDATAFRAMES = tempMerge.merge(top200_qs_full, on='QSname')

In [178]:
MERGEDDATAFRAMES

Unnamed: 0,aliases,location,member_level,TimesName,nid_x,rank,rank_order,record_type,scores_citations,scores_citations_rank,scores_industry_income,scores_industry_income_rank,scores_international_outlook,scores_international_outlook_rank,scores_overall,scores_overall_rank,scores_research,scores_research_rank,scores_teaching,scores_teaching_rank,stats_female_male_ratio,stats_number_students,stats_pc_intl_students,stats_student_staff_ratio,subjects_offered,url_x,country_x,lat,lng,locality,path,postal_code,thoroughfare,QSname,matchingScore,cc,core_id,country_y,guide,logo,nid_y,rank_display,region,score,stars,url_y,international_faculty_members,international_students,total_faculty_members,total_students
0,University of Oxford,United Kingdom,0,University of Oxford,468,1,10,master_account,99.1,15,63.7,169,95.0,24,94.3,10,99.5,1,86.7,5,46 : 54,20409,38%,11.2,"Archaeology,Art, Performing Arts & Design,Biol...",/world-university-rankings/university-oxford,United Kingdom,51.7577018738,-1.2639590502,Oxford,node/468,OX1 2JD,University Offices,University of Oxford,1.0,GB,478,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294654,6,Europe,95.3,5.0,/universities/university-oxford,2964,7353,6750,19720
1,University of Cambridge,United Kingdom,0,University of Cambridge,470,2,20,master_account,97.5,29,51.5,260,93.0,35,93.2,20,97.8,3,87.8,3,45 : 55,18389,35%,10.9,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/university-cambridge,United Kingdom,52.2049598694,0.1160930023,Cambridge,node/470,CB2 1TN,The Old Schools,University of Cambridge,1.0,GB,95,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294561,5,Europe,95.6,5.0,/universities/university-cambridge,2278,6699,5490,18770
2,California Institute of Technology caltech,United States,0,California Institute of Technology,128779,=3,30,private,99.5,10,92.6,51,59.7,322,93.0,30,97.5,4,90.3,1,31 : 69,2209,27%,6.5,"Architecture,Biological Sciences,Business & Ma...",/world-university-rankings/california-institut...,United States,34.1359519959,-118.1256332397,Pasadena,node/128779,91125,1200 East California Boulevard,California Institute of Technology (Caltech),0.985507,US,94,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294562,4,North America,97.7,5.0,/universities/california-institute-technology-...,350,647,953,2255
3,Stanford University,United States,11,Stanford University,467,=3,40,private,99.9,4,60.5,189,77.6,162,93.0,40,96.7,5,89.1,2,42 : 58,15845,22%,7.5,"Archaeology,Architecture,Art, Performing Arts ...",/world-university-rankings/stanford-university,United States,37.4276580811,-122.1700668335,Stanford,node/467,94305–2004,450 Serra Mall,Stanford University,1.0,US,573,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",297282,2,North America,98.7,5.0,/universities/stanford-university,2042,3611,4285,15878
4,Massachusetts Institute of Technology,United States,0,Massachusetts Institute of Technology,471,5,50,private,100.0,1,88.4,63,87.6,81,92.5,50,91.9,9,87.3,4,37 : 63,11177,34%,8.7,"Architecture,Art, Performing Arts & Design,Bio...",/world-university-rankings/massachusetts-insti...,United States,42.360091,-71.09416,Cambridge,node/471,02139-4307,77 Massachusetts Avenue,Massachusetts Institute of Technology (MIT),0.986667,US,410,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294850,1,North America,100.0,6.0,/universities/massachusetts-institute-technolo...,1679,3717,2982,11067
5,Harvard University,United States,0,Harvard University,466,6,60,private,99.7,8,46.4,330,79.7,143,91.8,60,98.4,2,84.2,9,,20326,26%,8.9,"Agriculture & Forestry,Archaeology,Art, Perfor...",/world-university-rankings/harvard-university,United States,42.3745993,-71.1204804,Cambridge,node/466,02138,Massachusetts Hall,Harvard University,1.0,US,253,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294270,3,North America,98.4,5.0,/universities/harvard-university,1311,5266,4350,22429
6,Princeton University,United States,0,Princeton University,469,7,70,private,99.6,9,58.0,204,78.7,152,91.1,70,93.9,6,85.7,7,45 : 55,7955,24%,8.3,"Architecture,Art, Performing Arts & Design,Bio...",/world-university-rankings/princeton-university,United States,40.3439888,-74.6514481,,node/469,08544,Princeton,Princeton University,1.0,US,508,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",297490,13,North America,91.0,5.0,/universities/princeton-university,246,1793,1007,8069
7,Imperial College London,United Kingdom,0,Imperial College London,472,8,80,master_account,96.7,40,71.6,120,96.6,13,89.2,80,88.7,12,81.7,13,37 : 63,15857,55%,11.4,"Biological Sciences,Chemical Engineering,Chemi...",/world-university-rankings/imperial-college-lo...,United Kingdom,51.4988,-0.174877,Kensington,node/472,SW7 2AZ,South Kensington Road,Imperial College London,1.0,GB,356,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294030,8,Europe,93.7,,/universities/imperial-college-london,2071,8746,3930,16090
8,University of Chicago,United States,0,University of Chicago,473,9,90,private,99.4,12,39.8,487,69.6,231,88.6,90,90.1,10,85.3,8,44 : 56,13525,25%,6.2,"Archaeology,Art, Performing Arts & Design,Biol...",/world-university-rankings/university-chicago,United States,41.7886079,-87.5987133,Chicago,node/473,60637,Edward H. Levi Hall,University of Chicago,1.0,US,120,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294536,9,North America,93.5,5.0,/universities/university-chicago,635,3379,2449,13557
9,ETH Zurich – Swiss Federal Institute of Techno...,Switzerland,0,ETH Zurich – Swiss Federal Institute of Techno...,479,=10,100,master_account,94.3,60,60.3,190,98.1,8,87.7,100,92.0,8,76.4,21,31 : 69,19233,38%,14.6,"Agriculture & Forestry,Architecture,Biological...",/world-university-rankings/eth-zurich-swiss-fe...,Switzerland,47.3772479,8.5528242,Zürich,node/479,8092,Rämistrasse 101,ETH Zurich - Swiss Federal Institute of Techno...,0.934579,CH,201,Switzerland,"<a href=""/where-to-study/europe/switzerland/gu...","<img src=""https://www.topuniversities.com/site...",294432,10,Europe,93.3,,/universities/eth-zurich-swiss-federal-institu...,1886,7563,2477,19815


Unnamed: 0,cc,core_id,country,guide,logo,nid,rank_display,region,score,stars,name,url,international_faculty_members,international_students,total_faculty_members,total_students
0,US,410,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294850,1,North America,100,6,Massachusetts Institute of Technology (MIT),/universities/massachusetts-institute-technolo...,1679,3717,2982,11067
1,US,573,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",297282,2,North America,98.7,5,Stanford University,/universities/stanford-university,2042,3611,4285,15878
2,US,253,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294270,3,North America,98.4,5,Harvard University,/universities/harvard-university,1311,5266,4350,22429
3,US,94,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294562,4,North America,97.7,5,California Institute of Technology (Caltech),/universities/california-institute-technology-...,350,647,953,2255
4,GB,95,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294561,5,Europe,95.6,5,University of Cambridge,/universities/university-cambridge,2278,6699,5490,18770
5,GB,478,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294654,6,Europe,95.3,5,University of Oxford,/universities/university-oxford,2964,7353,6750,19720
6,GB,365,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294014,7,Europe,94.6,,UCL (University College London),/universities/ucl-university-college-london,2554,14854,6345,31080
7,GB,356,United Kingdom,"<a href=""/where-to-study/europe/united-kingdom...","<img src=""https://www.topuniversities.com/site...",294030,8,Europe,93.7,,Imperial College London,/universities/imperial-college-london,2071,8746,3930,16090
8,US,120,United States,"<a href=""/where-to-study/north-america/united-...","<img src=""https://www.topuniversities.com/site...",294536,9,North America,93.5,5,University of Chicago,/universities/university-chicago,635,3379,2449,13557
9,CH,201,Switzerland,"<a href=""/where-to-study/europe/switzerland/gu...","<img src=""https://www.topuniversities.com/site...",294432,10,Europe,93.3,,ETH Zurich - Swiss Federal Institute of Techno...,/universities/eth-zurich-swiss-federal-institu...,1886,7563,2477,19815


In [None]:
# NOTE(review): this cell was never executed (In [None]) and looks like an older
# draft of the `correspondence` cell above. `top200` is not defined anywhere in
# the visible notebook -- confirm, then delete or update this cell.
pd.set_option('display.max_rows', 500)
correspondence = pd.DataFrame({'name':list(top200['name']),'title':matches, 'matchingScore': highestScores})

In [None]:
# NOTE(review): never executed (In [None]). `top200` is not defined in the visible
# notebook, and the 'title' column of `top200_qs_full` was renamed to 'QSname' in an
# earlier cell, so this merge key would be missing -- confirm, then delete or update.
tempMerge = pd.merge(top200,correspondence,on='name' )
MERGEDDATAFRAMES = pd.merge(tempMerge,top200_qs_full, on='title')

In [None]:
MERGEDDATAFRAMES.head()

Extract name, rank, country and region, 
number of faculty members (international and total) and 
number of students (international and total).

In [143]:
# Keep only the columns needed for the comparison, in this order.
# NOTE(review): `top200_merged` is not defined in the visible notebook -- presumably
# it is the merged Times/QS frame; confirm which variable is meant.
df = top200_merged[[
    'name', 'rank', 'rank_display', 'country_x',
    'region',
    'international_faculty_members', 'international_students',
    'total_faculty_members', 'total_students',
    'stats_number_students', 'stats_pc_intl_students',
    'stats_student_staff_ratio',
]].copy()


In [140]:
# Tied ranks carry a leading '=' (e.g. '=3'); drop it so the value can later be
# parsed as a number. Values are converted to str first, so non-string entries
# no longer crash the cleanup.
df['rank'] = df['rank'].astype(str).str.replace(r'^=', '', regex=True)
df['rank_display'] = df['rank_display'].astype(str).str.replace(r'^=', '', regex=True)



In [141]:
# Correlate the numeric columns only; recent pandas versions raise when corr()
# meets text columns instead of silently skipping them.
df.select_dtypes(include='number').corr()

Unnamed: 0,international_faculty_members,international_students,total_faculty_members,total_students
international_faculty_members,1.0,0.518031,0.679967,0.262719
international_students,0.518031,1.0,0.405364,0.641197
total_faculty_members,0.679967,0.405364,1.0,0.56545
total_students,0.262719,0.641197,0.56545,1.0


When a university is strong in its international dimension, can you observe a consistency both for students and faculty members?

In [116]:
# Convert both rank columns to numbers in one vectorised call each; values that
# cannot be parsed become NaN.
df['rank'] = pd.to_numeric(df['rank'], errors='coerce')
df['rank_display'] = pd.to_numeric(df['rank_display'], errors='coerce')

In [124]:
# Combined score: the sum of the two ranks of each university.
s = df['rank'] + df['rank_display']
# Row positions ordered by ascending combined score. The index is reset so the
# result is positional whatever labels `df` carries; the sort is stable (ties
# keep their original order) and rows whose sum is NaN are placed last.
new_sorting = s.reset_index(drop=True).sort_values(kind='mergesort').index.tolist()

Unnamed: 0,0,rank,rank_display
0,5,3,2
1,9,6,3
2,7,2,5
3,7,1,6
4,16,8,8
5,18,9,9
6,20,7,13
7,33,19,14
8,28,12,16
9,30,13,17


In our approach we sum the two rankings of each university and sort the universities by that sum in ascending order, so that the lowest combined rank (the best overall position) comes first.

TypeError: 'ey' is an invalid keyword argument for this function