In [2]:
UNIV_COLUMNS = ['대학코드', '국가', '대학명', '파견구분', '협정형태', 'NUM_VISITS', "BERT_SUM", "RNN_SUM", 'COLLEGE_LINK']
ABSTRACT_REVIEWS_COLUMNS = ['대학코드', '제목', '학과', '과정', '년도', 'BERT_SCORE', 'RNN_SCORE', 'REVIEW_LINK']

In [3]:
# import all modules
import os
from os import fdopen, remove
import glob
from tempfile import mkstemp
import shutil
from shutil import move, copymode

import pandas as pd
import numpy as np

## Wrangle dataset and make abstract review dataset

In [4]:
# define path for sentiment labeled dataset
# abstract: 짧은 후기 제목만 있는 텍스트 데이터셋
# specific: 문단으로 구성된 텍스트 데이터셋

abstract_sentiment = "./data_sentiment/abstract"
specific_sentiment = "./data_sentiment/specific"

In [5]:
# fetch all yonsei exchange review text datasets for each foreign universities
abstract_yonsei_reviews = glob.glob(f"{abstract_sentiment}/*.csv")
abstract_yonsei_reviews.sort()
abstract_yonsei_reviews[:5]

['./data_sentiment/abstract/730     _review_abstract_sentiment.csv',
 './data_sentiment/abstract/AR000004_review_abstract_sentiment.csv',
 './data_sentiment/abstract/AT000001_review_abstract_sentiment.csv',
 './data_sentiment/abstract/AT000002_review_abstract_sentiment.csv',
 './data_sentiment/abstract/AT000003_review_abstract_sentiment.csv']

In [6]:
# get sample dataset
sample_file = abstract_yonsei_reviews[70]

# extract file name
file_name = sample_file.split("/")[-1]
file_name_without_ext = file_name.split(".")[0]
print(file_name_without_ext)

# extract university code
univ_code_sample = file_name_without_ext.split("_")[0]
print(univ_code_sample)

# read sample dataset
df_abstract = pd.read_csv(sample_file, encoding="utf-8")
df_abstract.sample(5)

CN000022_review_abstract_sentiment
CN000022


Unnamed: 0.1,Unnamed: 0,No,제목,학과,과정,년도,href,BERT_SCORE,RNN_SCORE
7,7,11,가을학기 북경대학교 교환학생 파견,정치외교학과,학부,2017,/partner/expReport.asp?id=14422&page=1&bgbn=R,1,0.800448
4,4,14,북경대학교에서의 한 학기 교환생활,경영학과,학부,2018,/partner/expReport.asp?id=15209&page=1&bgbn=R,1,0.697339
14,14,4,북경대학교,중어중문학과,학부,2015,/partner/expReport.asp?id=12075&page=2&bgbn=R,1,0.659739
1,1,17,베이징대 교환생활,중어중문학과,학부,2019,/partner/expReport.asp?id=16194&page=1&bgbn=R,0,0.484792
17,17,1,중국 최고의 명문대학 北京大學,중어중문학과,학부,2011,/partner/expReport.asp?id=5520&page=2&bgbn=R,1,0.535456


In [7]:
df_abstract_review = df_abstract[["제목", "학과", "과정", "년도", "BERT_SCORE", "RNN_SCORE", "href"]]
df_abstract_review["대학코드"] = univ_code_sample
_abstract_review = df_abstract[["제목", "학과", "과정", "년도", "BERT_SCORE", "RNN_SCORE", "href"]]
df_abstract_review["대학코드"] = univ_code_sample
df_abstract_review = df_abstract_review[["대학코드","제목", "학과", "과정", "년도", "BERT_SCORE", "RNN_SCORE", "href"]]
df_abstract_review.head()

Unnamed: 0,대학코드,제목,학과,과정,년도,BERT_SCORE,RNN_SCORE,href
0,CN000022,PKU Exchange Program,언더우드국제대학,학부,2019,0,0.515649,/partner/expReport.asp?id=16451&page=1&bgbn=R
1,CN000022,베이징대 교환생활,중어중문학과,학부,2019,0,0.484792,/partner/expReport.asp?id=16194&page=1&bgbn=R
2,CN000022,북경대학교 교환학생,정치외교학과,학부,2019,0,0.664221,/partner/expReport.asp?id=15979&page=1&bgbn=R
3,CN000022,북경대학교 교환생활,중어중문학과,학부,2018,0,0.602057,/partner/expReport.asp?id=15513&page=1&bgbn=R
4,CN000022,북경대학교에서의 한 학기 교환생활,경영학과,학부,2018,1,0.697339,/partner/expReport.asp?id=15209&page=1&bgbn=R


In [8]:
df_abstract_review.columns.to_list()

['대학코드', '제목', '학과', '과정', '년도', 'BERT_SCORE', 'RNN_SCORE', 'href']

In [9]:
def make_abstract_review_df(UNIV_CODE):
    # reading data source csv file, importing as dataframe
    file_path = f"./data_sentiment/abstract/{UNIV_CODE}_review_abstract_sentiment.csv"
    df_abstract = pd.read_csv(file_path, encoding="utf-8")

    # select columns from dataframe, create new dataframe
    df_abstract_review = df_abstract[["제목", "학과", "과정", "년도", "BERT_SCORE", "RNN_SCORE", "href"]]

    # rename columns into review_link, make complete url
    df_abstract_review.rename(columns = {'href':'REVIEW_LINK'}, inplace = True)
    df_abstract_review["REVIEW_LINK"] = "http://oia.yonsei.ac.kr" + df_abstract_review["REVIEW_LINK"].astype(str)
    
    df_abstract_review["대학코드"] = UNIV_CODE
    df_abstract_review = df_abstract_review[["대학코드","제목", "학과", "과정", "년도", "BERT_SCORE", "RNN_SCORE", "REVIEW_LINK"]]
    return df_abstract_review

In [10]:
# listing up dataframes to concat
list_df = []
for csv_file in abstract_yonsei_reviews:
    # extract file name
    file_name = csv_file.split("/")[-1]
    file_name_without_ext = file_name.split(".")[0]

    # extract university code
    univ_code = file_name_without_ext.split("_")[0]

    # make dataframe based on university code
    df_temp = make_abstract_review_df(univ_code)
    list_df.append(df_temp)

print(len(list_df))
list_df[:3]

470


[       대학코드                제목               학과  과정    년도  BERT_SCORE  \
 0  730       리투아니아에서 행복했던 4개월  사회복지학과/언론홍보영상학부  학부  2019           1   
 
    RNN_SCORE                                        REVIEW_LINK  
 0   0.752021  http://oia.yonsei.ac.kr/partner/expReport.asp?...  ,
        대학코드                    제목    학과  과정      년도  BERT_SCORE  RNN_SCORE  \
 0  AR000004  아르헨티나 교환학생(어학연수프로그램)  건축학과  학부  2013-1           0    0.39967   
 
                                          REVIEW_LINK  
 0  http://oia.yonsei.ac.kr/partner/expReport.asp?...  ,
         대학코드                                        제목        학과  과정  \
 0   AT000001                       쿠프슈타인에서 노후 50년 땡겨쓰기      경영학과  학부   
 1   AT000001                           평화로운 쿠프에서의 한 학기        경제  학부   
 2   AT000001                   소중한 경험, 잊지 못할 기억, 쿠프슈타인   언론홍보영상학  학부   
 3   AT000001                        언제나 마음속의 고향인 쿠프슈타인    중어중문학과  학부   
 4   AT000001                    자연이 아름다운 쿠프슈타인에서의 한 학기    정치외교학과  학부   
 5   A

In [11]:
# concat dataframes in the list
df_abstract_wrangle = pd.concat(list_df)
print(len(df_abstract_wrangle.index))
df_abstract_wrangle.head(10)

11183


Unnamed: 0,대학코드,제목,학과,과정,년도,BERT_SCORE,RNN_SCORE,REVIEW_LINK
0,730,리투아니아에서 행복했던 4개월,사회복지학과/언론홍보영상학부,학부,2019,1,0.752021,http://oia.yonsei.ac.kr/partner/expReport.asp?...
0,AR000004,아르헨티나 교환학생(어학연수프로그램),건축학과,학부,2013-1,0,0.39967,http://oia.yonsei.ac.kr/partner/expReport.asp?...
0,AT000001,쿠프슈타인에서 노후 50년 땡겨쓰기,경영학과,학부,2017-2018,0,0.416185,http://oia.yonsei.ac.kr/partner/expReport.asp?...
1,AT000001,평화로운 쿠프에서의 한 학기,경제,학부,2017ㅡ2018,1,0.816743,http://oia.yonsei.ac.kr/partner/expReport.asp?...
2,AT000001,"소중한 경험, 잊지 못할 기억, 쿠프슈타인",언론홍보영상학,학부,2017,1,0.854657,http://oia.yonsei.ac.kr/partner/expReport.asp?...
3,AT000001,언제나 마음속의 고향인 쿠프슈타인,중어중문학과,학부,2017,1,0.8348,http://oia.yonsei.ac.kr/partner/expReport.asp?...
4,AT000001,자연이 아름다운 쿠프슈타인에서의 한 학기,정치외교학과,학부,2017,1,0.870668,http://oia.yonsei.ac.kr/partner/expReport.asp?...
5,AT000001,여유로웠던 쿠프슈타인에서의 한 학기,행정학과,학부,2017,1,0.816663,http://oia.yonsei.ac.kr/partner/expReport.asp?...
6,AT000001,쿠프슈타인에서의 한 학기,경제학부,학부,2016,1,0.731432,http://oia.yonsei.ac.kr/partner/expReport.asp?...
7,AT000001,여유롭고 소중했던 쿠프슈타인에서의 시간,경제학부,학부,2016,1,0.852968,http://oia.yonsei.ac.kr/partner/expReport.asp?...


In [12]:
# check for duplicate rows
df_abstract_wrangle = df_abstract_wrangle.drop_duplicates()
print(df_abstract_wrangle.shape)

(11183, 8)


In [13]:
# export wrangled df abstract concat dataframe
df_abstract_wrangle.to_csv("./data_wrangled/df_abstract_wrangle.csv",encoding="utf-8", index=False)

## Wrangle Foreign Exchange University Dataframe

In [14]:
def yield_BERT_SUM(UNIV_CODE):
    file_path = f"./data_sentiment/abstract/{UNIV_CODE}_review_abstract_sentiment.csv"
    dataframe_input = pd.read_csv(file_path, encoding="utf-8")
    bert_sum = dataframe_input["BERT_SCORE"].sum()
    return bert_sum

In [15]:
def yield_RNN_SUM(UNIV_CODE):
    file_path = f"./data_sentiment/abstract/{UNIV_CODE}_review_abstract_sentiment.csv"
    dataframe_input = pd.read_csv(file_path, encoding="utf-8")
    rnn_sum = dataframe_input["RNN_SCORE"].sum()
    return rnn_sum

In [16]:
def yield_no_of_students(UNIV_CODE):
    dataframe_input = make_abstract_review_df(UNIV_CODE)
    no_of_students = len(dataframe_input.index)
    return no_of_students

In [17]:
# University of Copenhagen Satisfaction Level
print(yield_BERT_SUM("DK000003"))
print(yield_RNN_SUM("DK000003"))

65
49.14911192655563


In [18]:
# University of Hongkong Satisfaction Level
print(yield_BERT_SUM("CN000016"))
print(yield_RNN_SUM("CN000016"))

41
43.059615552425385


In [19]:
# read university dataframe
df_univ = pd.read_csv("./data/univ_db_full.csv", encoding="utf-8")
df_univ.sample(5)

Unnamed: 0,level_0,index,No,대학명,국가,파견구분,협정형태,href,visitation
308,308,8,9,Radboud University Nijmegen,NETHERLANDS,교환학생,교환대학ISEP,/partner/expReport.asp?ucode=NL000005&bgbn=A,Exists
728,728,292,293,Western Kentucky University,UNITED STATES,교환학생,교환대학,/partner/expReport.asp?ucode=US000271&bgbn=A,Exists
421,421,28,29,University of Leeds,UNITED KINGDOM,,교환대학ISEP,/partner/expReport.asp?ucode=GB000041&bgbn=A,Exists
575,575,139,140,Northern Arizona University,UNITED STATES,교환학생,ISEP,/partner/expReport.asp?ucode=US000281&bgbn=A,Exists
386,386,8,9,TungHai University,TAIWAN,교환학생,교환대학,/partner/expReport.asp?ucode=TW000006&bgbn=A,Exists


In [20]:
# make column for university code
series_query = df_univ["href"].str.split("=", expand=True)[1]
series_univ_code = series_query.str.split("&", expand=True)[0]
df_univ["대학코드"] = series_univ_code
df_univ.head()

Unnamed: 0,level_0,index,No,대학명,국가,파견구분,협정형태,href,visitation,대학코드
0,0,0,1,Kabul University,AFGHANISTAN,교환학생,교환대학,/partner/expReport.asp?ucode=AF000001&bgbn=A,,AF000001
1,1,0,1,Universidad Blas Pascal,ARGENTINA,교환학생,ISEP,/partner/expReport.asp?ucode=AR000001&bgbn=A,,AR000001
2,2,1,2,Universidad Catolica de Cordoba,ARGENTINA,교환학생,ISEP,/partner/expReport.asp?ucode=AR000002&bgbn=A,,AR000002
3,3,2,3,Universidad de Palermo,ARGENTINA,교환학생,ISEP,/partner/expReport.asp?ucode=AR000003&bgbn=A,,AR000003
4,4,3,4,Universidad del Salvador,ARGENTINA,교환학생,ISEP,/partner/expReport.asp?ucode=AR000004&bgbn=A,Exists,AR000004


In [21]:
# reorder columns, delete unnecessary columns
df = df_univ[["대학코드", "국가", "대학명", "파견구분", "협정형태", "visitation", "href"]]
df.head(10)

Unnamed: 0,대학코드,국가,대학명,파견구분,협정형태,visitation,href
0,AF000001,AFGHANISTAN,Kabul University,교환학생,교환대학,,/partner/expReport.asp?ucode=AF000001&bgbn=A
1,AR000001,ARGENTINA,Universidad Blas Pascal,교환학생,ISEP,,/partner/expReport.asp?ucode=AR000001&bgbn=A
2,AR000002,ARGENTINA,Universidad Catolica de Cordoba,교환학생,ISEP,,/partner/expReport.asp?ucode=AR000002&bgbn=A
3,AR000003,ARGENTINA,Universidad de Palermo,교환학생,ISEP,,/partner/expReport.asp?ucode=AR000003&bgbn=A
4,AR000004,ARGENTINA,Universidad del Salvador,교환학생,ISEP,Exists,/partner/expReport.asp?ucode=AR000004&bgbn=A
5,AR000005,ARGENTINA,University of Buenos Aires,교환학생,교환대학,,/partner/expReport.asp?ucode=AR000005&bgbn=A
6,AU000019,AUSTRALIA,Australian National University,교환학생,교환대학,Exists,/partner/expReport.asp?ucode=AU000019&bgbn=A
7,AU000001,AUSTRALIA,Bond University,교환학생,교환대학,Exists,/partner/expReport.asp?ucode=AU000001&bgbn=A
8,AU000002,AUSTRALIA,Curtin University,교환학생,교환대학ISEP,Exists,/partner/expReport.asp?ucode=AU000002&bgbn=A
9,AU000003,AUSTRALIA,Curtin University of Technology,교환학생,,Exists,/partner/expReport.asp?ucode=AU000003&bgbn=A


In [22]:
# sort out unvisited universities
df_ever_been = df.loc[df['visitation'] == "Exists"]
df_ever_been.head()

Unnamed: 0,대학코드,국가,대학명,파견구분,협정형태,visitation,href
4,AR000004,ARGENTINA,Universidad del Salvador,교환학생,ISEP,Exists,/partner/expReport.asp?ucode=AR000004&bgbn=A
6,AU000019,AUSTRALIA,Australian National University,교환학생,교환대학,Exists,/partner/expReport.asp?ucode=AU000019&bgbn=A
7,AU000001,AUSTRALIA,Bond University,교환학생,교환대학,Exists,/partner/expReport.asp?ucode=AU000001&bgbn=A
8,AU000002,AUSTRALIA,Curtin University,교환학생,교환대학ISEP,Exists,/partner/expReport.asp?ucode=AU000002&bgbn=A
9,AU000003,AUSTRALIA,Curtin University of Technology,교환학생,,Exists,/partner/expReport.asp?ucode=AU000003&bgbn=A


In [23]:
# number of visited universities in Yonsei history
df_ever_been.shape

(470, 7)

In [24]:
# wrangle university dataframe
def make_univ_df():
    # read university dataset csv file
    df_univ = pd.read_csv("./data/univ_db_full.csv", encoding="utf-8")

    # get university code
    series_query = df_univ["href"].str.split("=", expand=True)[1]
    series_univ_code = series_query.str.split("&", expand=True)[0]
    df_univ["대학코드"] = series_univ_code
    
    # Making column values for BERT_SUM, RNN_SUM, STUDENT_NO
    list_bert_sum = []
    list_rnn_sum = []
    list_student_no = []
    
    # iterrating over rows of university datframe
    for index, row in df_univ.iterrows():

        # if university is not ever visited, input NaN values
        if row["visitation"] == "None":
            list_bert_sum.append(np.NaN)
            list_rnn_sum.append(np.NaN)
            list_student_no.append(np.NaN)

        # if university is visited, input BERT_SCORE, RNN_SCORE's sum and input number of visitations
        if row["visitation"] == "Exists":
            list_bert_sum.append(yield_BERT_SUM(row["대학코드"]))
            list_rnn_sum.append(yield_RNN_SUM(row["대학코드"]))
            list_student_no.append(yield_no_of_students(row["대학코드"]))
    
    # Make Columns
    df_univ["BERT_SUM"] = list_bert_sum
    df_univ["RNN_SUM"] = list_rnn_sum
    df_univ["NUM_VISITS"] = list_student_no
    
    # Select and order columns
    df = df_univ[["대학코드", "국가", "대학명", "파견구분", "협정형태", "NUM_VISITS", "BERT_SUM", "RNN_SUM", "href"]]
    return df

In [25]:
df_univ_wrangled = make_univ_df()

In [26]:
print(df_univ_wrangled.shape)
df_univ_wrangled.head(10)

(745, 9)


Unnamed: 0,대학코드,국가,대학명,파견구분,협정형태,NUM_VISITS,BERT_SUM,RNN_SUM,href
0,AF000001,AFGHANISTAN,Kabul University,교환학생,교환대학,,,,/partner/expReport.asp?ucode=AF000001&bgbn=A
1,AR000001,ARGENTINA,Universidad Blas Pascal,교환학생,ISEP,,,,/partner/expReport.asp?ucode=AR000001&bgbn=A
2,AR000002,ARGENTINA,Universidad Catolica de Cordoba,교환학생,ISEP,,,,/partner/expReport.asp?ucode=AR000002&bgbn=A
3,AR000003,ARGENTINA,Universidad de Palermo,교환학생,ISEP,,,,/partner/expReport.asp?ucode=AR000003&bgbn=A
4,AR000004,ARGENTINA,Universidad del Salvador,교환학생,ISEP,1.0,0.0,0.39967,/partner/expReport.asp?ucode=AR000004&bgbn=A
5,AR000005,ARGENTINA,University of Buenos Aires,교환학생,교환대학,,,,/partner/expReport.asp?ucode=AR000005&bgbn=A
6,AU000019,AUSTRALIA,Australian National University,교환학생,교환대학,27.0,17.0,17.952732,/partner/expReport.asp?ucode=AU000019&bgbn=A
7,AU000001,AUSTRALIA,Bond University,교환학생,교환대학,19.0,16.0,13.359809,/partner/expReport.asp?ucode=AU000001&bgbn=A
8,AU000002,AUSTRALIA,Curtin University,교환학생,교환대학ISEP,7.0,5.0,4.657787,/partner/expReport.asp?ucode=AU000002&bgbn=A
9,AU000003,AUSTRALIA,Curtin University of Technology,교환학생,,16.0,14.0,9.69667,/partner/expReport.asp?ucode=AU000003&bgbn=A


In [27]:
df_univ_wrangled.columns

Index(['대학코드', '국가', '대학명', '파견구분', '협정형태', 'NUM_VISITS', 'BERT_SUM',
       'RNN_SUM', 'href'],
      dtype='object')

In [28]:
df_univ_wrangled.shape

(745, 9)

In [29]:
# check for duplicate rows
df_univ_wrangled = df_univ_wrangled.drop_duplicates()
df_univ_wrangled.shape

(745, 9)

In [30]:
df_univ_wrangled.rename(columns = {'href':'COLLEGE_LINK'}, inplace = True)

In [31]:
df_univ_wrangled["COLLEGE_LINK"] = "http://oia.yonsei.ac.kr" + df_univ_wrangled["COLLEGE_LINK"].astype(str)

In [32]:
# GET BERT and RNN average
df_univ_wrangled["BERT_AVG"] = df_univ_wrangled["BERT_SUM"] / df_univ_wrangled["NUM_VISITS"]
df_univ_wrangled["RNN_AVG"] = df_univ_wrangled["RNN_SUM"] / df_univ_wrangled["NUM_VISITS"]

In [33]:
# GET BERT and RNN average
df_univ_wrangled["BERT_PLUS_RNN_SCORE"] = df_univ_wrangled["BERT_AVG"]*0.7 + df_univ_wrangled["RNN_AVG"]*0.3

In [34]:
# reorder columns
df_univ_wrangled = df_univ_wrangled[['대학코드', '국가', '대학명', '파견구분', '협정형태', 'NUM_VISITS', 'BERT_SUM', 'RNN_SUM', 'BERT_AVG', 'RNN_AVG','BERT_PLUS_RNN_SCORE', 'COLLEGE_LINK']]

In [35]:
df_univ_wrangled.head(10)

Unnamed: 0,대학코드,국가,대학명,파견구분,협정형태,NUM_VISITS,BERT_SUM,RNN_SUM,BERT_AVG,RNN_AVG,BERT_PLUS_RNN_SCORE,COLLEGE_LINK
0,AF000001,AFGHANISTAN,Kabul University,교환학생,교환대학,,,,,,,http://oia.yonsei.ac.kr/partner/expReport.asp?...
1,AR000001,ARGENTINA,Universidad Blas Pascal,교환학생,ISEP,,,,,,,http://oia.yonsei.ac.kr/partner/expReport.asp?...
2,AR000002,ARGENTINA,Universidad Catolica de Cordoba,교환학생,ISEP,,,,,,,http://oia.yonsei.ac.kr/partner/expReport.asp?...
3,AR000003,ARGENTINA,Universidad de Palermo,교환학생,ISEP,,,,,,,http://oia.yonsei.ac.kr/partner/expReport.asp?...
4,AR000004,ARGENTINA,Universidad del Salvador,교환학생,ISEP,1.0,0.0,0.39967,0.0,0.39967,0.119901,http://oia.yonsei.ac.kr/partner/expReport.asp?...
5,AR000005,ARGENTINA,University of Buenos Aires,교환학생,교환대학,,,,,,,http://oia.yonsei.ac.kr/partner/expReport.asp?...
6,AU000019,AUSTRALIA,Australian National University,교환학생,교환대학,27.0,17.0,17.952732,0.62963,0.664916,0.640216,http://oia.yonsei.ac.kr/partner/expReport.asp?...
7,AU000001,AUSTRALIA,Bond University,교환학생,교환대학,19.0,16.0,13.359809,0.842105,0.703148,0.800418,http://oia.yonsei.ac.kr/partner/expReport.asp?...
8,AU000002,AUSTRALIA,Curtin University,교환학생,교환대학ISEP,7.0,5.0,4.657787,0.714286,0.665398,0.699619,http://oia.yonsei.ac.kr/partner/expReport.asp?...
9,AU000003,AUSTRALIA,Curtin University of Technology,교환학생,,16.0,14.0,9.69667,0.875,0.606042,0.794313,http://oia.yonsei.ac.kr/partner/expReport.asp?...


In [36]:
print(df_univ_wrangled.shape)
df_univ_wrangled.columns

(745, 12)


Index(['대학코드', '국가', '대학명', '파견구분', '협정형태', 'NUM_VISITS', 'BERT_SUM',
       'RNN_SUM', 'BERT_AVG', 'RNN_AVG', 'BERT_PLUS_RNN_SCORE',
       'COLLEGE_LINK'],
      dtype='object')

In [37]:
# export wrangled df abstract concat dataframe
df_univ_wrangled.to_csv("./data_wrangled/df_univ_wrangled.csv",encoding="utf-8", index=False)

## Sort universities according to BERT and RNN average score

In [53]:
# Looking at cases with enough number of visits
NUMBER_OF_VISITS = 6
df_enough_visits = df_univ_wrangled.loc[df_univ_wrangled['NUM_VISITS'] >= NUMBER_OF_VISITS]
print(df_enough_visits.shape)
df_enough_visits.head()

(295, 12)


Unnamed: 0,대학코드,국가,대학명,파견구분,협정형태,NUM_VISITS,BERT_SUM,RNN_SUM,BERT_AVG,RNN_AVG,BERT_PLUS_RNN_SCORE,COLLEGE_LINK
6,AU000019,AUSTRALIA,Australian National University,교환학생,교환대학,27.0,17.0,17.952732,0.62963,0.664916,0.640216,http://oia.yonsei.ac.kr/partner/expReport.asp?...
7,AU000001,AUSTRALIA,Bond University,교환학생,교환대학,19.0,16.0,13.359809,0.842105,0.703148,0.800418,http://oia.yonsei.ac.kr/partner/expReport.asp?...
8,AU000002,AUSTRALIA,Curtin University,교환학생,교환대학ISEP,7.0,5.0,4.657787,0.714286,0.665398,0.699619,http://oia.yonsei.ac.kr/partner/expReport.asp?...
9,AU000003,AUSTRALIA,Curtin University of Technology,교환학생,,16.0,14.0,9.69667,0.875,0.606042,0.794313,http://oia.yonsei.ac.kr/partner/expReport.asp?...
10,AU000004,AUSTRALIA,Deakin University,교환학생,교환대학ISEPUSAC,24.0,18.0,17.42512,0.75,0.726047,0.742814,http://oia.yonsei.ac.kr/partner/expReport.asp?...


In [54]:
df_enough_visits_sort = df_enough_visits.sort_values(by=["BERT_PLUS_RNN_SCORE"], ascending=False)
df_ranking = df_enough_visits_sort.reset_index()
df_ranking.head(15)

Unnamed: 0,index,대학코드,국가,대학명,파견구분,협정형태,NUM_VISITS,BERT_SUM,RNN_SUM,BERT_AVG,RNN_AVG,BERT_PLUS_RNN_SCORE,COLLEGE_LINK
0,376,CH000004,SWITZERLAND,University of Lausanne,교환학생,교환대학ISEP,8.0,8.0,6.659102,1.0,0.832388,0.949716,http://oia.yonsei.ac.kr/partner/expReport.asp?...
1,284,MX000004,MEXICO,Instituto Tecnologico y de Estudios Superiores...,교환학생,교환대학ISEP,7.0,7.0,5.6657,1.0,0.809386,0.942816,http://oia.yonsei.ac.kr/partner/expReport.asp?...
2,415,GB000010,UNITED KINGDOM,University of Edinburgh,교환학생방문학생SAP(정규),교환대학SAF,7.0,7.0,5.607286,1.0,0.801041,0.940312,http://oia.yonsei.ac.kr/partner/expReport.asp?...
3,361,SE000003,SWEDEN,Kristianstad University,교환학생,교환대학,49.0,47.0,39.34154,0.959184,0.802889,0.912295,http://oia.yonsei.ac.kr/partner/expReport.asp?...
4,49,CA000003,CANADA,Dalhousie University,교환학생,교환대학,6.0,6.0,4.088647,1.0,0.681441,0.904432,http://oia.yonsei.ac.kr/partner/expReport.asp?...
5,547,US000104,UNITED STATES,Maryville College,교환학생,교환대학,28.0,27.0,21.112275,0.964286,0.75401,0.901203,http://oia.yonsei.ac.kr/partner/expReport.asp?...
6,246,JP000017,JAPAN,Nagasaki University,교환학생,교환대학,7.0,7.0,4.619841,1.0,0.659977,0.897993,http://oia.yonsei.ac.kr/partner/expReport.asp?...
7,34,AT000009,AUSTRIA,University of Vienna,교환학생,교환대학,20.0,20.0,13.028918,1.0,0.651446,0.895434,http://oia.yonsei.ac.kr/partner/expReport.asp?...
8,539,US000096,UNITED STATES,Linfield College,교환학생,교환대학,51.0,50.0,35.077536,0.980392,0.687795,0.892613,http://oia.yonsei.ac.kr/partner/expReport.asp?...
9,168,DE000017,GERMANY,Humboldt-Universitat zu Berlin,교환학생,교환대학,28.0,27.0,19.999718,0.964286,0.714276,0.889283,http://oia.yonsei.ac.kr/partner/expReport.asp?...


In [55]:
df_ranking.tail(15)

Unnamed: 0,index,대학코드,국가,대학명,파견구분,협정형태,NUM_VISITS,BERT_SUM,RNN_SUM,BERT_AVG,RNN_AVG,BERT_PLUS_RNN_SCORE,COLLEGE_LINK
280,220,IT000008,ITALY,Luiss Guido Carli,교환학생,교환대학,20.0,10.0,13.335463,0.5,0.666773,0.550032,http://oia.yonsei.ac.kr/partner/expReport.asp?...
281,486,US000021,UNITED STATES,CSU Los Angeles,교환학생,교환대학,8.0,4.0,5.026329,0.5,0.628291,0.538487,http://oia.yonsei.ac.kr/partner/expReport.asp?...
282,435,GB000025,UNITED KINGDOM,University of Westminster,방문학생SAP(정규),SAF,7.0,3.0,5.564678,0.428571,0.794954,0.538486,http://oia.yonsei.ac.kr/partner/expReport.asp?...
283,334,RU000002,RUSSIAN FEDERATION,Moscow State University,교환학생,교환대학,29.0,14.0,19.249699,0.482759,0.663783,0.537066,http://oia.yonsei.ac.kr/partner/expReport.asp?...
284,164,DE000001,GERMANY,Eberhard Karls Universitat Tubingen,교환학생,교환대학,28.0,12.0,21.016637,0.428571,0.750594,0.525178,http://oia.yonsei.ac.kr/partner/expReport.asp?...
285,404,GB000033,UNITED KINGDOM,"SOAS, University of London",교환학생,교환대학,9.0,4.0,6.251262,0.444444,0.694585,0.519487,http://oia.yonsei.ac.kr/partner/expReport.asp?...
286,187,CN000004,HONG KONG,Hong Kong Polytechnic University,교환학생,교환대학,53.0,23.0,36.354918,0.433962,0.685942,0.509556,http://oia.yonsei.ac.kr/partner/expReport.asp?...
287,126,FR000004,FRANCE,Ecole Superieure des Sciences Economiques et C...,교환학생,교환대학,7.0,3.0,4.874636,0.428571,0.696377,0.508913,http://oia.yonsei.ac.kr/partner/expReport.asp?...
288,300,NL000013,NETHERLANDS,Amsterdam University College,교환학생,,17.0,8.0,10.136701,0.470588,0.596277,0.508295,http://oia.yonsei.ac.kr/partner/expReport.asp?...
289,172,DE000008,GERMANY,Ludwig-Maximilians-Universitat Munchen,교환학생,교환대학,31.0,13.0,21.730792,0.419355,0.700993,0.503846,http://oia.yonsei.ac.kr/partner/expReport.asp?...


In [56]:
df_ranking.to_csv("./data_wrangled/df_univ_ranking.csv",encoding="utf-8", index=False)