In [55]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd

<p>ChromeDriver is a separate executable that Selenium WebDriver uses to control Chrome. It is maintained by the Chromium team with help from WebDriver contributors. If you are unfamiliar with Selenium WebDriver, you should check out the <a href="https://www.selenium.dev/">Selenium site</a>.</p>

https://googlechromelabs.github.io/chrome-for-testing/

# 1. Create df_state_universities dataframe.
#### Index of the dataframe is university code and the single column is the university name.

In [56]:
service = Service("chromedriver.exe")
driver= webdriver.Chrome(service=service)
driver.get("https://yokatlas.yok.gov.tr/lisans-anasayfa.php")
driver.maximize_window()
optgroup = driver.find_element(by=By.XPATH, value="//optgroup[@label='Devlet Üniversiteleri']")
options = optgroup.find_elements (by=By.TAG_NAME,value="option")
state_universities ={option.get_attribute("value"):option.get_attribute("innerText")   for option in options}
df_state_universities = pd.DataFrame(data = state_universities.values(), index=state_universities.keys(),columns=["name"] )
df_state_universities.index.name="uni_code"
df_state_universities.head()

Unnamed: 0_level_0,name
uni_code,Unnamed: 1_level_1
1065,ABDULLAH GÜL ÜNİVERSİTESİ
1104,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...
1002,ADIYAMAN ÜNİVERSİTESİ
1004,AFYON KOCATEPE ÜNİVERSİTESİ
1126,AFYONKARAHİSAR SAĞLIK BİLİMLERİ ÜNİVERSİTESİ


# 2- Add the city column
#### New column is the city name 

In [57]:
driver.get("https://yokatlas.yok.gov.tr/universite.php")
city_web_elements = driver.find_elements(by=By.CLASS_NAME, value="sehir")
cities = [web_element.get_attribute("innerText") for web_element in city_web_elements]

name_web_elements = driver.find_elements(by=By.CLASS_NAME, value="baslik")
names =[web_element.get_attribute("innerText") for web_element in name_web_elements]
df_city = pd.DataFrame({"city":cities,"name":names})
df_city.head()

Unnamed: 0,city,name
0,Kayseri,ABDULLAH GÜL ÜNİVERSİTESİ
1,İstanbul,ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ
2,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...
3,Adıyaman,ADIYAMAN ÜNİVERSİTESİ
4,Afyonkarahisar,AFYON KOCATEPE ÜNİVERSİTESİ


Merge two dataframes

In [58]:
df_state_universities=df_state_universities.reset_index().merge(df_city, on="name", how='left').set_index('uni_code')
df_state_universities.head()

Unnamed: 0_level_0,name,city
uni_code,Unnamed: 1_level_1,Unnamed: 2_level_1
1065,ABDULLAH GÜL ÜNİVERSİTESİ,Kayseri
1104,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Adana
1002,ADIYAMAN ÜNİVERSİTESİ,Adıyaman
1004,AFYON KOCATEPE ÜNİVERSİTESİ,Afyonkarahisar
1126,AFYONKARAHİSAR SAĞLIK BİLİMLERİ ÜNİVERSİTESİ,Afyonkarahisar


# 3- Create df_quota 
### df_quota includes all departments and their quota/enrollments 

In [59]:
base_url = "https://yokatlas.yok.gov.tr/lisans-univ.php?u="

In [60]:
def get_department_names(driver,base_url, uni_codes):
    department_names = set()
    for uni_code in uni_codes:
        driver.get(base_url + uni_code)  
        driver.implicitly_wait(2)
        divs_departments =  driver.find_elements(by=By.XPATH, value="//a[@data-parent='#']/div")
        for div_department  in divs_departments:
            department_name = div_department.get_attribute("innerText")
            if "KKTC" not in department_name:
               # driver.get(anchor.get_attribute("href"))
               department_names.add(department_name) 
                
    return department_names    

In [61]:
department_names = get_department_names(driver,base_url, df_state_universities.index)
len(department_names)

767

In [62]:
department_names = list(department_names)
department_names.sort()
midx = pd.MultiIndex.from_product([ department_names, ["quota","enrollments"]  ]) 
df_quota = pd.DataFrame(index = midx,columns=df_state_universities.index)
df_quota.head()

Unnamed: 0,uni_code,1065,1104,1002,1004,1126,1005,1007,1008,1105,1009,...,1043,1121,1094,1096,1098,1102,1099,1101,1023,1103
Acil Yardım ve Afet Yönetimi,quota,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi,enrollments,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi (İÖ),quota,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi (İÖ),enrollments,,,,,,,,,,,...,,,,,,,,,,
Adli Bilimler,quota,,,,,,,,,,,...,,,,,,,,,,


In [63]:
def get_quota_enrollment(driver):
    try:
        pop_up_element_xpath = "//span[@class='featherlight-close-icon featherlight-close']"
        WebDriverWait(driver,10).until( EC.presence_of_element_located((By.XPATH,pop_up_element_xpath) )  )
        driver.implicitly_wait(2)
        pop_ups = driver.find_elements(by=By.XPATH, value=pop_up_element_xpath)
        for i in range(len(pop_ups)-1,-1,-1): # close pop-ups
            pop_ups[i].click() 
    except:
        pass
    
    quota_enrollment_section_xpath = "//*[contains(text(),'Kontenjan, Yerleşme ve Kayıt İstatistikleri')]"
    WebDriverWait(driver, 10).until( EC.presence_of_element_located((By.XPATH,quota_enrollment_section_xpath) )  )
    quota_enrollment_section = driver.find_element(by=By.XPATH, value=quota_enrollment_section_xpath)
    quota_enrollment_section.click()
  
    #ActionChains(driver).move_to_element(WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, quota_enrollment_section_xpath)) ) ).click().perform()
    
    quota_enrollment_section = driver.find_element(by=By.XPATH, value=quota_enrollment_section_xpath)
    td_xpath = "//td[contains(text(),'Toplam')]"
    WebDriverWait(driver, 10).until( EC.presence_of_element_located((By.XPATH,td_xpath) )  )
    #ActionChains(driver).move_to_element(WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, td_xpath)) ) ).click().perform()
    td = quota_enrollment_section.find_element(by=By.XPATH, value= td_xpath)   
    tr= td.find_element(by=By.XPATH, value="..")
    tds = tr.find_elements(by=By.TAG_NAME, value="td")
    quota = tds[1].get_attribute("innerText")
    enrollments = tds[2].get_attribute("innerText")
    return quota, enrollments

In [64]:
def create_df_quota(driver, df_quota,uni_code):
    driver.get(base_url + uni_code)
    anchors          =  driver.find_elements(by=By.XPATH, value="//a[@data-parent='#']") #links to departments
    divs_departments =  driver.find_elements(by=By.XPATH, value="//a[@data-parent='#']/div")
    for anchor,div_department in zip(anchors, divs_departments):
        try:
            department_name = div_department.get_attribute("innerText")
        except:
            print(uni_code)
            return
        if "KKTC" not in department_name:
            driver.get(anchor.get_attribute("href"))
          
            quota, enrollments =  get_quota_enrollment(driver)           
            df_quota.loc[department_name,"quota"][uni_code] = quota
            df_quota.loc[department_name,"enrollments"][uni_code] = enrollments
        driver.back()   

In [65]:
df_quota.head()

Unnamed: 0,uni_code,1065,1104,1002,1004,1126,1005,1007,1008,1105,1009,...,1043,1121,1094,1096,1098,1102,1099,1101,1023,1103
Acil Yardım ve Afet Yönetimi,quota,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi,enrollments,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi (İÖ),quota,,,,,,,,,,,...,,,,,,,,,,
Acil Yardım ve Afet Yönetimi (İÖ),enrollments,,,,,,,,,,,...,,,,,,,,,,
Adli Bilimler,quota,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for uni_code in df_state_universities.index:#list(uni_codes):#
    create_df_quota(driver, df_quota,uni_code)    

1065
1104
