In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import numpy as np
from tqdm import tqdm
import locale
import re
from PIL import Image 

# Switch to the Turkish locale: later cells sort strings with locale.strxfrm so Turkish characters order correctly.
locale.setlocale(locale.LC_ALL, 'tr_TR.utf8')
# Start a Chrome session through the chromedriver.exe binary referenced by relative path.
driver = webdriver.Chrome(service=Service('chromedriver.exe'), options= webdriver.ChromeOptions())
# A university code is appended to this URL to open that university's department listing.
base_url = "https://yokatlas.yok.gov.tr/lisans-univ.php?u="

ModuleNotFoundError: No module named 'selenium'

In the previous notebook we prepared a template df whose index holds the university **uni_type**, **city**, **uni_name** and **uni_code**.<br>
First, we read this df (which is ready to be extended), using those four columns as the index, and then sort the index.

In [None]:
# Load the template; its first four columns become the index levels.
df = pd.read_csv("df_template.csv",index_col=[0,1,2,3])
# Sort on the first three index levels, using locale.strxfrm so the string keys follow the active locale.
df = df.sort_index(level=[0,1,2],key=lambda x: pd.Index([locale.strxfrm(e) for e in x] ))
df.head(10)

In this notebook we are going to:
* extend df to include departments and their features.
* fill the df with feature values.

## 1- Add department names

In [None]:
import time
def get_department_names_codes(driver,base_url, city,uni_code):
    """Collect the departments listed on a university's page.

    Opens ``base_url + str(uni_code)``, waits for the department anchors
    (``//a[@data-parent='#']``) and reads, for each anchor, the department
    code (the part of ``href`` after the first ``=``), the department name
    (inner text of the child ``div``) and the place name (inner text of the
    child ``small`` without its surrounding parentheses).

    Args:
        driver: Selenium WebDriver used to load the page.
        base_url: URL prefix to which ``uni_code`` is appended.
        city: City of the university row being processed; only used by the
            special handling of ``uni_code == 1110``.
        uni_code: University code.

    Returns:
        set of ``(uni_code, department_name, department_code)`` tuples.
    """
    driver.get(base_url + str(uni_code))

    # Wait up to 10 s for the anchors; on failure reload the page and try again.
    # `except Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working
    # inside this otherwise unbounded retry loop.
    while True:
        try:
            anchor_departments = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.XPATH, "//a[@data-parent='#']")))
            break
        except Exception:
            driver.refresh()

    excluded_strings = ("KKTC", "UOLP")  # markers of programs abroad, which are excluded
    other_cities = {"Adana","Erzurum","Bursa","Trabzon","İzmir","Kayseri"}
    uni_code_department_names_codes = set()

    for anchor_department in anchor_departments:
        href = anchor_department.get_attribute("href")
        department_code = href[href.index("=") + 1:]
        department_name = anchor_department.find_element(by=By.XPATH, value="div").get_attribute("innerText")
        # [1:-1] drops the surrounding parentheses
        place_name = anchor_department.find_element(by=By.XPATH, value="small").get_attribute("innerText")[1:-1]

        if any(marker in department_name for marker in excluded_strings) or "KKTC" in place_name:
            continue

        if uni_code == 1110:
            # Sağlık Bilimleri Üniversitesi is handled separately: its page lists
            # departments of several cities, so keep only those matching `city`.
            belongs_elsewhere = (
                (city == "Ankara" and "Ankara" not in department_name)
                or (city != "Ankara" and "Ankara" in department_name)
                or (city in other_cities and city not in place_name)
                or (city == "İstanbul"
                    and ("Ankara" in department_name or other_cities.intersection(place_name.split())))
            )
            if belongs_elsewhere:
                continue
            if "(" in department_name:
                # drop the parenthesised suffix from the name
                department_name = department_name[:department_name.index("(")].strip()

        uni_code_department_names_codes.add((uni_code, department_name, department_code))

    return uni_code_department_names_codes

In [None]:
option = int(input("1-Use previously saved file\n2-Scrape and save to df_template2.csv\nSelect an option:"))
if option==1:
    # Reuse the result of an earlier scrape.
    df = pd.read_csv("df_template2.csv")
    df = df.set_index(["city","uni_name","uni_code","uni_type","dep_name","dep_code"])
else:
    # Scrape every university; collect the per-university frames and concatenate
    # once at the end instead of re-copying a growing frame on every iteration.
    university_frames = []
    for _,city,_,uni_code in tqdm(df.index):
        uni_code_department_names_codes = get_department_names_codes(driver,base_url,city, uni_code)
        midx = pd.MultiIndex.from_tuples(uni_code_department_names_codes,names=["uni_code","dep_name","dep_code"])
        df_temp = pd.DataFrame(index = midx)
        # join on the shared uni_code level attaches dep_name / dep_code to the university row
        university_frames.append(df.loc[pd.IndexSlice[:,city,:,uni_code],:].join(df_temp))
    df_new = pd.concat(university_frames)

    # rename df_new as df again and move city to the first index level
    df = df_new.reorder_levels([1,2,3,0,4,5])
    df.reset_index().to_csv("df_template2.csv",index = False)
    

In [None]:
df.head(12)

* Sort according to city, uni_name and type

In [None]:
df = df.sort_index(level=[0,1,3],key=lambda x: pd.Index([locale.strxfrm(e)  for e in x] ))  # locale.strxfrm is to sort strings with Turkish chars
df.head()

## 2- Add features of departments (columns)

#### In this step we add  new columns for the features to be extracted

In [None]:
# We will add province names as new columns to represent number of students coming from other provinces
provinces = sorted(df.index.get_level_values(0).unique().tolist(), key=lambda x:locale.strxfrm(x)  ) 
len(provinces)

## 2.1 Prepare *General* top level

In [None]:
# Feature columns of a department, grouped by the default value each group starts with.
integer_features = ["quota","placements","not_registered","registered","top_order","bottom_order"]
score_features = ["min_score","max_score"]
categorical_features = ["faculty","entrance_score_type","scholarship"]
department_features = integer_features + score_features + categorical_features + ["language"]

df[integer_features] = 0          # integer counters / ranks
df[score_features] = 0.0          # floating point scores
df[categorical_features] = None   # categorical values, filled while scraping
df["language"] = "Türkçe"         # default education language is Turkish
df.head()

### Extract the language if  exists in department name
In this step we :
* remove some expressions like 'Açıköğretim' (open education), 'Uzaktan Öğretim' (distance education), 'İÖ' (evening education)
      which will be extracted separately and saved as features.
* extract language feature if it exists inside parentheses and save it in the corresponding language column.

As a result, a department name such as Psikoloji (İngilizce) becomes Psikoloji, while its language column is set to "İngilizce".

In [None]:
print("Number of deparment names before extracting language and removing expressions in parantheses",len(df.index.get_level_values(4).unique()))

In [None]:
# Lift pandas' display limits so frames are shown without row/column truncation.
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

# Language names that may appear in parentheses inside a department name.
languages_set= {'Almanca', 'Arapça', 'Bulgarca', 'Ermenice', 'Fransızca', 'Korece', 'Lehçe', 'Rusça',  'Çince', 'İngilizce','İspanyolca', 'İtalyanca'}
pattern = r'\((.*?)\)'  # Matches text inside parentheses
new_department_dict = {}  # NOTE(review): not used anywhere in this cell

# Re-index the frame by dep_code so each department row can be addressed directly.
# NOTE(review): the index is not restored in this cell; later cells address rows by dep_code.
df = df.reset_index().set_index("dep_code")
for dep_code in df.index:
    # NOTE(review): assumes dep_code is unique, otherwise .loc returns a Series here - confirm
    dep_name = df.loc[dep_code,"dep_name"]
 
    parentheses_content = set(re.findall(pattern, dep_name ))
    if parentheses_content:
        if parentheses_content & languages_set:             # at least one parenthesised part is a language name
            # single-element unpacking: raises ValueError if more than one language matched
            language, = parentheses_content & languages_set
            df.loc[dep_code,"language"] = language # set the language value 
            # keep only the text before the first "(" as the department name
            df.loc[dep_code,"dep_name"] =  dep_name[:dep_name.index("(")].strip()

print("Number of deparment names after extracting parentheses_content(language education type):",len(df["dep_name"].unique()))

Now we can see that department names no longer contain the education language. For instance, "Bilgisayar Mühendisliği (İngilizce)" has become "Bilgisayar Mühendisliği"
and its language feature has been set to "İngilizce".

In [None]:
# Get the existing column names
existing_columns = df.columns.tolist()
#  Create the new MultiIndex dynamically
#    Each existing column name will become a lower level under 'Bilgiler'
new_multiindex_tuples = [('General', col) for col in existing_columns]

new_columns = pd.MultiIndex.from_tuples(
    new_multiindex_tuples,
    #names=['lev1', 'lev2'] # Optionally name the levels
)
df.columns = new_columns
df.head()

# 3- Helper Functions for Parsing Sections

In [None]:
def get_row_content(driver,searched_text,imp_wait_time = 1, add_strong=False):
    """Return the inner text of the last cell of the table row labelled ``searched_text``.

    The row is the first ``tr`` having a ``td`` (or a ``td/strong`` when
    ``add_strong`` is true) whose text contains ``searched_text``. The lookup
    waits up to ``imp_wait_time`` seconds for the cell to be present; the wait
    raises if it never appears.
    """
    strong = "/strong" if add_strong else ""
    # the last <td> of the matching row holds the value
    x_path = f"//tr[td{strong}[contains(text(),'{searched_text}')]]/td[last()]"
    value_cell = WebDriverWait(driver, imp_wait_time).until(
        EC.presence_of_element_located((By.XPATH, x_path))
    )
    return value_cell.get_attribute("innerText")

In [None]:
def close_pop_ups_js(driver,imp_wait_time):
        """Close featherlight pop-ups by running JavaScript in the page.

        The script clicks every ``span.featherlight-close`` element (last to
        first) and then hides and removes every ``.featherlight`` container.
        Afterwards the function waits up to 5 seconds for ``.featherlight`` to
        become invisible; that wait raises if it does not.

        NOTE(review): ``imp_wait_time`` is accepted but not used; the
        verification timeout is fixed at 5 seconds.
        """
        driver.execute_script("""
            // Find all featherlight close buttons
            const closeButtons = document.querySelectorAll('span.featherlight-close');
            
            // Close all popups from top to bottom
            for (let i = closeButtons.length - 1; i >= 0; i--) {
                // Click using native DOM method
                closeButtons[i].click();
            }
            
            // Alternative: Remove entire popup containers
            document.querySelectorAll('.featherlight').forEach(popup => {
                popup.style.display = 'none';
                popup.remove();
            });
        """)
        print("Executed JavaScript popup closure")
        
        # Verify closure: wait until no .featherlight element is visible any more
        WebDriverWait(driver, 5).until(
            EC.invisibility_of_element_located((By.CSS_SELECTOR, ".featherlight"))
        )
        print("Verified popup disappearance")

In [None]:
def close_pop_ups(driver,imp_wait_time):
    """Close featherlight pop-ups, refreshing the page if that is not possible.

    Waits up to ``imp_wait_time`` seconds for a pop-up close icon to be
    present and then delegates to ``close_pop_ups_js``. If the icon never
    shows up, or closing fails, the error is reported and the page is
    refreshed.

    Args:
        driver: Selenium WebDriver showing the page.
        imp_wait_time: Seconds to wait for the close icon.
    """
    pop_up_element_xpath = "//span[@class='featherlight-close-icon featherlight-close']"
    try:
        print("imp_wait_time:",imp_wait_time)
        WebDriverWait(driver,imp_wait_time).until(
            EC.presence_of_element_located((By.XPATH,pop_up_element_xpath))
        )
        print("FOUND pop_up_element_xpath")
        close_pop_ups_js(driver,imp_wait_time)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt / SystemExit
        # still stop a long scraping run.
        print("Pop-up close error")
        print("Refreshing page")
        driver.refresh()

In [None]:
class elements_to_be_number_of:
    """Custom wait condition for ``WebDriverWait(...).until(...)``.

    Calling the instance with a driver (or any object exposing
    ``find_elements``) returns the located elements when exactly ``number``
    of them match ``locator``, and ``False`` otherwise.
    """

    def __init__(self, locator, number):
        self.locator, self.number = locator, number

    def __call__(self, driver):
        found = driver.find_elements(*self.locator)
        return found if len(found) == self.number else False

**Number-parsing helper and decorator for the functions that scrape tab contents.**

In [None]:
def parse_number(s, int_type=False):
    """Convert a scraped cell text to a number.

    Args:
        s: Text to convert; a comma is treated as the decimal separator.
        int_type: When true the value is an integer, so every ``.`` and ``,``
            is treated as a grouping separator and removed before conversion.

    Returns:
        * ``0`` for the placeholders ``---``, ``--`` and ``-`` (surrounding
          whitespace is ignored),
        * an ``int`` when the parsed value is integral, otherwise a ``float``,
        * the stripped, otherwise unmodified text when it is not a number.
    """
    text = s.strip()
    # Strip first so placeholders padded with whitespace are recognised too.
    if text in ("---", "--", "-"):
        return 0

    normalized = text.replace(",", ".")  # decimal comma -> decimal point
    if int_type:
        normalized = normalized.replace(".", "")  # drop grouping separators

    try:
        value = float(normalized)
    except ValueError:
        # Not a number: hand back the text as scraped, not the comma-replaced copy.
        return text
    return int(value) if value.is_integer() else value

In [None]:
from functools import wraps
import time
def get_section_content_decorator(func):
    """Wrap a section scraper so that its tab is opened first and the scrape is retried.

    The returned wrapper locates the tab ``//a[@href='#c{div_id}']``, clicks
    it when its ``aria-expanded`` attribute is not ``"true"``, waits for the
    ``icerik_{div_id}`` content to become visible and then calls ``func``.
    Whenever any step raises, pop-ups are closed, the page is refreshed and
    the whole sequence is repeated until ``func`` returns.

    Wrapper args:
        driver: Selenium WebDriver already showing the department page.
        dep_code: Department code; only used in the failure message.
        div_id: Identifier of the section (tab / content div suffix).
        imp_wait_time: Seconds for each explicit wait.
        table_index, start_row, tr_eng_dict: Forwarded to ``func`` unchanged
            (``tr_eng_dict`` defaults to an empty dict).
        x_path_table: XPath of the section's table bodies; derived from
            ``div_id`` when omitted.
    """
    @wraps(func)
    def wrapper(driver,dep_code,div_id,imp_wait_time,table_index=0,start_row=1,x_path_table=None,tr_eng_dict=None):#dep_code is used just for debugging in the failure message
        if tr_eng_dict is None:
            tr_eng_dict = {}
        if x_path_table is None:
            x_path_table = f"//div[@id='icerik_{div_id}']//table[@class='table table-bordered']/tbody"
        x_path_tab = f"//a[@href='#c{div_id}']"
        content_div_id = f"icerik_{div_id}"

        while True:
            try:
                tab_element = WebDriverWait(driver, imp_wait_time).until(
                    EC.element_to_be_clickable((By.XPATH, x_path_tab)))
                if tab_element.get_attribute("aria-expanded") != "true":
                    print("Tab not open - clicking...")
                    # Scroll to the tab and click through JavaScript to avoid click interception
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", tab_element)
                    driver.execute_script("arguments[0].click();", tab_element)
                    WebDriverWait(driver, imp_wait_time).until(
                        EC.visibility_of_element_located((By.ID, content_div_id))
                    )
                    print("Tab opened successfully")
                return func(driver,dep_code,div_id,imp_wait_time,table_index,start_row,x_path_table,tr_eng_dict)
            except Exception:
                # `Exception`, not a bare except: the retry loop must stay interruptible.
                close_pop_ups(driver,imp_wait_time)

            print(f"Failed to get content of the department {dep_code} in {func.__name__}")
            driver.refresh()
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
    return wrapper

In [None]:
@get_section_content_decorator
def get_content_generic(driver,dep_code,div_id,imp_wait_time,table_index,start_row,x_path_table, tr_eng_dict=None):
    """Read a two-column (label, value) table of a section into a dict.

    Args:
        driver: Selenium WebDriver showing the department page.
        dep_code, div_id: Not used here; consumed by the decorator.
        imp_wait_time: Seconds to wait for the table to be present.
        table_index: Which of the tables matched by ``x_path_table`` to read.
        start_row: Index of the first ``tr`` to read (earlier rows are skipped).
        x_path_table: XPath matching the table bodies of the section.
        tr_eng_dict: Optional mapping from row label to output key; labels
            without an entry are used as keys unchanged.

    Returns:
        dict mapping each row's key to ``parse_number`` of its second cell.
        Rows with an empty label are stored under ``"unknown"``; rows whose
        value cannot be read are stored as ``0``.
    """
    if tr_eng_dict is None:
        tr_eng_dict = {}

    WebDriverWait(driver, imp_wait_time).until( EC.presence_of_element_located((By.XPATH,x_path_table ) )  )
    table = driver.find_elements(By.XPATH, x_path_table)[table_index]

    extracted_data = {}
    for row in table.find_elements(By.TAG_NAME, "tr")[start_row:]:
        cells = row.find_elements(By.TAG_NAME, 'td')
        if not cells:
            # No <td> in this row, hence no label: skip it rather than
            # overwriting the previous row's entry with 0.
            continue

        row_label = cells[0].text.strip()
        if row_label == "YDT Yabancı Dil (80 soruda)":
            continue  # there are several foreign language scores; this row is not collected

        key = tr_eng_dict.get(row_label, row_label) if row_label else "unknown"
        try:
            extracted_data[key] = parse_number(cells[1].text)
        except Exception:
            # value cell missing or unreadable
            extracted_data[key] = 0

    return extracted_data

### 3.1 General Information Section

In [None]:
imp_wait_time= 10

In [None]:
@get_section_content_decorator
def get_general_info(driver,dep_code,div_id,imp_wait_time,table_index,start_row,x_path_table,tr_eng_dict):
    """Scrape the general information section of a department page.

    Waits until exactly three table bodies match ``x_path_table`` and then
    reads individual rows by their Turkish label.

    Returns:
        dict with ``quota`` and ``placements``. When the placements row cannot
        be read, ``placements`` is 0 and nothing else is collected. Otherwise
        the dict also holds ``not_registered`` (0 if unreadable),
        ``min_score`` / ``max_score`` (-1 if unreadable), ``top_order`` /
        ``bottom_order`` (10**7 if unreadable) and the texts ``scholarship``,
        ``faculty`` and ``entrance_score_type``.
    """
    def read_row(label, convert, default):
        """Return ``convert(<row text>)``, or ``default`` when the row is missing or unparseable."""
        try:
            return convert(get_row_content(driver, searched_text=label))
        except Exception:
            return default

    def as_int(text):
        """Parse an integer that may contain grouping separators."""
        return parse_number(text, int_type=True)

    # wait until exactly 3 tables are located
    WebDriverWait(driver, imp_wait_time).until(elements_to_be_number_of((By.XPATH, x_path_table), 3))

    extracted_data = {}
    extracted_data["quota"] = int(get_row_content(driver,searched_text= "Toplam Kontenjan",add_strong = True) )

    try:
        extracted_data["placements"] = parse_number( get_row_content(driver,searched_text= "Toplam Yerleşen",add_strong = True),int_type=True   )
    except Exception:
        # no placement row: nothing else to collect for this department
        extracted_data["placements"]  = 0
        return extracted_data

    extracted_data["not_registered"] = read_row("Yerleşip Kayıt Yaptırmayan", int, 0)
    # -1 / 10**7 mark "no one placed"
    extracted_data["min_score"] = read_row("0,12 Katsayı ile Yerleşen Son Kişinin Puanı", parse_number, -1)
    extracted_data["max_score"] = read_row("Tavan Puan(0,12)", parse_number, -1)
    extracted_data["top_order"] = read_row("Tavan Başarı Sırası(0,12)", as_int, 10**7)
    extracted_data["bottom_order"] = read_row("0,12 Katsayı ile Yerleşen Son Kişinin Başarı Sırası", as_int, 10**7)

    extracted_data["scholarship"] = get_row_content(driver,searched_text= "Burs Türü")
    extracted_data["faculty"] = get_row_content(driver,searched_text= "Fakülte")
    extracted_data["entrance_score_type"] = get_row_content(driver,searched_text= "Puan Türü")
    return extracted_data

In [None]:
# Test cell for get_general_info
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_general_info(driver,101110227 , "1000_1",imp_wait_time)

### 3.2 Gender Distribution

In [None]:
Image.open("images/gender_distribution.jpg")

In [None]:
tr_eng_dict_3_2={'Kız': "num_female", 'Erkek': "num_male","Belli değil":"unknown"}

In [None]:
# One zero-initialised integer column per gender category under the 'Gender' top level.
new_multiindex_tuples = list(zip(['Gender'] * len(tr_eng_dict_3_2), tr_eng_dict_3_2.values()))
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
# Test cell for get_gender_distribution
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_content_generic(driver,101110227 ,div_id= "1010",imp_wait_time=imp_wait_time,start_row=0,tr_eng_dict=tr_eng_dict_3_2)

### 3.3 Get Geographical Regions

In [None]:
@get_section_content_decorator
def get_geographical_regions(driver,dep_code,div_id,imp_wait_time,table_index,start_row,x_path_table,tr_eng_dict={}):
    """Scrape the two tables of the geographical-regions section.

    The first table is read row by row: the last cell of each row is expected
    to contain exactly two numbers, taken as male count then female count
    (e.g. "60 Erkek/17 Kız"). For each row three entries are produced:
    ``<label>-male``, ``<label>-female`` and ``<label>`` (their sum).
    The second table (first row skipped) yields one integer per region from
    the second cell of each row.

    ``table_index``, ``start_row`` and ``tr_eng_dict`` are accepted for the
    decorator's calling convention but not used.

    Returns:
        dict mapping the English labels (or the original label when no
        translation exists) to integer counts.
    """
    tur2eng_city={"Aynı Şehir":"Same province","Farklı Şehir":"Different province","Belli Değil":"Unknown province"}
    tur2eng_region = {"Akdeniz":"Mediterrenian","Doğu Anadolu":"Eastern Anatolia","Ege":"Aegean","Güneydoğu Anadolu":"Southeastern Anatolia","İç Anadolu":"Central Anatolia",
                     "Karadeniz":"Black Sea","Marmara":"Marmara","Belirsiz":"Unknown region"}

    def to_int(text):
        """Integer from text that may contain grouping separators; raises if not numeric."""
        return int(parse_number(text, int_type=True))

    # wait until exactly 2 tables are located
    tables = WebDriverWait(driver, imp_wait_time).until(elements_to_be_number_of((By.XPATH, x_path_table), 2))
    extracted_data = {}

    # first table: same / different / unknown province, split by gender
    for row in tables[0].find_elements(By.TAG_NAME, "tr"):
        WebDriverWait(row, imp_wait_time).until(elements_to_be_number_of((By.XPATH, './td'), 4)) # wait until 4 cells are located
        cells = row.find_elements(By.XPATH,  './td')
        # every run of digits (optionally with inner dots) in e.g. "60 Erkek/17 Kız"
        numbers = re.findall(r'(\d[\d.]*\d|\d)', cells[-1].text)
        num_male, num_female = (to_int(number) for number in numbers)

        text_in_the_cell = cells[0].text.strip()
        label = tur2eng_city.get(text_in_the_cell, text_in_the_cell)
        extracted_data[label + "-male"] = num_male
        extracted_data[label + "-female"] = num_female
        extracted_data[label] = num_male + num_female

    # second table: regions; the first row (toplam, i.e. total) is skipped
    for row in tables[1].find_elements(By.TAG_NAME, "tr")[1:]:
        WebDriverWait(row, imp_wait_time).until(elements_to_be_number_of((By.XPATH, './td'), 3)) # wait until 3 cells are located
        cells = row.find_elements(By.XPATH,  './td')
        text_in_the_cell = cells[0].text.strip()
        # same separator-aware parsing as the first table
        extracted_data[tur2eng_region.get(text_in_the_cell, text_in_the_cell)] = to_int(cells[1].text)

    return extracted_data

### Prepare *Regions* top level

In [None]:
# Label translations for the two tables of the regions section.
tur2eng_city={"Aynı Şehir":"Same province","Farklı Şehir":"Different province","Belli Değil":"Unknown province"}
tur2eng_region = {"Akdeniz":"Mediterrenian","Doğu Anadolu":"Eastern Anatolia","Ege":"Aegean","Güneydoğu Anadolu":"Southeastern Anatolia","İç Anadolu":"Central Anatolia",
                     "Karadeniz":"Black Sea","Marmara":"Marmara","Belirsiz":"Unknown region"}
# One zero-initialised integer column per translated label under the 'Regions' top level.
new_multiindex_tuples = [('Regions', name) for name in (*tur2eng_city.values(), *tur2eng_region.values())]
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
# Test cell for get_cell_geographical_regions
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101490621")
get_geographical_regions(driver,101110227 ,div_id= "1020ab",imp_wait_time=15)

### 3.4 Get Provinces Where Students Come From (Yerleşenlerin Geldikleri İller)

In [None]:
Image.open("images/cities.jpg")

### Prepare *Provinces* top level

In [None]:
# One zero-initialised integer column per province, plus 'Unknown' and 'Total', under the 'Provinces' top level.
new_multiindex_tuples = [('Provinces', name) for name in [*provinces, "Unknown", "Total"]]
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
df["Provinces"].head()

This section has a single table and we only need its first value column, so we can use the **get_content_generic** function.

In [None]:
# Test cell for cities
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_content_generic(driver,106510077 ,"1020c",imp_wait_time=imp_wait_time,tr_eng_dict={"Belirsiz":"Unknown","Toplam":"Total"})

## 3.5 Educational Status

In [None]:
Image.open("images/educational_status.jpg")

In [None]:
tr_eng_dict_3_5={'Liseden yeni mezun, YKS ye ilk defa girdi': "New Graduate-First Entrance",
 'Liseden mezun, daha önce hiç üniversiteye yerleşmemişti': "Graduate-Not Placed Previously",
 'Üniversitede öğrenci iken sınava girip buraya yerleşti':"Uni. Student",
 'Bir üniversiteden daha önce mezun olmuş': "University Graduate",
 'Diğer': "Other"}

### Prepare *Education Status* top level

In [None]:
# One zero-initialised integer column per education status under the 'Education Status' top level.
new_multiindex_tuples = list(zip(['Education Status'] * len(tr_eng_dict_3_5), tr_eng_dict_3_5.values()))
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

This section has a single table and we only need its first value column, so we can use the **get_content_generic** function.

In [None]:
# Test cell for educational status
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_content_generic(driver,106510077 ,"1030a",imp_wait_time=10,tr_eng_dict=tr_eng_dict_3_5)

### 3.6 Graduation Years

In [None]:
Image.open("images/graduation_years.jpg")

### Prepare *Graduation Years* top level

In [None]:
tr_eng_dict_3_6 = { str(i):i for i in range(1960,2027)}
tr_eng_dict_3_6["Bilinmiyor"]="unknown"

In [None]:
# One zero-initialised integer column per graduation year plus a single "unknown" column.
# tr_eng_dict_3_6 already maps "Bilinmiyor" to "unknown"; dict.fromkeys de-duplicates
# (keeping order) so the ('Graduation Years', 'unknown') column is not created twice.
graduation_year_labels = list(dict.fromkeys([*tr_eng_dict_3_6.values(), "unknown"]))
new_multiindex_tuples = [('Graduation Years', year) for year in graduation_year_labels]
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),  # Fill with zeros (or your desired values)
    columns=new_multiindex_tuples,
    index=df.index,
    dtype=int  # Force integer data type
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

This section has a single table and we only need its first value column, so we can use the **get_content_generic** function.

In [None]:
# Test cell for graduation years
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_content_generic(driver,106510077 ,"1030b",imp_wait_time=10,tr_eng_dict= tr_eng_dict_3_6)# dictionary converts strings to integers (except unknown)

### 3.7 Average scores per course

In [None]:
# Test cell for average scores per course
tr_eng_dict_3_7={"TYT Temel Matematik (40 soruda)":"TYT Math (out of 40)",
             'TYT Fen Bilimleri (20 soruda)':"TYT Science (out of 20)",
             'TYT Türkçe (40 soruda)':"TYT Turkish (out of 40)",
             'TYT Sosyal Bilimler (20 soruda)':"TYT Social Sciences (out of 20)",
             'YDT Yabancı Dil (80 soruda)':"Foreign Language (out of 80)",
             'AYT Matematik (40 soruda)': "AYT Math (out of 40)", 
             'AYT Fizik (14 soruda)': "AYT Physics (out of 14)",
             'AYT Kimya (13 soruda)': "AYT Chemistry (out of 13)", 
             'AYT Biyoloji (13 soruda)': "AYT Biology (out of 13)"
              }

### Prepare *Average Scores* top level

In [None]:
# One zero-initialised column per course under the 'Average Scores' top level.
# These columns hold averages, so they are created as float: writing a
# fractional value into an integer column relies on implicit upcasting,
# which pandas has deprecated.
new_multiindex_tuples = [('Average Scores', col_name) for col_name in tr_eng_dict_3_7.values()]
new_data = pd.DataFrame(
    np.zeros((len(df), len(tr_eng_dict_3_7))),
    columns=new_multiindex_tuples,
    index=df.index,
    dtype=float
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
# Test
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
get_content_generic(driver,106510077 ,"1210a",imp_wait_time=10,start_row = 2,tr_eng_dict=tr_eng_dict_3_7)

### 3.8 Countrywide number of preferences according to preference order (Ülke Genelinde Tercih Edilme istatistikleri)

In [None]:
# Test cell for countrywide number of preferences (which has 2 tables)
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
tr_eng_dict_3_8_1={'Toplamda Tercih Eden Aday Sayısı':"Total Number of Preferred Candidates",  'Bir Kontenjana Talip Olan Aday Sayısı':"Number of Candidates Applying for a Quota",
                   'Ortalama Tercih Edilme Sırası':'Average Preference Rank','Birinci Sırada Tercih Eden Sayısı':"Number of First-Rank Preferences",
                   'İlk Üç Sırada Tercih Eden Sayısı':'Number of Candidates Preferred in the First Three Ranks',
                   'İlk Dokuz Sırada Tercih Eden Sayısı':'Number of Candidates Preferred in the First Nine Ranks'}
tr_eng_dict_3_8_2={'10 ve Sonrası':'10 and After'}

### Prepare *Preferences* top level

In [None]:
# One zero-initialised integer column per translated label of both tables under the 'Preferences' top level.
new_multiindex_tuples = [
    ('Preferences', name)
    for name in (*tr_eng_dict_3_8_1.values(), *tr_eng_dict_3_8_2.values())
]
new_data = pd.DataFrame(
    np.zeros((len(df), len(new_multiindex_tuples))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
#Test
print("Table 1 content:",get_content_generic(driver,106510077 ,"1080",imp_wait_time=10,table_index=0,start_row=0,tr_eng_dict=tr_eng_dict_3_8_1))
print("Table 2 content",get_content_generic(driver,106510077 ,"1080",imp_wait_time=10,table_index=1,start_row=0,tr_eng_dict=tr_eng_dict_3_8_2))

### 3.9 Number of preference order of the placed students (Yerleşenler Ortalama Kaçıncı Tercihlerine Yerleşti?)

In [None]:
# Test cell for countrywide number of preferences (which has 3 tables)
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=101110227")
tr_eng_dict_3_9={'Birinci Tercih Olarak Yerleşen Sayısı': "Number of Placed in First Preference", 'İlk Üç Tercih Olarak Yerleşen Sayısı': "Number of Placed in First Three Preferences", 
                 'İlk On Tercih Olarak Yerleşen Sayısı': "Number of Placed in First Ten Preferences",  'Yerleşenler Ortalama Kaçıncı Tercihinde Yerleşti': "Average Preference Order"}

### Prepare *Preferences of Placed Students* top level

In [None]:
# Translated summary labels followed by the preference orders 1..24,
# each as a zero-initialised integer column under 'Preferences of Placed Students'.
col_names = [*tr_eng_dict_3_9.values(), *range(1, 25)]
new_multiindex_tuples = list(zip(['Preferences of Placed Students'] * len(col_names), col_names))
new_data = pd.DataFrame(
    np.zeros((len(df), len(col_names))),
    index=df.index,
    columns=new_multiindex_tuples,
    dtype=int,
)
df = pd.concat([df, new_data], axis=1)
df.head(3)

In [None]:
#Test
print("Table 1 content:",get_content_generic(driver,106510077 ,"1040",imp_wait_time=10,table_index=0,tr_eng_dict=tr_eng_dict_3_9 ))
print("Table 2 content:",get_content_generic(driver,106510077 ,"1040",imp_wait_time=10,table_index=1,start_row=0 ))
print("Table 3 content:",get_content_generic(driver,106510077 ,"1040",imp_wait_time=10,table_index=2,start_row=0 ))

### 3.10 Preference Tendencies of the Placed Students - Universities (Yerleşenlerin Tercih Eğilimleri - Üniversiteler)

In [None]:
# Test cell for Preference Tendencies of The Places Students (which has 2 tables)
driver.get("https://yokatlas.yok.gov.tr/lisans.php?y=106510077")
print("Table 1 content:",get_content_generic(driver,106510077 ,"1320",imp_wait_time=10,table_index=0 ))
print("Table 2 content:",get_content_generic(driver,106510077 ,"1320",imp_wait_time=10,table_index=1,start_row=0 ))

In [None]:
### Prepare *Preferences of Placed Students* top level

### 3.11 Preference Tendencies of the Placed Students - Provinces (Yerleşenlerin Tercih Eğilimleri - İller)

In [None]:
df.head()

# 4-COMBINE ALL STEPS USING HELPER FUNCTIONS

In [None]:
def add_values_to_df(df,dep_code, level_1_column,extracted_data):
    """Write scraped values into the row ``dep_code`` of ``df`` (in place).

    Each key of ``extracted_data`` names the second-level column under the
    top-level column ``level_1_column``; the matching value is stored there.
    The dict is printed first.
    """
    print(extracted_data)
    for feature_name in extracted_data:
        target_column = (level_1_column, feature_name)
        df.loc[dep_code, target_column] = extracted_data[feature_name]
def fill_features(driver, df, dep_code, sleep_time=2, base_dep_url="https://yokatlas.yok.gov.tr/lisans.php?y=", imp_wait_time=None):
    """Scrape every feature section of one department page and write it into ``df``.

    Parameters
    ----------
    driver
        Selenium WebDriver used to load and read the page.
    df : pandas.DataFrame
        Frame with two-level columns, updated in place via ``add_values_to_df``.
    dep_code
        Department code; appended to ``base_dep_url`` and used as the row label.
    sleep_time : int, optional
        Unused (the explicit sleep is disabled); kept so existing calls stay valid.
    base_dep_url : str, optional
        URL prefix of a department page.
    imp_wait_time : int or None, optional
        Implicit wait in seconds. When None, the notebook-level ``imp_wait_time``
        global is used if it exists, otherwise 10. Previously the function read
        that global directly and raised NameError when it was not defined.

    Returns
    -------
    None

    Notes
    -----
    Only the general information is fetched when ``placements`` is 0; all other
    sections are skipped for such departments and nothing is written.
    """
    if imp_wait_time is None:
        # Keep honoring the notebook global the original body depended on.
        imp_wait_time = globals().get("imp_wait_time", 10)

    driver.implicitly_wait(imp_wait_time)
    driver.get(base_dep_url + str(dep_code))
    close_pop_ups(driver, imp_wait_time)   # close pop-ups
    general_info = get_general_info(driver, dep_code, "1000_1", imp_wait_time)

    if general_info["placements"] != 0:
        # section 3.1 General
        add_values_to_df(df, dep_code, "General", general_info)
        # section 3.2 Gender
        add_values_to_df(df, dep_code, "Gender", get_content_generic(driver, dep_code, div_id="1010", imp_wait_time=imp_wait_time, start_row=0, tr_eng_dict=tr_eng_dict_3_2))
        # section 3.3 Regions
        add_values_to_df(df, dep_code, "Regions", get_geographical_regions(driver, dep_code, div_id="1020ab", imp_wait_time=15))
        # section 3.4 Provinces
        add_values_to_df(df, dep_code, "Provinces", get_content_generic(driver, dep_code, "1020c", imp_wait_time=imp_wait_time))
        # section 3.5 Education Status
        add_values_to_df(df, dep_code, "Education Status", get_content_generic(driver, dep_code, "1030a", imp_wait_time=10, tr_eng_dict=tr_eng_dict_3_5))
        # section 3.6 Graduation Years
        add_values_to_df(df, dep_code, "Graduation Years", get_content_generic(driver, dep_code, "1030b", imp_wait_time=10, tr_eng_dict=tr_eng_dict_3_6))
        # section 3.7 Average scores per course
        add_values_to_df(df, dep_code, "Average Scores", get_content_generic(driver, dep_code, "1210a", imp_wait_time=10, start_row=2, tr_eng_dict=tr_eng_dict_3_7))
        # sections 3.8.1 and 3.8.2 Preferences (two tables in the same div)
        add_values_to_df(df, dep_code, "Preferences", get_content_generic(driver, dep_code, "1080", imp_wait_time=10, table_index=0, start_row=0, tr_eng_dict=tr_eng_dict_3_8_1))
        add_values_to_df(df, dep_code, "Preferences", get_content_generic(driver, dep_code, "1080", imp_wait_time=10, table_index=1, start_row=0, tr_eng_dict=tr_eng_dict_3_8_2))
        # sections 3.9.1 - 3.9.3 Preferences of the placed students (three tables in the same div)
        add_values_to_df(df, dep_code, "Preferences of Placed Students", get_content_generic(driver, dep_code, "1040", imp_wait_time=10, table_index=0, tr_eng_dict=tr_eng_dict_3_9))
        add_values_to_df(df, dep_code, "Preferences of Placed Students", get_content_generic(driver, dep_code, "1040", imp_wait_time=10, table_index=1, start_row=0))
        add_values_to_df(df, dep_code, "Preferences of Placed Students", get_content_generic(driver, dep_code, "1040", imp_wait_time=10, table_index=2, start_row=0))

    # Re-apply the wait; presumably the section helpers change it when given
    # their own imp_wait_time — TODO confirm against the helper definitions.
    driver.implicitly_wait(imp_wait_time)

In [None]:
# Order the columns by both header levels (ascending is sort_index's default).
df = df.sort_index(axis="columns", level=[0, 1])

In [None]:
# Preview the first twelve rows of df.
df.head(12)

In [None]:
from IPython.display import clear_output

# Walk every row label of df and scrape its department page, with a tqdm progress bar.
for dep_code in tqdm(df.index):
    # One department code is hard-coded to be skipped (reason not recorded here).
    if dep_code == 102910313:
        continue
    print(f"Processing department code: {dep_code}")
    fill_features(driver, df, dep_code)
    clear_output(wait=True)  # clears previous cell output


In [27]:
# Build the flat "General" table from the saved scrape, as one non-mutating chain.
# The previous version assigned into a boolean-filtered slice in place
# (`.loc[:, col] = ...`, `rename(inplace=True)`), the pattern pandas reports
# with SettingWithCopyWarning.
scholarship_map = {'Ücretsiz': 'Tuition-Free', '%50 İndirimli': '50% Discounted', 'Burslu': 'Full Scholarship',
                   'Ücretli': 'Paid', '%25 İndirimli': '25% Discounted'}
df = (
    pd.read_pickle('data/preprocessed/high_edu.pkl')["General"]
    # Drop departments with quota 0 (present only due to their existence in the 2025 OSYM catalog).
    .loc[lambda general: general["quota"] != 0]
    # Exclude distance education; rows with a missing scholarship are kept, as before.
    .loc[lambda general: ~general["scholarship"].isin(["AÖ-Ücretli", "UÖ-Ücretli"])]
    # Translate the categorical labels; values absent from a mapping become NaN.
    .assign(
        uni_type=lambda general: general["uni_type"].map({'Devlet': 'State', 'Vakıf': 'Foundation'}),
        scholarship=lambda general: general["scholarship"].map(scholarship_map),
    )
    .rename(columns={"entrance_score_type": "Score Type", "city": "province"})
    .loc[:, ["uni_type",  "Score Type", "province", "uni_name", "dep_name", "scholarship",  "quota", "placements", "not_registered"]]
)
# NOTE(review): the disabled lines below are the only visible hint of how df_quota was built.
#df_quota = df.groupby([  "uni_type",  "Score Type","province", "uni_name","dep_name","scholarship"]).sum()
#df_quota = df_quota.sort_index(key=lambda x: pd.Index([locale.strxfrm(e) for e in x]))
df.head()

Unnamed: 0_level_0,uni_type,Score Type,province,uni_name,dep_name,scholarship,quota,placements,not_registered
dep_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
110410307,State,SAY,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Bilgi Güvenliği Teknolojisi,Tuition-Free,40,40,0
110410244,State,SAY,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Bilgisayar Mühendisliği,Tuition-Free,96,96,0
110410314,State,SAY,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Bilişim Sistemleri ve Teknolojileri,Tuition-Free,40,40,1
110410019,State,SAY,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Elektrik-Elektronik Mühendisliği,Tuition-Free,96,96,1
110410037,State,SAY,Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,Endüstri Mühendisliği,Tuition-Free,102,102,0


In [16]:
# Sum the numeric columns per (city, uni_name, uni_type, dep_name) group.
# NOTE(review): df_quota is overwritten from itself and no visible cell assigns it first
# (the only candidate definition is commented out) — confirm where it comes from.
# NOTE(review): the key here is "city", while the cell above renames "city" to "province" — verify.
df_quota = df_quota.groupby(["city", "uni_name","uni_type", "dep_name"]).sum()
df_quota

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,quota,placements,not_registered
city,uni_name,uni_type,dep_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilgi Güvenliği Teknolojisi,40,40,0
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilgisayar Mühendisliği,96,96,0
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilişim Sistemleri ve Teknolojileri,40,40,1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Elektrik-Elektronik Mühendisliği,96,96,1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Endüstri Mühendisliği,102,102,0
...,...,...,...,...,...,...
Şırnak,ŞIRNAK ÜNİVERSİTESİ,State,İktisat,54,54,9
Şırnak,ŞIRNAK ÜNİVERSİTESİ,State,İlahiyat,64,64,5
Şırnak,ŞIRNAK ÜNİVERSİTESİ,State,İlahiyat (M.T.O.K.),7,7,0
Şırnak,ŞIRNAK ÜNİVERSİTESİ,State,İnşaat Mühendisliği,21,1,0


In [14]:
# Preview the first five rows of the aggregated quota table.
df_quota.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,quota,placements,not_registered
city,uni_name,uni_type,dep_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilgi Güvenliği Teknolojisi,40,40,0
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilgisayar Mühendisliği,96,96,0
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Bilişim Sistemleri ve Teknolojileri,40,40,1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Elektrik-Elektronik Mühendisliği,96,96,1
Adana,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİVERSİTESİ,State,Endüstri Mühendisliği,102,102,0


In [60]:
# Load the per-year university table and keep only the 2024 rows.
# Written as one chain so no filtered slice is modified in place; the previous
# `drop(..., inplace=True)` ran on the result of a boolean filter.
df_years_universities = (
    pd.read_csv(
        "data/preprocessed/higher-education/df_years_universities.csv",
        usecols=["city", "uni_name", "foundation_year", "type", "region", "year"],
    )
    .loc[lambda universities: universities["year"] == 2024]
    # "year" is constant after the filter, so it carries no information.
    .drop(columns="year")
)
df_years_universities

Unnamed: 0,uni_name,type,city,region,foundation_year
0,ABDULLAH GÜL ÜNİVERSİTESİ,state,Kayseri,Central Anatolia,2010
1,ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ,foundation,İstanbul,Marmara,2007
2,ADANA ALPARSLAN TÜRKEŞ BİLİM VE TEKNOLOJİ ÜNİV...,state,Adana,Mediterranean,2011
3,ADIYAMAN ÜNİVERSİTESİ,state,Adıyaman,Southeastern Anatolia,2006
4,AFYON KOCATEPE ÜNİVERSİTESİ,state,Afyonkarahisar,Aegean,1992
...,...,...,...,...,...
203,YEDİTEPE ÜNİVERSİTESİ,foundation,İstanbul,Marmara,1996
204,YILDIZ TEKNİK ÜNİVERSİTESİ,state,İstanbul,Marmara,1982
205,YOZGAT BOZOK ÜNİVERSİTESİ,state,Yozgat,Central Anatolia,2006
206,YÜKSEK İHTİSAS ÜNİVERSİTESİ,foundation,Ankara,Central Anatolia,2013


In [55]:
# Distinct values of the "city" column among rows whose "type" is "foundation".
df_years_universities.loc[df_years_universities["type"]=="foundation","city"].unique()


array(['İstanbul', 'Antalya', 'Ankara', 'Trabzon', 'Mersin', 'Gaziantep',
       'İzmir', 'Nevşehir', 'Kocaeli', 'Konya', 'Bursa', 'Kayseri',
       'Samsun', 'Adana', 'Diyarbakır'], dtype=object)

In [36]:
import pandas as pd
# Requires googletrans (the original note pinned: pip install googletrans==3.1.0a0)
df = pd.read_pickle('data/preprocessed/high_edu.pkl')
from googletrans import Translator

# Initialize translator
translator = Translator()


def translate_text(text):
    """Translate ``text`` from Turkish to English.

    Returns the translated string, or ``text`` unchanged when the translation
    call fails for any ordinary reason. Only ``Exception`` is caught, so
    KeyboardInterrupt / SystemExit still propagate and the long-running loop
    below can be interrupted (a bare ``except:`` swallowed them).
    """
    try:
        return translator.translate(text, src='tr', dest='en').text
    except Exception:
        return text  # Return original text if translation fails


# Translate each distinct department name once, then map the results back onto
# every row; this replaces one network call per row with one per distinct name.
dep_names = df.loc[:, ('General', "dep_name")]
translations = {name: translate_text(name) for name in dep_names.dropna().unique()}
# Missing names are not in the lookup and are passed through unchanged.
df.loc[:, ('General', "dep_name")] = dep_names.map(lambda name: translations.get(name, name))
print(df.head())

                   Average Scores                            \
          AYT Biology (out of 13) AYT Chemistry (out of 13)   
dep_code                                                      
110410307                     4.6                       2.0   
110410244                     7.9                       6.5   
110410314                     3.9                       3.1   
110410019                     6.2                       4.7   
110410037                     5.4                       4.2   

                                                        \
          AYT Math (out of 40) AYT Physics (out of 14)   
dep_code                                                 
110410307                  9.6                     4.4   
110410244                 18.9                     8.4   
110410314                  8.3                     3.8   
110410019                 14.6                     6.6   
110410037                 13.1                     5.4   

                              

In [37]:
# Rich display of the first five rows of df.
df.head()

Unnamed: 0_level_0,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Education Status,...,Provinces,Provinces,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Graduation Years
Unnamed: 0_level_1,AYT Biology (out of 13),AYT Chemistry (out of 13),AYT Math (out of 40),AYT Physics (out of 14),Foreign Language (out of 80),TYT Math (out of 40),TYT Science (out of 20),TYT Social Sciences (out of 20),TYT Turkish (out of 40),Graduate-Not Placed Previously,...,Belirsiz,Toplam,Turkish Language and Literature (ouf of 24),Geography-1 (ouf of 6),History-1 (ouf of 10),Geography-1 (ouf of 11),AYT Tarih-2 (11 soruda),AYT Felsefe Grubu (12 soruda),Religious Culture and Moral Knowledge (ouf of 6),2027
dep_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
110410307,4.6,2.0,9.6,4.4,0,16.9,7.3,10.8,24.5,25,...,,,,,,,,,,
110410244,7.9,6.5,18.9,8.4,0,26.9,13.2,12.5,29.2,58,...,,,,,,,,,,
110410314,3.9,3.1,8.3,3.8,0,15.2,7.1,10.5,25.9,34,...,,,,,,,,,,
110410019,6.2,4.7,14.6,6.6,0,22.5,10.4,12.4,28.0,64,...,,,,,,,,,,
110410037,5.4,4.2,13.1,5.4,0,21.2,9.6,11.2,27.4,67,...,,,,,,,,,,


In [22]:
# Exclude distance education: drop rows whose scholarship label is "AÖ-Ücretli"
# or "UÖ-Ücretli". Rows with a missing scholarship are kept, as before.
# (A `.unique()` call whose result was computed and discarded has been removed.)
df = df[~df[("General", "scholarship")].isin(["AÖ-Ücretli", "UÖ-Ücretli"])]
df.shape

(10153, 279)

In [8]:
# Inspect the second distinct scholarship value together with its Python type.
vals = df[("General", "scholarship")].unique()
(type(vals[1]), vals[1])

(NoneType, None)

In [12]:
# These should work for both None and NaN
result = df[df[("General", "scholarship")].isna()]
print(f"Rows with null: {result.shape[0]}")

Rows with null: 1128


In [13]:
# Display `result` (presumably the rows with a null scholarship selected in the previous cell).
result

Unnamed: 0_level_0,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Education Status,...,Provinces,Provinces,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Average Scores,Graduation Years
Unnamed: 0_level_1,AYT Biology (out of 13),AYT Chemistry (out of 13),AYT Math (out of 40),AYT Physics (out of 14),Foreign Language (out of 80),TYT Math (out of 40),TYT Science (out of 20),TYT Social Sciences (out of 20),TYT Turkish (out of 40),Graduate-Not Placed Previously,...,Belirsiz,Toplam,Turkish Language and Literature (ouf of 24),Geography-1 (ouf of 6),History-1 (ouf of 10),Geography-1 (ouf of 11),AYT Tarih-2 (11 soruda),AYT Felsefe Grubu (12 soruda),Religious Culture and Moral Knowledge (ouf of 6),2027
dep_code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
110410426,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110410412,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110410447,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110410454,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110410461,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110390370,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110390377,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110390384,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
110390398,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,...,,,,,,,,,,
