# Cleaning Education Data

## Libraries & Utilities

In [1]:
import re
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
from utils import *
from deep_translator import GoogleTranslator
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
warnings.filterwarnings('ignore')

translator = GoogleTranslator(source='auto', target='en')
path = '../../../datasets/garanti-bbva-data-camp/education.csv'
output_path = '../../../datasets/garanti-bbva-data-camp/clean_education.csv'

## Education

In [2]:
df = pd.read_csv(path)
print(f'education data shape: {df.shape}')
print(f'degree classes: {df["degree"].nunique()}')
print(f'school_name classes: {df["school_name"].nunique()}')
print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')

education data shape: (142575, 6)
degree classes: 5888
school_name classes: 11113
fields_of_study classes: 11048


In [3]:
df = df.drop(columns = ['start_year_month', 'end_year_month'], axis = 1)
for col in ['school_name', 'degree', 'fields_of_study']:
    df.loc[df[col].notnull(), col] = df.loc[df[col].notnull(), col].apply(lambda x: x.strip())

for i in df['degree'].dropna().unique():
    if re.sub(r'[^\w\s]', '', re.sub(r'\d+', '', i)).strip() == '':
        df.loc[df['degree'] == i, 'degree'] = np.nan

df.loc[df['degree'] == 'lise', 'degree'] = 'High School'
df.loc[df['degree'] == 'Lise', 'degree'] = 'High School'
df.loc[df['degree'] == 'Lisans', 'degree'] = "Bachelor's degree"
df.loc[df['degree'] == 'Lisans Derecesi', 'degree'] = "Bachelor's degree"
df.loc[df['degree'] == 'Ön Lisans', 'degree'] = "Associate's degree"
df.loc[df['degree'] == 'Yüksek Lisans', 'degree'] = "Master's degree"

df.loc[df['degree'].astype(str).str.contains('highschool', case=False), 'degree'] = 'High School'
df.loc[df['degree'].astype(str).str.contains('high[\W\s]school', case=False, regex=True), 'degree'] = 'High School'
df.loc[(df["school_name"].str.contains("Lise", case=False)) & (df["degree"].isnull()), 'degree'] = "High School"
df.loc[(df["school_name"].str.contains("high[\W\s]school", case=False, regex=True)) & (df["degree"].isnull()), 'degree'] = "High School"

df.loc[df['degree'].astype(str).str.contains("Associate's degree", case = False), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("Associates degree", case = False), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("Associate degree", case = False), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("Ön[\W\s]Lisans", case = False, regex=True), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("On[\W\s]Lisans", case = False, regex=True), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("ÖnLisans", case = False), 'degree'] = "Associate's degree"
df.loc[df['degree'].astype(str).str.contains("OnLisans", case = False), 'degree'] = "Associate's degree"
df.loc[df['degree'].isin(['ön lisana', 'Ön Lisanas', 'Ön lisanas', 'Ön Lisan', "Associate’s Degree", "Associate", 'Associate of Science - AS',
'Associate of Science (A.S.)', 'Associate of Arts - AA', 'Associate of Arts and Sciences (A.A.S.)', 'Associate of Science (AS)', 'Associate of Arts and Sciences (AAS)', 'Vocational School of Higher Education Associate Diploma', 'Cisco Certified Network Associate', 'Associate of Arts (AA)', "Associate's", 'Associate of Law', 'Associate of Science', 'Associate of Arts and Sciences - AAS', 'Associate of Computer Programming', 'Associated', 'Associate\tDegree', 'Associate of Arts (A.A.)', 'Open-University (Associate)', 'Associate of Applied Science (ASS)', 'A.Sc.']), 'degree'] = "Associate's degree"

df.loc[df['degree'].astype(str).str.contains("Master's degree", case = False), 'degree'] = "Master's degree"
df.loc[df['degree'].astype(str).str.contains("Masters degree", case = False), 'degree'] = "Master's degree"
df.loc[df['degree'].astype(str).str.contains("Yuksek[\W\s]Lisans", case = False, regex=True), 'degree'] = "Master's degree"
df.loc[df['degree'].astype(str).str.contains("Yüksek[\W\s]Lisans", case = False, regex=True), 'degree'] = "Master's degree"
df.loc[df['degree'].astype(str).str.contains("YüksekLisans", case = False), 'degree'] = "Master's degree"
df.loc[df['degree'].astype(str).str.contains("YuksekLisans", case = False), 'degree'] = "Master's degree"
df.loc[df['degree'].isin(["Yuksel lisans", "Yüksel Lisans", "Y.Lisans", "Master’s Degree", "Master Degree", "Master degree", "Master’s Degree Student", "Masters", "Master", 'Master’s Degree (MSc)', 'MASTER’s DEGREE (MS)', 'Master’s degree', 'Master’s Degree Program with Thesis', 'Computer Engineering Master`s Degree', 'Master’s Degree(Board and Trusties Scholarship)', 'Master’s Degree with thesis', "Bachelor's or Master s Degree", 'Master’s degree (Dipl.-Ing.)', 'Master’s Degree, Dropout', 'Master’s Degree (MSc)']), 'degree'] = "Master's degree"

df.loc[df['degree'].astype(str).str.contains("Master of Science", case = False), 'degree'] = 'Master of Science'
df.loc[df['degree'].isin(["Master Science", 'MS', "M.S.", 'M. Sc.', 'M.S', 'M. S.', 'M.s.', "Master of Sc. Eng.", "Msc", "M.Sc", "MSc", "M.Sc.", 'M.Sc., Computer Engineering', 'M. Sc. Degree', 'M.Sci.', 'M.Sc. E.E.', "2015' M.Sc", 'M Sc', 'MS; M.SC Information Technology', 'M.Sc. Finance', 'Graduate, M.Sc.', 'M.Sc., Mechanical Engineering', 'M.Sc. with thesis', 'MSc.', 'MSc. (Dipl.- Ing. Elektrotechnik)', 'Computer Graphics MSc.', 'MSc. Mekatronik Mühendisliği', 'International Computer Institute (MSc.)', 'MSc.  Data Engineering', 'M.s. Engineering Management', '3,38 M.S.']), 'degree'] = 'Master of Science'

df.loc[df['degree'].astype(str).str.contains("Master of Business Administration", case = False), 'degree'] = 'Master of Business Administration'
df.loc[df['degree'].astype(str).str.contains("Masters of Business Administration", case = False), 'degree'] = 'Master of Business Administration'
df.loc[df['degree'].isin(['MBA', 'Mba', '(MBA)', "MBA 3.8", "MBA degree", "Master of Business Management - (MBA)"]), 'degree'] = 'Master of Business Administration'

df.loc[df['degree'].astype(str).str.contains("Bachelor of Science", case = False), 'degree'] = 'Bachelor of Science'
df.loc[df['degree'].isin(['BSc', 'BSc.', 'Bsc', 'Bsc.', 'BSc Engineering', 'BSc., Electrical Engineering', 'BsC', 'Chemical Engineer Bsc', '3.22/4, Lisans (BSc)', 'BSc Degree', 'BSc. Electronics&Communication Engineering', 'BSc, Computer Engineering', 'BSC', 'BSCS(HONOURS)', 'B.Sc.', 'B.S.', 'B.S', "Bachelor's of Science", "Bachelor's of Science (BSc)", "Bachelor's of science", "BS", 'B. S.', 'B. Sc.', "B.Sc", 'Bachelor of  Science (B.A.Sc.)','B.Sc. in Computer Engineering', 'B.Sc. EE.', 'B.Sc.E.E.', 'B.Sc. Industrial Engineering', 'B.Sc., Computer Science And Engineering', 'Undergraduate, B.Sc.', 'Undergraduate (B.Sc.) Program', 'B.Sc.Engr.', 'B.Sc. in Mechatronics Engineering', 'Bachelor Science (B.Sc.)', 'Bahelor of Sience (B.Sc.)', 'B.Sc. Computer Engineer', 'B.Sc. Electric & Electronic Engineer', 'B.s., Electric and Electronic Eng.', 'B.S. Degree in Computer Engineering', 'B.S. Cum Laude', 'B.S. (Honor Degree)', 'B.S. Microelectronics Engineering', 'B.s.', "2012' B.Sc", 'B.sc', 'BSc. Department of Electronic', 'Bs.', 'BS.', 'Bs.C', 'Bachelor of Sc. Edu.', 'Bachelor of Sicence']), 'degree'] = 'Bachelor of Science'

df.loc[df['degree'].astype(str).str.contains("Bachelor of Applied Science", case = False), 'degree'] = "Bachelor of Applied Science"
df.loc[df['degree'].astype(str).str.contains("Bachelor of Engineering", case = False), 'degree'] = "Bachelor of Engineering"
df.loc[df['degree'].astype(str).str.contains("Bachelor of Business Administration", case = False), 'degree'] = "Bachelor of Business Administration"
df.loc[df['degree'] == 'Bachelor of Bussines Administration', 'degree'] = "Bachelor of Business Administration"
df.loc[df['degree'] == 'Bachelor of Business', 'degree'] = "Bachelor of Business Administration"

df.loc[df['degree'].astype(str).str.contains("Master of Engineering", case = False), 'degree'] = "Master of Engineering"
df.loc[df['degree'].astype(str).str.contains("Master of Arts", case = False), 'degree'] = "Master of Arts"
df.loc[df['degree'].astype(str).str.contains("Master of Computer Applications", case = False), 'degree'] = "Master of Computer Applications"
df.loc[df['degree'].astype(str).str.contains("Master of Laws", case = False), 'degree'] = "Master of Laws"
df.loc[df['degree'].astype(str).str.contains("Master of Technology", case = False), 'degree'] = "Master of Technology"
df.loc[df['degree'].astype(str).str.contains("Bachelor of Arts", case = False), 'degree'] = "Bachelor of Arts"
df.loc[df['degree'].astype(str).str.contains("Bachelor of Education", case = False), 'degree'] = "Bachelor of Education"
df.loc[df['degree'].astype(str).str.contains("Licentiate", case = False), 'degree'] = "Licentiate degree"

df.loc[df['degree'].astype(str).str.contains("Doctor of Philosophy", case = False), 'degree'] = 'Doctor of Philosophy'
df.loc[df['degree'].isin(['phd', 'Ph.D.', 'Doktora (Dr.)', 'Doktora ( PHD )', 'PhD', 'Phd', 'Honorary Phd.', 'PhD Candidate', 'PHd','PhD.', 'PhD in Economics', 'Doktora (PhD)','Integrated PhD in Economics', 'Phd, Fen Bilimleri Enstitüsü', 'PhD Student','DOCTORATE DEGREE (PhD)','Phd Student (Coursework Only and Some Research)','PhD. in Analytical Chemistry', 'Phd Student','Phd. Computer Engineering','Phd Candidate','Phd Taught Courses', 'pHD', 'PhD (Incomplete)','Integrated PhD','PhD candidate', 'PhD / Drop out','Combined Master and PhD', 'Doctor (PhD)', 'PhD (Dr.)','Doktora (PhD), Elektronik ve Haberleşme Mühendisliği, Elektronik','Doktora/PhD', 'PhD in Computer Engineering 3.86/4.00', 'Phd.','Doctor of Computer Engineering - PhD', 'Masters, PhD','PhD.  in Computer Science and Engineering', 'Erasmus+ Student PhD','PhD (not completed)', 'PhD, Department of Computer Engineering','Phd Study (All But Thesis)', 'PhD Electrical and Electronic Engineering', 'ph.D.', 'Ph.D', 'Ph. D.', 'Ph. D', 'PH.D.', 'Integrated Ph.D.', 'Ph. D. -  Left at 4th semester', 'Visiting Ph.D. Researcher', 'Doctorate (Ph.D)', 'Combination of Master & Ph.D']), 'degree'] = 'Doctor of Philosophy'

df.loc[df['degree'].astype(str).str.contains("Bachelor's degree", case = False), 'degree'] = "Bachelor's degree"
df.loc[df['degree'].astype(str).str.contains("Bachelors degree", case = False), 'degree'] = "Bachelor's degree"
df.loc[df['degree'].astype(str).str.contains("Bachelor degree", case = False), 'degree'] = "Bachelor's degree"
df.loc[df['degree'].isin(['Bachelor degre', 'Baschelor Degree', "Bachelor’s Degree", "Bachelor", "lisans", 'LİSANS', 'Lisans Açık Öğretim', 'Lİsans', 'Lisans Öğrencisi', 'Lisans Derecesi (2.90/4.00 - 4.00/4.00)', 'Lisans ( Açık Öğretim)', 'Lisans, İkinci Anadal', 'Lisans, Anadal', 'Lisans Mezunu', 'lisansn', 'Farabi Exchange Student (Lisans)', '3.62/4.00 Lisans', '3.16/4 Lisans Derecesi', '2.72/4 Çift Anadal / Lisans Derecesi', 'Lisans derecesi', 'Lisans Derecesi Mezunu', '4.sınıf (Lisans)', 'Bilgisayar Mühendiliği Öğrenci(Lisans)', 'Lisans Derecesi(BSc)', 'Lisans, Çift Anadal', 'Lisans, Erasmus', 'Lisans Derecesi, Çift Anadal', 'LISANS', 'Lisans Derecesi, İngilizce', 'Lisans Derecesi 3.35\\4.00', '3.35 Lisans Derecesi', 'Lisans (Bs)', 'Lisans Tamamlama', 'Açıköğretim Lisans', 'Lisans Derecesi(Erasmus)', 'Lisans  3,45/4', 'Teknik Eğitim Fakültesi - Lisans', 'Lisans Derecesi,Hazırlık(İng)', 'Lisans Derecesi (terk) / Undergraduate (dropped)', 'Lisans / Undergraduate', 'Lisans Derecesi , 3.49', 'Üniversite Lisans', 'Lisans Derecesi, 3.45', 'Lisans Derecesi 3.3/4', 'Lisans Derecesi ( Mühendislik Fakültesi )', 'Çift Anadal Lisans Derecesi', '3.52 - Lisans Derecesi', 'lisans Öğrecisi', '2.91/4-Lisans Derecesi', 'Lisans Derecesi Fakülte Birinciliği', 'Lisans Derecesi %25 Burslu', 'Lisans mezunu', 'YG, Lisans Derecesi', 'Lisans Derece', 'Lisans Derecesi 2.83/4', 'Lisans Derecesi - 3,32 / 4', 'Lisans  3,14/4', 'Lisans Derecesi  GPA : 3.36/4', 'Lisans Derecesi- Dokuz Eylül Üniversitesi', 'Lisans Derecesi (Bachelor)', 'Lisans Derecesi (Bölüm Birincisi)', 'Lisans Derecesi, 3.57/4.00', 'Lisans Derecesi / License Degree', 'Lisans Derecesi:2.54', 'Lisans Derecesi: (TERK)', 'Matematik Lisans', 'Lisans(Terk)', 'Lisans Derecesi(4.)', '3.30 Lisans Derecesi', '3,21 - Lisans Derecesi', 'Lisans Eğitimine Devam Ediyor', 'Lisans Derecesi-  3.76/4', 'Lisans, İktisadi ve idari bilimler fakültesi', 'Lisans(terk)', 'Lisans (Açıköğretim)', 'Yandal (lisans)', 'Lisans öğrencisi', 'Açıköğretim lisans', 'İkinci Üniversite (Lisans)', 'Mühendislik Fakültesi Lisans', 'Açıköğretim Fakültesi - Lisans', 'Lisans Derecesi (3.36)', 'lisans derecesi', 'Lisans Derecesi (Erasmus)', 'Lisans Derecesi: 2.90', 'Lisans Derecesi - Erasmus', '3.48/4.00, Lisans', 'Bachelors / Lisans Derecesi', 'Lisans / Sertifika', 'Bilgisayar Mühendisliği Lisans Derecesi', 'Lisans Derecesi, 3.32', 'Lisans Derecesi, Bölüm Birincisi', 'Lisans (Diploma: 3.01)', 'Lisans, Açıköğretim', 'Lisans Derecesi (Bachelor’s Degree)', 'Lisans Derecesi (2,68/4)', 'Lisans (Erasmus)', 'Mühendislik Fakültesi / Lisans', "Bachelors", "Bachelor's", "License", "Licence", 'B.S. degree', 'B. S. Degree', '4. Sınıf',
'4.Sınıf','4.sınıf','3.Sınıf','3. Sınıf', '4. sınıf', '3.sınıf', '3. sınıf', '4.SINIF', 'Bachelor´s degree.', 'BACHELOR’s DEGREE (BS)', "bachelor's", 'Bachelor´s degree.', "Bachelor's .Degree", "Bachelor’s Degree with honor", 'Bachelor´s degree.', 'BACHELOR’s DEGREE (BS)', 'Bachelor’s Degree (Drop Out)', 'BACHELOR’S DEGREE', '3.02/4, Bachelor’s Degree', 'Bachelor’s Degree - Erasmus+', 'Bachelor’s Degree %100 English', 'Bachelor S degree', 'Bachelor`s Degree', 'Bachelor’s Degree in Computer Engineering', 'Bachelor’s degree', 'Bachelor’s Degree, 2.71', 'Bachelor’s Degree (Transfer)', 'Bachelor’s Degree, Electrical Engineering, GPA: 3.02 /4.00', 'Bachelor’s Degrees', 'Bachelor’s Degree (with Honors)', 'bachelor']), 'degree'] = "Bachelor's degree"

df.loc[df['degree'].str.contains('engineer[\W\s]s degree', case = False, na=False, regex=True), 'degree'] = "Engineer's degree"
df.loc[df['degree'].str.contains('bachelor of law', case = False, na=False), 'degree'] = 'Bachelor of Laws'
df.loc[df['degree'].str.contains('bachelor of fine arts', case = False, na=False), 'degree'] = 'Bachelor of Fine Arts'

In [4]:
degree_keywords = ['lisans', 'ön', 'yüksek', 'bölüm', 'fakülte', 'üniversite', 'sertifika', 'derece', 'doktor', 'mezun', 'öğre', 'lise', 'mühendis', 'sınıf']

degree_translated = dict()
for kw in degree_keywords:
    print(f'keyword: {kw}')
    for i in tqdm(df.loc[df['degree'].astype(str).str.contains(kw, case = False), 'degree'].unique()):
        if i not in degree_translated.keys():
            try:
                degree_translated[i] = translator.translate(i)
            except:
                degree_translated[i] = i

for key in degree_translated.keys():
    df.loc[df['degree'] == key, 'degree'] = degree_translated[key]

df.loc[df['degree'].notnull(), 'degree'] = df.loc[df['degree'].notnull(), 'degree'].apply(lambda x: x.lower())
df.loc[df['degree'].notnull(), 'degree'] = df.loc[df['degree'].notnull(), 'degree'].apply(lambda x: translation(x))

print(f'education data shape: {df.shape}')
print(f'degree classes: {df["degree"].nunique()}')
print(f'school_name classes: {df["school_name"].nunique()}')
print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.to_csv('../../../datasets/garanti-bbva-data-camp/clean_education_v2.csv', index = False)

keyword: lisans


100%|██████████| 1/1 [00:00<00:00,  2.56it/s]


keyword: ön


100%|██████████| 24/24 [00:06<00:00,  3.58it/s]


keyword: yüksek


100%|██████████| 103/103 [00:25<00:00,  3.96it/s]


keyword: bölüm


100%|██████████| 88/88 [00:22<00:00,  3.94it/s]


keyword: fakülte


100%|██████████| 264/264 [01:09<00:00,  3.81it/s]


keyword: üniversite


100%|██████████| 41/41 [00:09<00:00,  4.52it/s]


keyword: sertifika


100%|██████████| 10/10 [00:02<00:00,  3.78it/s]


keyword: derece


100%|██████████| 2/2 [00:00<00:00,  6.94it/s]


keyword: doktor


100%|██████████| 4/4 [00:01<00:00,  3.89it/s]


keyword: mezun


100%|██████████| 219/219 [00:33<00:00,  6.60it/s]


keyword: öğre


100%|██████████| 85/85 [00:15<00:00,  5.55it/s]


keyword: lise


100%|██████████| 95/95 [00:13<00:00,  7.01it/s]


keyword: mühendis


100%|██████████| 178/178 [00:19<00:00,  9.10it/s]


keyword: sınıf


100%|██████████| 31/31 [00:05<00:00,  5.54it/s]


education data shape: (142575, 4)
degree classes: 2833
school_name classes: 10967
fields_of_study classes: 10653


In [5]:
#degree_translated = dict()
#for i in tqdm(df['degree'].dropna().unique()):
#    degree_translated[i] = translator.translate(i)
#for key in degree_translated.keys():
#    df.loc[df['degree'] == key, 'degree'] = degree_translated[key]
#df.loc[df['degree'].notnull(), 'degree'] = df.loc[df['degree'].notnull(), 'degree'].apply(lambda x: x.lower())
#    
#print(f'education data shape: {df.shape}')
#print(f'degree classes: {df["degree"].nunique()}')
#print(f'school_name classes: {df["school_name"].nunique()}')
#print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.to_csv(output_path, index = False)
#df.head()

## School Names

In [6]:
#df = pd.read_csv(output_path)
#print(f'education data shape: {df.shape}')
#print(f'degree classes: {df["degree"].nunique()}')
#print(f'school_name classes: {df["school_name"].nunique()}')
#print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.head()

In [7]:
df.loc[df['school_name'].astype(str).str.contains("Anadolu Üni", case = False), 'school_name'] = 'Anadolu University'
df.loc[df['school_name'].astype(str).str.contains("Anadolu Uni", case = False), 'school_name'] = 'Anadolu University'

df.loc[df['school_name'].astype(str).str.contains("Yıldız Te", case = False), 'school_name'] = "Yildiz Technical University"
df.loc[df['school_name'].astype(str).str.contains("Yildiz Te", case = False), 'school_name'] = "Yildiz Technical University"
df.loc[df['school_name'].isin(['YTU', 'YTÜ PEM', 'YTÜ mat.', 'YTÜ - Chemical Engineer']), 'school_name'] = "Yildiz Technical University"

df.loc[df['school_name'].astype(str).str.contains("İstanbul Uni", case = False), 'school_name'] = "Istanbul University"
df.loc[df['school_name'].astype(str).str.contains("İstanbul Üni", case = False), 'school_name'] = "Istanbul University"

df.loc[df['school_name'].astype(str).str.contains("Sakarya Üni", case = False), 'school_name'] = "Sakarya University"
df.loc[df['school_name'].astype(str).str.contains("Sakarya Uni", case = False), 'school_name'] = "Sakarya University"
df.loc[df['school_name'].isin(['University of Sakarya']), 'school_name'] = "Sakarya University"

df.loc[df['school_name'].astype(str).str.contains("Istanbul Te", case = False), 'school_name'] = "Istanbul Technical University"
df.loc[df['school_name'].isin(['ITU','ITU Computer Engeenering','ITU computer engineering','IZMIR INSTITUTE OF TECHNOLOGY','ITU Ekrem Elginkan High School','ITU. S.M.F. DMYO ELEKTRİK','ITU Electronics and Communication Engineering','ITU - Matematik Muhendisligi']), 'school_name'] = "Istanbul Technical University"

df.loc[df['school_name'].astype(str).str.contains("Marmara Uni", case = True), 'school_name'] = "Marmara University"
df.loc[df['school_name'].astype(str).str.contains("Marmara Üni", case = True), 'school_name'] = "Marmara University"

df.loc[df['school_name'].astype(str).str.contains("Kocaeli Uni", case = True), 'school_name'] = "Kocaeli University"
df.loc[df['school_name'].astype(str).str.contains("Kocaeli Üni", case = True), 'school_name'] = "Kocaeli University"

df.loc[df['school_name'].astype(str).str.contains("Hacettepe Uni", case = True), 'school_name'] = "Hacettepe University"
df.loc[df['school_name'].astype(str).str.contains("Hacettepe Üni", case = True), 'school_name'] = "Hacettepe University"

df.loc[df['school_name'].astype(str).str.contains("Ege Uni", case = True), 'school_name'] = "Ege University"
df.loc[df['school_name'].astype(str).str.contains("Ege Üni", case = True), 'school_name'] = "Ege University"

df.loc[df['school_name'].astype(str).str.contains("Osmangazi Uni", case = False), 'school_name'] = "Eskisehir Osmangazi University"
df.loc[df['school_name'].astype(str).str.contains("Osmangazi Üni", case = False), 'school_name'] = "Eskisehir Osmangazi University"

df.loc[(df['school_name'].astype(str).str.contains("Gazi Uni", case = False)) 
& (df['school_name'] != 'Eskisehir Osmangazi University'), 'school_name'] = "Gazi University"
df.loc[(df['school_name'].astype(str).str.contains("Gazi Üni", case = False)) 
& (df['school_name'] != 'Eskisehir Osmangazi University'), 'school_name'] = "Gazi University"

df.loc[df['school_name'].astype(str).str.contains("Bahçeşehir Uni", case = False), 'school_name'] = "Bahcesehir University"
df.loc[df['school_name'].astype(str).str.contains("Bahçeşehir Üni", case = False), 'school_name'] = "Bahcesehir University"
df.loc[df['school_name'].astype(str).str.contains("Bahcesehir Uni", case = False), 'school_name'] = "Bahcesehir University"
df.loc[df['school_name'].astype(str).str.contains("Bahcesehir Üni", case = False), 'school_name'] = "Bahcesehir University"

df.loc[df['school_name'].astype(str).str.contains("Dokuz Eylul Uni", case = False), 'school_name'] = "Dokuz Eylul University"
df.loc[df['school_name'].astype(str).str.contains("Dokuz Eylul Üni", case = False), 'school_name'] = "Dokuz Eylul University"
df.loc[df['school_name'].astype(str).str.contains("Dokuz Eylül Uni", case = False), 'school_name'] = "Dokuz Eylul University"
df.loc[df['school_name'].astype(str).str.contains("Dokuz Eylül Üni", case = False), 'school_name'] = "Dokuz Eylul University"

df.loc[df['school_name'].astype(str).str.contains("Selçuk Uni", case = False), 'school_name'] = "Selcuk University"
df.loc[df['school_name'].astype(str).str.contains("Selçuk Üni", case = False), 'school_name'] = "Selcuk University"
df.loc[df['school_name'].astype(str).str.contains("Selcuk Uni", case = False), 'school_name'] = "Selcuk University"
df.loc[df['school_name'].astype(str).str.contains("Selcuk Üni", case = False), 'school_name'] = "Selcuk University"

df.loc[df['school_name'].astype(str).str.contains("Beykent Uni", case = False), 'school_name'] = "Beykent University"
df.loc[df['school_name'].astype(str).str.contains("Beykent Üni", case = False), 'school_name'] = "Beykent University"

df.loc[df['school_name'].astype(str).str.contains("Ankara Uni", case = False), 'school_name'] = "Ankara University"
df.loc[df['school_name'].astype(str).str.contains("Ankara Üni", case = False), 'school_name'] = "Ankara University"

df.loc[df['school_name'].astype(str).str.contains("Trakya Uni", case = False), 'school_name'] = "Trakya University"
df.loc[df['school_name'].astype(str).str.contains("Trakya Üni", case = False), 'school_name'] = "Trakya University"

df.loc[df['school_name'].astype(str).str.contains("Süleyman Demirel Üni", case = False), 'school_name'] = "Suleyman Demirel University"
df.loc[df['school_name'].astype(str).str.contains("Süleyman Demirel Uni", case = False), 'school_name'] = "Suleyman Demirel University"
df.loc[df['school_name'].astype(str).str.contains("Suleyman Demirel Uni", case = False), 'school_name'] = "Suleyman Demirel University"

df.loc[df['school_name'].astype(str).str.contains("Karadeniz Te", case = False), 'school_name'] = "Karadeniz Technical University"

df.loc[df['school_name'].astype(str).str.contains("Bogaziçi Üni", case = False), 'school_name'] = "Bogazici University"
df.loc[df['school_name'].astype(str).str.contains("Bogazici Uni", case = False), 'school_name'] = "Bogazici University"
df.loc[df['school_name'].astype(str).str.contains("Bogaziçi Uni", case = False), 'school_name'] = "Bogazici University"
df.loc[df['school_name'].astype(str).str.contains("Boğaziçi Uni", case = False), 'school_name'] = "Bogazici University"
df.loc[df['school_name'].astype(str).str.contains("Boğaziçi Üni", case = False), 'school_name'] = "Bogazici University"

df.loc[df['school_name'].astype(str).str.contains("Firat Uni", case = False), 'school_name'] = "Firat University"
df.loc[df['school_name'].astype(str).str.contains("Firat Üni", case = False), 'school_name'] = "Firat University"
df.loc[df['school_name'].astype(str).str.contains("Fırat Uni", case = False), 'school_name'] = "Firat University"
df.loc[df['school_name'].astype(str).str.contains("Fırat Üni", case = False), 'school_name'] = "Firat University"

df.loc[df['school_name'].astype(str).str.contains("Bilkent Uni", case = False), 'school_name'] = "Bilkent University"
df.loc[df['school_name'].astype(str).str.contains("Bilkent Üni", case = False), 'school_name'] = "Bilkent University"

df.loc[df['school_name'].astype(str).str.contains("Erciyes Uni", case = False), 'school_name'] = "Erciyes University"
df.loc[df['school_name'].astype(str).str.contains("Erciyes Üni", case = False), 'school_name'] = "Erciyes University"

df.loc[df['school_name'].astype(str).str.contains("Çukurova Uni", case = False), 'school_name'] = "Cukurova University"
df.loc[df['school_name'].astype(str).str.contains("Çukurova Üni", case = False), 'school_name'] = "Cukurova University"

df.loc[df['school_name'] == 'TOBB Ekonomi ve Teknoloji Üniversitesi', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB Ekonomi ve Teknoloji Universitesi', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB University of Economics &Technology', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB Economy and Technology University', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB Economics and Technology University', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB Ekonomi ve Teknoloji Universitesi', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB University of Economics & Technology', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB ETÜ, Electrical and Electronic Engineering', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB ETU - University of Economics & Technology', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB ETU University of Economics and Technology', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'] == 'TOBB Economics and Technolgy University', 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'].astype(str).str.contains("TOBB ETU", case = False), 'school_name'] = 'TOBB University of Economics and Technology'
df.loc[df['school_name'].astype(str).str.contains("TOBB ETÜ", case = False), 'school_name'] = 'TOBB University of Economics and Technology'

df.loc[df['school_name'].astype(str).str.contains("Gebze Technical", case = False), 'school_name'] = "Gebze Technical University"
df.loc[df['school_name'].astype(str).str.contains("Gebze Teknik", case = False), 'school_name'] = "Gebze Technical University"
df.loc[df['school_name'] == 'Gebze Tecnical University', 'school_name'] = "Gebze Technical University"

df.loc[df['school_name'].astype(str).str.contains("Istanbul Bilgi Uni", case = False), 'school_name'] = "Istanbul Bilgi University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Bilgi Üni", case = False), 'school_name'] = "Istanbul Bilgi University"

df.loc[df['school_name'].astype(str).str.contains("Yeditepe Ü", case = False), 'school_name'] = "Yeditepe University"
df.loc[df['school_name'].astype(str).str.contains("Yeditepe Uni", case = False), 'school_name'] = "Yeditepe University"

df.loc[df['school_name'].astype(str).str.contains("Orta Dogu", case = False), 'school_name'] = "Middle East Technical University"
df.loc[df['school_name'].astype(str).str.contains("Orta Doğu Teknik Üni", case = False), 'school_name'] = "Middle East Technical University"
df.loc[df['school_name'].astype(str).str.contains("Orta Dogu Teknik Üni", case = False), 'school_name'] =  "Middle East Technical University"
df.loc[df['school_name'].astype(str).str.contains("Middle East Tech", case = False), 'school_name'] =  "Middle East Technical University"
df.loc[df['school_name'].isin(['METU','Middle East Technical University Development Foundation High School (METU Collage)','METU Development Foundation School','METU-Middle East Technical University (Turkey)','METU College','METU Yabancı Diller Fakültesi','ODTÜ (METU)','Middle East Technical University (METU)', "ODTÜ", "Odtü Uzaktan Eğitim", "ODTU"]), 'school_name'] = "Middle East Technical University"

df.loc[df['school_name'].astype(str).str.contains("Doğu Akdeniz", case = False), 'school_name'] = "Eastern Mediterranean University"
df.loc[df['school_name'].astype(str).str.contains("Akdeniz Ün", case = False), 'school_name'] = "Mediterranean University"
df.loc[df['school_name'].astype(str).str.contains("Akdeniz Uni", case = False), 'school_name'] = "Mediterranean University"

df.loc[df['school_name'].astype(str).str.contains("Mugla Sitki Kocman Uni", case = False), 'school_name'] = 'Mugla Sitki Kocman University'
df.loc[df['school_name'].astype(str).str.contains("Muğla Sıtkı Koçman Uni", case = False), 'school_name'] = 'Mugla Sitki Kocman University'
df.loc[df['school_name'].astype(str).str.contains("Muğla Sıtkı Koçman Üni", case = False), 'school_name'] = 'Mugla Sitki Kocman University'

df.loc[df['school_name'].astype(str).str.contains("Koc Uni", case = False), 'school_name'] = "Koc University"
df.loc[df['school_name'].astype(str).str.contains("Koç Üni", case = False), 'school_name'] = "Koc University"
df.loc[df['school_name'].astype(str).str.contains("Koç Uni", case = False), 'school_name'] = "Koc University"

df.loc[df['school_name'].astype(str).str.contains("Sabanci Uni", case = False), 'school_name'] = "Sabanci University"
df.loc[df['school_name'].astype(str).str.contains("Sabancı Üni", case = False), 'school_name'] = "Sabanci University"

df.loc[df['school_name'] == 'Galatasaray Üniversitesi', 'school_name'] = 'Galatasaray University'
df.loc[df['school_name'] == 'GalaGalatasaray Üniversitesi', 'school_name'] = 'Galatasaray University'

df.loc[df['school_name'] == 'Ahmet Yesevi Üniversitesi', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Hoca Ahmet Yesevi Üniversitesi', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'H. Ahmet Yesevi Üniversitesi', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Üniversitesi ', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == "Ahmet Yesevi Üniversitesi(master's degree)", 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Üniversitesi Mühendislik Fakültesi', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Üniversitesi - Uzaktan Eğitim', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Üniversity', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Universitesi', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Universitesi Yuksek Lisans', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Hoca Ahmet Yesevi University', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Ahmet Yesevi Universty', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'] == 'Hoca Ahmet Yesevi Universty', 'school_name'] = 'Ahmet Yesevi University'
df.loc[df['school_name'].astype(str).str.contains("Ahmet Yesevi Ün", case = False), 'school_name'] = 'Ahmet Yesevi University'

df.loc[df['school_name'].astype(str).str.contains("Atatürk Uni", case = False), 'school_name'] = "Ataturk University"
df.loc[df['school_name'].astype(str).str.contains("Atatürk Üni", case = False), 'school_name'] = "Ataturk University"
df.loc[df['school_name'].astype(str).str.contains("Ataturk Uni", case = False), 'school_name'] = "Ataturk University"

df.loc[df['school_name'].astype(str).str.contains("Eskişehir Tech", case = False), 'school_name'] = "Eskisehir Technical University"
df.loc[df['school_name'].astype(str).str.contains("Eskişehir Tek", case = False), 'school_name'] = "Eskisehir Technical University"
df.loc[df['school_name'].astype(str).str.contains("Eskisehir Tech", case = False), 'school_name'] = "Eskisehir Technical University"

df.loc[df['school_name'].astype(str).str.contains("Başkent Üni", case = False), 'school_name'] = "Baskent University"
df.loc[df['school_name'].astype(str).str.contains("Baskent Üni", case = False), 'school_name'] = "Baskent University"
df.loc[df['school_name'].astype(str).str.contains("Baskent Uni", case = False), 'school_name'] = "Baskent University"

df.loc[df['school_name'].astype(str).str.contains("Uludag Uni", case = False), 'school_name'] = "Uludag University"
df.loc[df['school_name'].astype(str).str.contains("Uludağ Üni", case = False), 'school_name'] = "Uludag University"

df.loc[df['school_name'].astype(str).str.contains("Istanbul Kültür Uni", case = False), 'school_name'] = "Istanbul Kultur University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Kultur Uni", case = False), 'school_name'] = "Istanbul Kultur University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Kültür Üni", case = False), 'school_name'] = "Istanbul Kultur University"

df.loc[df['school_name'].astype(str).str.contains("Ondokuz Mayis Uni", case = False), 'school_name'] = "Ondokuz Mayis University"
df.loc[df['school_name'].astype(str).str.contains("Ondokuz Mayis Üni", case = False), 'school_name'] = "Ondokuz Mayis University"
df.loc[df['school_name'].astype(str).str.contains("Ondokuz Mayıs Üni", case = False), 'school_name'] = "Ondokuz Mayis University"

df.loc[df['school_name'].astype(str).str.contains("Atilim Uni", case = False), 'school_name'] = "Atilim University"
df.loc[df['school_name'].astype(str).str.contains("Atilim Üni", case = False), 'school_name'] = "Atilim University"

df.loc[df['school_name'].astype(str).str.contains("Celal Bayar Uni", case = False), 'school_name'] = "Celal Bayar University"
df.loc[df['school_name'].astype(str).str.contains("Celal Bayar Üni", case = False), 'school_name'] = "Celal Bayar University"

df.loc[df['school_name'].astype(str).str.contains("Pamukkale Uni", case = False), 'school_name'] = "Pamukkale University"
df.loc[df['school_name'].astype(str).str.contains("Pamukkale Üni", case = False), 'school_name'] = "Pamukkale University"

df.loc[df['school_name'].astype(str).str.contains("Istanbul Ticaret Üni", case = False), 'school_name'] = "Istanbul Commerce University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Commerce", case = False), 'school_name'] = "Istanbul Commerce University"

df.loc[df['school_name'].astype(str).str.contains("Çankaya Uni", case = False), 'school_name'] = "Cankaya University"
df.loc[df['school_name'].astype(str).str.contains("Çankaya Üni", case = False), 'school_name'] = "Cankaya University"
df.loc[df['school_name'].astype(str).str.contains("Cankaya Uni", case = False), 'school_name'] = "Cankaya University"

df.loc[df['school_name'].astype(str).str.contains("Maltepe Uni", case = False), 'school_name'] = "Maltepe University"
df.loc[df['school_name'].astype(str).str.contains("Maltepe Üni", case = False), 'school_name'] = "Maltepe University"

df.loc[df['school_name'].astype(str).str.contains("Kırıkkale Uni", case = False), 'school_name'] = "Kirikkale University"
df.loc[df['school_name'].astype(str).str.contains("Kırıkkale Üni", case = False), 'school_name'] = "Kirikkale University"
df.loc[df['school_name'].astype(str).str.contains("Kirikkale Üni", case = False), 'school_name'] = "Kirikkale University"
df.loc[df['school_name'].astype(str).str.contains("Kirikkale Uni", case = False), 'school_name'] = "Kirikkale University"

df.loc[df['school_name'] == 'İzmir Ekonomi Üniversitesi', 'school_name'] = 'Izmir University of Economics'
df.loc[df['school_name'] == 'Izmir Ekonomi Universitesi', 'school_name'] = 'Izmir University of Economics'
df.loc[df['school_name'] == 'İzmir University of Economics', 'school_name'] = 'Izmir University of Economics'
df.loc[df['school_name'] == 'Izmir University of Economics Graduate School of Social Sciences', 'school_name'] = 'Izmir University of Economics'

df.loc[df['school_name'].astype(str).str.contains("Gaziantep Uni", case = False), 'school_name'] = "Gaziantep University"
df.loc[df['school_name'].astype(str).str.contains("Gaziantep Üni", case = False), 'school_name'] = "Gaziantep University"

df.loc[df['school_name'].astype(str).str.contains("Mersin Uni", case = False), 'school_name'] = "Mersin University"
df.loc[df['school_name'].astype(str).str.contains("Mersin Üni", case = False), 'school_name'] = "Mersin University"

df.loc[df['school_name'].astype(str).str.contains("İsik Uni", case = False), 'school_name'] = "Isik University"
df.loc[df['school_name'].astype(str).str.contains("Işık Uni", case = False), 'school_name'] = "Isik University"
df.loc[df['school_name'].astype(str).str.contains("Işık Üni", case = False), 'school_name'] = "Isik University"

df.loc[df['school_name'].astype(str).str.contains("Doğuş Uni", case = False), 'school_name'] = "Dogus University"
df.loc[df['school_name'].astype(str).str.contains("Doğuş Üni", case = False), 'school_name'] = "Dogus University"
df.loc[df['school_name'].astype(str).str.contains("Dogus Un", case = False), 'school_name'] = "Dogus University"
df.loc[df['school_name'].astype(str).str.contains("Dogus Ün", case = False), 'school_name'] = "Dogus University"

df.loc[df['school_name'].astype(str).str.contains("Kadir Has Uni", case = False), 'school_name'] = "Kadir Has University"
df.loc[df['school_name'].astype(str).str.contains("Kadir Has Üni", case = False), 'school_name'] = "Kadir Has University"

df.loc[df['school_name'].astype(str).str.contains("Okan Uni", case = False), 'school_name'] = "Okan University"
df.loc[df['school_name'].astype(str).str.contains("Okan Üni", case = False), 'school_name'] = "Okan University"

df.loc[df['school_name'].astype(str).str.contains("Abant İzzet Baysal Üni", case = False), 'school_name'] = "Abant Izzet Baysal University"
df.loc[df['school_name'].astype(str).str.contains("Abant Izzet Baysal Uni", case = False), 'school_name'] = "Abant Izzet Baysal University"

df.loc[df['school_name'].astype(str).str.contains("Afyon Kocatepe Uni", case = False), 'school_name'] = "Afyon Kocatepe University"
df.loc[df['school_name'].astype(str).str.contains("Afyon Kocatepe Üni", case = False), 'school_name'] = "Afyon Kocatepe University"

df.loc[df['school_name'].astype(str).str.contains("Inönü Uni", case = False), 'school_name'] = "Inonu University"
df.loc[df['school_name'].astype(str).str.contains("Inönü Üni", case = False), 'school_name'] = "Inonu University"
df.loc[df['school_name'].astype(str).str.contains("Inonu Uni", case = False), 'school_name'] = "Inonu University"

df.loc[df['school_name'].astype(str).str.contains("Fatih Uni", case = False), 'school_name'] = "Fatih University"
df.loc[df['school_name'].astype(str).str.contains("Fatih Üni", case = False), 'school_name'] = "Fatih University"

df.loc[df['school_name'].astype(str).str.contains("Haliç Üni", case = False), 'school_name'] = "Halic University"
df.loc[df['school_name'].astype(str).str.contains("Halic Uni", case = False), 'school_name'] = "Halic University"

df.loc[df['school_name'].astype(str).str.contains("Çanakkale Onsekiz Mart Uni", case = False), 'school_name'] = "Canakkale Onsekiz Mart University"
df.loc[df['school_name'].astype(str).str.contains("Çanakkale Onsekiz Mart Üni", case = False), 'school_name'] = "Canakkale Onsekiz Mart University"
df.loc[df['school_name'].astype(str).str.contains("Canakkale Onsekiz Mart Uni", case = False), 'school_name'] = "Canakkale Onsekiz Mart University"

df.loc[df['school_name'].astype(str).str.contains("Balıkesir Uni", case = False), 'school_name'] = "Balikesir University"
df.loc[df['school_name'].astype(str).str.contains("Balıkesir Üni", case = False), 'school_name'] = "Balikesir University"
df.loc[df['school_name'].astype(str).str.contains("Balikesir Üni", case = False), 'school_name'] = "Balikesir University"
df.loc[df['school_name'].astype(str).str.contains("Balikesir Uni", case = False), 'school_name'] = "Balikesir University"

df.loc[df['school_name'].astype(str).str.contains("Mugla Uni", case = False), 'school_name'] = "Mugla University"
df.loc[df['school_name'].astype(str).str.contains("Muğla Üni", case = False), 'school_name'] = "Mugla University"
df.loc[df['school_name'].astype(str).str.contains("Muğla Uni", case = False), 'school_name'] = "Mugla University"

df.loc[df['school_name'].astype(str).str.contains("Ankara Yildirim Beyazit Uni", case = False), 'school_name'] = "Ankara Yildirim Beyazit University"
df.loc[df['school_name'].astype(str).str.contains("Ankara Yıldırım Beyazit Üni", case = False), 'school_name'] = "Ankara Yildirim Beyazit University"

df.loc[df['school_name'].astype(str).str.contains("Yaşar Üni", case = False), 'school_name'] = "Yasar University"
df.loc[df['school_name'].astype(str).str.contains("Yasar Uni", case = False), 'school_name'] = "Yasar University"

df.loc[df['school_name'].astype(str).str.contains("Konya Teknik Uni", case = False), 'school_name'] = "Konya Technical University"
df.loc[df['school_name'].astype(str).str.contains("Konya Teknik Üni", case = False), 'school_name'] = "Konya Technical University"
df.loc[df['school_name'].astype(str).str.contains("Konya Technical Uni", case = False), 'school_name'] = "Konya Technical University"

df.loc[df['school_name'].astype(str).str.contains("Altınbaş Üni", case = False), 'school_name'] = "Altinbas University"

df.loc[df['school_name'].astype(str).str.contains("Dumlupinar Uni", case = False), 'school_name'] = "Dumlupinar University"
df.loc[df['school_name'].astype(str).str.contains("Dumlupinar Üni", case = False), 'school_name'] = "Dumlupinar University"

df.loc[df['school_name'].astype(str).str.contains("Nisantasi Uni", case = False), 'school_name'] = "Nisantasi University"
df.loc[df['school_name'].astype(str).str.contains("Nişantaşı Uni", case = False), 'school_name'] = "Nisantasi University"
df.loc[df['school_name'].astype(str).str.contains("Nişantaşı Üni", case = False), 'school_name'] = "Nisantasi University"

df.loc[df['school_name'].astype(str).str.contains("Istanbul Gelişim Üni", case = False), 'school_name'] = "Istanbul Gelisim University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Gelisim", case = False), 'school_name'] = "Istanbul Gelisim University"
df.loc[df['school_name'].astype(str).str.contains("Istanbul Gelisim Uni", case = False), 'school_name'] = "Istanbul Gelisim University"

df.loc[df['school_name'].astype(str).str.contains("Sivas Cumhuriyet Üni", case = False), 'school_name'] = "Sivas Cumhuriyet University"

df.loc[df['school_name'].astype(str).str.contains("Düzce Uni", case = False), 'school_name'] = "Duzce University"
df.loc[df['school_name'].astype(str).str.contains("Düzce Üni", case = False), 'school_name'] = "Duzce University"
df.loc[df['school_name'].astype(str).str.contains("Duzce Uni", case = False), 'school_name'] = "Duzce University"

df.loc[df['school_name'].astype(str).str.contains("İzmir Katip", case = False), 'school_name'] = "Izmir Katip Celebi University"

df.loc[df['school_name'] == 'Namık Kemal Üniversitesi', 'school_name'] = 'Namik Kemal University'
df.loc[df['school_name'] == 'Namık Kemal Üniversitesi', 'school_name'] = 'Namik Kemal University'
df.loc[df['school_name'] == 'Namık Kemal University', 'school_name'] = 'Namik Kemal University'
df.loc[df['school_name'] == 'University of Namik Kemal', 'school_name'] = 'Namik Kemal University'
df.loc[df['school_name'].astype(str).str.contains("Namık Kemal Üni", case = False), 'school_name'] = 'Namik Kemal University'

df.loc[df['school_name'].astype(str).str.contains("Istanbul Şehir Uni", case = False), 'school_name'] = "Istanbul Sehir University"

df.loc[df['school_name'].astype(str).str.contains('Ozyegin Uni', case=False), 'school_name'] = "Ozyegin University"
df.loc[df['school_name'].astype(str).str.contains('Özyeğin Uni', case=False), 'school_name'] = "Ozyegin University"
df.loc[df['school_name'].astype(str).str.contains('Özyeğin Üni', case=False), 'school_name'] = "Ozyegin University"

In [8]:
df.loc[df['school_name'].astype(str).str.contains('İzmir Atatürk', case=False), 'school_name'] = "Izmir Ataturk High School"
df.loc[df['school_name'].astype(str).str.contains('İzmir Ataturk', case=False), 'school_name'] = "Izmir Ataturk High School"

df.loc[df['school_name'].astype(str).str.contains('Ankara Atatürk', case=False), 'school_name'] = "Ankara Ataturk High School"
df.loc[df['school_name'].astype(str).str.contains('Ankara Ataturk', case=False), 'school_name'] = "Ankara Ataturk High School"

df.loc[df['school_name'].astype(str).str.contains('Bornova Ana', case=False), 'school_name'] = "Bornova Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Sirri Yircali', case=False), 'school_name'] = "Sirri Yircali Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Kadıköy Anadolu Lisesi', case=False), 'school_name'] = "Kadiköy Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Kadıkoy Anadolu Lisesi', case=False), 'school_name'] = "Kadiköy Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Kadıkoy Anatolian', case=False), 'school_name'] = "Kadiköy Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Kadıköy Anatolian', case=False), 'school_name'] = "Kadiköy Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Kabataş Erkek', case=False), 'school_name'] = "Kabatas High School"
df.loc[df['school_name'].astype(str).str.contains('Kabatas Erkek', case=False), 'school_name'] = "Kabatas High School"
df.loc[df['school_name'] == 'KABATAŞ HİGH SCHOOL', 'school_name'] = "Kabatas High School"

df.loc[df['school_name'].astype(str).str.contains('Pertevniyal ', case=False), 'school_name'] = "Pertevniyal High School"

df.loc[df['school_name'].astype(str).str.contains('Beşiktaş Ata', case=False), 'school_name'] = "Besiktas Ataturk Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Besiktas Ata', case=False), 'school_name'] = "Besiktas Ataturk Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Istanbul Erkek', case=False), 'school_name'] = "Istanbul Erkek High School"

df.loc[df['school_name'].astype(str).str.contains('Burak Bora', case=False), 'school_name'] = "Burak Bora Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Haydarpaşa Li', case=False), 'school_name'] = "Haydarpasa High School"
df.loc[df['school_name'].astype(str).str.contains('Haydarpasa L', case=False), 'school_name'] = "Haydarpasa High School"
df.loc[df['school_name'].astype(str).str.contains('Haydarpasa Ana', case=False), 'school_name'] = "Haydarpasa Anatolian Technical High School"
df.loc[df['school_name'].isin(['Haydarpasa Technical High School','Haydarpasa ATL','Haydarpasa Anatolian Technical High School','Haydarpasa Teknik Lisesi','Haydarpasa Technical High School Istanbul','HAYDARPASA TECHNICAL HIGH SCHOOL   Automation Systems','Haydarpasa Technical Anatolian High School','Haydarpasa A.T.L.']), 'school_name'] = "Haydarpasa Anatolian Technical High School"

df.loc[df['school_name'].astype(str).str.contains('Bursa Ana', case=False), 'school_name'] = "Bursa Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Karşıyaka Ana', case=False), 'school_name'] = "Karsiyaka Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Karsiyaka Ana', case=False), 'school_name'] = "Karsiyaka Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Istanbul Koy', case=False), 'school_name'] = "Istanbul Koy Hizmetleri Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Istanbul Köy', case=False), 'school_name'] = "Istanbul Koy Hizmetleri Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Galatasaray Lise', case=False), 'school_name'] = "Galatasaray High School"

df.loc[df['school_name'].astype(str).str.contains('Denizli Ana', case=False), 'school_name'] = "Denizli Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Sehremini', case=False), 'school_name'] = "Sehremini Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Şehremini', case=False), 'school_name'] = "Sehremini Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Adana Ana', case=False), 'school_name'] = "Adana Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Tekirdag Ana', case=False), 'school_name'] = "Tekirdag Anatolian Teacher High School"
df.loc[df['school_name'].astype(str).str.contains('Tekirdağ Ana', case=False), 'school_name'] = "Tekirdag Anatolian Teacher High School"

df.loc[df['school_name'].astype(str).str.contains('Antalya Ana', case=False), 'school_name'] = "Antalya Anatolian Technical High School"

df.loc[df['school_name'].astype(str).str.contains('Samsun Ana', case=False), 'school_name'] = "Samsun Anatolian Technical High School"

df.loc[df['school_name'].astype(str).str.contains('Bahcelievler Ana', case=False), 'school_name'] = "Bahcelievler Anatolian High School"
df.loc[df['school_name'].astype(str).str.contains('Bahçelievler Ana', case=False), 'school_name'] = "Bahcelievler Anatolian High School"

df.loc[df['school_name'].astype(str).str.contains('Kocaeli Ana', case=False), 'school_name'] = "Kocaeli Anatolian High School"

df.loc[df['school_name'].notnull(), 'school_name'] = df.loc[df['school_name'].notnull(), 'school_name'].apply(lambda x: x.replace('highschool', "High School"))
df.loc[df['school_name'].notnull(), 'school_name'] = df.loc[df['school_name'].notnull(), 'school_name'].apply(lambda x: x.replace('Highschool', "High School"))

In [9]:
school_keywords = ['lise', 'üniversite', 'mesleki', 'yüksek', 'okul']

school_translated = dict()
for kw in school_keywords:
    print(f'keyword: {kw}')
    for i in tqdm(df.loc[df['school_name'].astype(str).str.contains(kw, case = False), 'school_name'].unique()):
        if i not in school_translated.keys():
            try:
                school_translated[i] = translator.translate(i)
            except:
                school_translated[i] = i

for key in school_translated.keys():
    df.loc[df['school_name'] == key, 'school_name'] = school_translated[key]

df.loc[df['school_name'].notnull(), 'school_name'] = df.loc[df['school_name'].notnull(), 'school_name'].apply(lambda x: x.lower())
df.loc[df['school_name'].notnull(), 'school_name'] = df.loc[df['school_name'].notnull(), 'school_name'].apply(lambda x: translation(x))

print(f'education data shape: {df.shape}')
print(f'degree classes: {df["degree"].nunique()}')
print(f'school_name classes: {df["school_name"].nunique()}')
print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.to_csv('../../../datasets/garanti-bbva-data-camp/clean_education_v2.csv', index = False)

keyword: lise


100%|██████████| 3945/3945 [52:49<00:00,  1.24it/s]  


keyword: üniversite


100%|██████████| 174/174 [02:14<00:00,  1.29it/s]


keyword: mesleki


100%|██████████| 165/165 [00:04<00:00, 35.94it/s]


keyword: yüksek


100%|██████████| 36/36 [00:23<00:00,  1.54it/s]


keyword: okul


100%|██████████| 294/294 [03:13<00:00,  1.52it/s]


education data shape: (142575, 4)
degree classes: 2833
school_name classes: 8630
fields_of_study classes: 10653


In [10]:
#school_translated = dict()
#for i in tqdm(df['school_name'].dropna().unique()):
#    try:
#        school_translated[i] = translator.translate(i)
#    except:
#        school_translated[i] = i
#for key in school_translated.keys():
#    df.loc[df['school_name'] == key, 'school_name'] = school_translated[key]
#df.loc[df['school_name'].notnull(), 'school_name'] = df.loc[df['school_name'].notnull(), 'school_name'].apply(lambda x: x.lower())
#    
#print(f'education data shape: {df.shape}')
#print(f'degree classes: {df["degree"].nunique()}')
#print(f'school_name classes: {df["school_name"].nunique()}')
#print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')

In [11]:
#df.to_csv(output_path, index = False)
#df.head()

## Fields of Study

In [12]:
#df = pd.read_csv(output_path)
#print(f'education data shape: {df.shape}')
#print(f'degree classes: {df["degree"].nunique()}')
#print(f'school_name classes: {df["school_name"].nunique()}')
#print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.head()

In [13]:
df.loc[df['fields_of_study'] == 'Bilgisayar Mühendisliği', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'Elektrik ve Elektronik Mühendisliği', 'fields_of_study'] = 'Electrical and Electronics Engineering'
df.loc[df['fields_of_study'] == 'Matematik', 'fields_of_study'] = 'Mathematics'
df.loc[df['fields_of_study'] == 'İşletme ve Yönetim, Genel', 'fields_of_study'] = 'Business Administration and Management, General'
df.loc[df['fields_of_study'] == 'Bilgisayar Yazılımı Mühendisliği', 'fields_of_study'] = 'Software Engineering'
df.loc[df['fields_of_study'] == 'Yazılım Mühendisliği', 'fields_of_study'] = 'Software Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Makine Mühendisliği', 'fields_of_study'] = 'Mechanical Engineering'
df.loc[df['fields_of_study'] == 'Fizik', 'fields_of_study'] = 'Physics'
df.loc[df['fields_of_study'] == 'Ekonomi', 'fields_of_study'] = 'Economics'
df.loc[df['fields_of_study'] == 'İstatistik', 'fields_of_study'] = 'Statistics'
df.loc[df['fields_of_study'] == 'Kimya', 'fields_of_study'] = 'Chemistry'
df.loc[df['fields_of_study'] == 'Elektrik Mühendisliği', 'fields_of_study'] = 'Electrical Engineering'
df.loc[df['fields_of_study'] == 'Elektrik mühendisliği', 'fields_of_study'] = 'Electrical Engineering'
df.loc[df['fields_of_study'] == 'Elektrik Mühendisliği', 'fields_of_study'] = 'Electrical Engineering'
df.loc[df['fields_of_study'] == 'Elektrik Mühendisi', 'fields_of_study'] = 'Electrical Engineering'
df.loc[df['fields_of_study'] == 'elektrik mühendisliği', 'fields_of_study'] = 'Electrical Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar mühendisliği', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar mühendisliği ', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar mühendisligi', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar müh.', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar mühendisi', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'yönetim bilişim sistemleri', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Yönetim Bilişim Sistemleri', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Computer Engineering BSC', 'fields_of_study'] = 'Computer Engineering' 
df.loc[df['fields_of_study'] == 'Computer Engineering, BE', 'fields_of_study'] = 'Computer Engineering' 
df.loc[df['fields_of_study'] == ' Computer Engineering', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'computer engineering', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'computer engineer', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'computer engineerig', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'computer engeneering', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'computer science', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'Bilgisayar Bilimleri', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'computer sciences', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'computer scientist', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'computer Engineering', 'fields_of_study'] = 'Computer Engineering' 
df.loc[df['fields_of_study'] == 'computer Engineer', 'fields_of_study'] = 'Computer Engineering' 
df.loc[df['fields_of_study'] == 'computer programming', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'computer programmer', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Programlama/Programcı, Genel', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'elektrik elektronik mühendisliği', 'fields_of_study'] = 'Electrical and Electronics Engineering'
df.loc[df['fields_of_study'] == 'elektrik elektronik mühendisi', 'fields_of_study'] = 'Electrical and Electronics Engineering'
df.loc[df['fields_of_study'] == 'elektrik elektronik', 'fields_of_study'] = 'Electrical and Electronics Engineering'
df.loc[df['fields_of_study'] == 'endüstri mühendisliği', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği/Industrial Engineering', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği / Industrial Engineering', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği, Mühendislik Yönetimi ', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisi', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği Yüksek Lisans', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği-Mühendislik Yönetimi', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği (Tezli)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği / Müh. Yönetimi ', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği Ana Bilim Dalı', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği Lisans', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği Yöneylem Araştırması Anabilim Dalı', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği (EN)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği - Yan Dal Programı', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Lisans-Endüstri Mühendisliği', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği, Tam Burslu', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği (Minor)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Industrial Engineering (Endüstri Mühendisliği)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Sistem Mühendisliği (Endüstri Mühendisliği)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Industrial Engineering / Endüstri Mühendisliği', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği (Industrial Engineer)', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'Endüstri Mühendisliği lisans', 'fields_of_study'] = 'Industrial Engineering'
df.loc[df['fields_of_study'] == 'kimya', 'fields_of_study'] = 'Chemistry'
df.loc[df['fields_of_study'] == 'KİMYA', 'fields_of_study'] = 'Chemistry'
df.loc[df['fields_of_study'] == 'KİMYAGER', 'fields_of_study'] = 'Chemistry'
df.loc[df['fields_of_study'] == 'Business Administration and Management, General', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management (English)', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management, Technology Track', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management, MBA', 'fields_of_study'] = 'MBA'
df.loc[df['fields_of_study'] == 'Business Administration and Management, Executive', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management (Master)', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management (German)', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management.Open University', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Faculty of Business Administration and Management', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Bachelor of Business Administration, Business Administration and Management', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Istanbul Master degree in Business Administration and Management, General', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management, Toronto ON ', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'Business Administration and Management, Marketing', 'fields_of_study'] = 'Business Administration and Management'
df.loc[df['fields_of_study'] == 'MAKİNE MÜHENDİSLİĞİ', 'fields_of_study'] = 'Mechanical Engineering'
df.loc[df['fields_of_study'] == 'BİLGİSAYAR MÜHENDİSLİĞİ', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'Computer science', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'computer science', 'fields_of_study'] = 'Computer Science'
df.loc[df['fields_of_study'] == 'Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration - MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Adminstration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (M.B.A.)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration(MBA)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Executive Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'MBA (Master of Business Administration)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Institute of Social Sciences, Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administrator (with Thesis)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administrator', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Executive Master of Business Adminstrations', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA) (Without Thesis)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA) , Executive MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (e-MBA)', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Information Systems', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA) - English', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'MBA, Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA), Master degree', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business and Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration MBA - thesis program', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration - MBA, Social Sciences', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (MBA), Hospital and Health Institutions Management', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master’s degree • Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (M.B.A.), Executive M.B.A', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration ', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration,MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'MBA - Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration (M.B.A), Marketing/Marketing Management, General', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration ( MBA )', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration, MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Management and Strategy - Master of Business Administration', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration, Executive MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Master of Business Administration in Finance', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'MBA', 'fields_of_study'] = 'Master of Business Administration'
df.loc[df['fields_of_study'] == 'Bilgisayar Mühendisliği (İngilizce)', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'Bilgisayar programcılığı', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Teknolojisi ve Programlama', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Programlama, Özel Uygulamalar', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Teknolojileri ve Programlama', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Mühendisi', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'Bilgisayar Mühendisliği Yüksek Lisans', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'Bilgisayar Programcılığı / Tekniker', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'B.Sc., Faculty of Computer and Information Science, Computer Engineering', 'fields_of_study'] = 'Computer Engineering'
df.loc[df['fields_of_study'] == 'bilgisayar programcısı', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'bilgisayar programcılığı', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'bilgisayar programlama,bilgisayar teknoljileri', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'bilgisayar teknolojisi ve programlama', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'bilgisayar programcılığı ', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'bilgisayar programlama', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Bilgisayar Teknolojileri ve bilgisayar programcılığı', 'fields_of_study'] = 'Computer Programming'
df.loc[df['fields_of_study'] == 'Yönetim Bilgi Sistemleri, Genel', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems, General', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information System', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems (MIS)', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems and Engineering', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems and Services', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Yönetim Bilişim Sistemleri - Management Information Systems', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems ', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Systems-MBA', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information Sciences', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Informations System', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Management Information System (MIS)', 'fields_of_study'] = 'Management Information Systems'
df.loc[df['fields_of_study'] == 'Auzef • Yönetim Bilişim Sistemleri / Management Information Systems', 'fields_of_study'] = 'Management Information Systems'


#Management Information Systems
df.loc[df['fields_of_study'].isin(['Management Information Systems (M.I.S)', 'Management Information Systems (MSc)', 'Management Information Systems (3.41/4)', 'Management Information Systems, Full Scholarship Student', 'Management Informatics Systems,Undergradute Program', 'Management Informatics Systems', 'Management Information Systems (M.I.S.)', 'Yönetim Bilişim Sistemleri (MIS - Management Information Systems)', 'MBA - Management Information Systems and Services', 'Yönetim Bilişim Sistemleri, Management Information Systems', 'Management Information Sytems', 'Management Information Systems,MIS', 'Management Information Systems (Yönetim Bilişim Sistemleri)', 'MBA / Management Information Systems', 'Management Information Systems', ' Management Information System (MIS)', 'Management Information Systems / Master', 'Management Information System (MIS) - Yönetim Bilişim Sistemleri', ' Management Information Systems ( Yönetim Bilişim Sistemleri )', 'Management Information Systems Specialist ', "bachelor's degree • Management Information Systems", 'Management Information Systems, General (in German)', ' Management Information Systems and Engineering ', 'Management Information Systems (German)', 'Management Information System and Engineering', 'Management Information Systems, 3.26', 'Management information systems', 'Management information systems ', 'Management information system/MBA', 'Management information System', 'Management Infırmation Systems', 'Management Infomations Systems', 'yönetim bilişim ve sistemleri', 'Management Information Systems (MIS) Master', "Management Information Systems / Master's Degree", 'Master of Management Information Systems', '(MIS) Management Information Systems', 'Yönetim Bilişim Sistemleri (Management Information Systems)', "Bachelor's Degree, Management Information Systems ", 'Management Information Technology', 'Management Information Systems', 'Engineer Faculty - Management Information Systems', 'Management Information Systems in German (Wirtschaftsinformatik)', ' Faculty of Commercial Sciences, Department of Management Information Systems', 'Management Information Systems(MIS)', ' Management Information Systems', 'Management Information Systems - MIS', 'Management Information Systems Engineering', 'MBA-  Management Information Systems', 'Management Information Systems (English) ', 'Yönetim Bilişim Sistemleri / Management Information Systems', 'Management Information Systems', 'Management Information Systems (Ph.D.)', 'IT Institute/Management Information Sytem', 'MIS - Management Information Systems', "Bachelor's Degree Management Information Systems", 'Master of Management Information Systems ( M.I.S. )', 'Management Information Systems (Master)', 'Management Information Systems, Full Scholarship', 'Management Information Systems Master’s Program (with thesis)', 'Management Information Sys. And Eng.', 'Management Information Systems and Engineering ', 'Management Informations System (MIS)', 'Management Information Systems MIS', 'M.S, Management Information Systems', 'Management Information Systems & Engineering', 'MIS(Management Information Systems)', 'Management Information', ' Management Informaton Systems', 'Management Information Systems and Engineering (MIS)', 'Management Information Systems Technologies ', 'Management Information Systems', "Master's degree, Management Information Systems", 'Yönetim Bilişim Sistemleri ( Management Information Systems )', 'MIS Management Information Systems - Yönetim Bilişi', 'Management Information Systems, Honours Degree', 'Yüksek Lisans (Master) / Management Information Systems', 'Management Information Systems (Wirtschaftsinformatik)', "Master's Degree, Management Information Systems (MIS).", 'MIS (Management Information Systems)', 'Astronomy , Computer Programming, Management Information Systems', 'Management Information Systems (Graduate School)', 'Management Information Systems (Success Scholarship)', "Faculty of Engineering Management Information Systems Master's Degree",'Bachelor’s Degree in Management Information Systems', 'Management Information Systems, General (Yönetim Bilişim Sistemleri)', 'Yönetim bilişim sistemleri mühendisliği /Management information systems engineering (%100 İngilizce)',]), 'fields_of_study'] = 'Management Information Systems'

#Business Administration
df.loc[df['fields_of_study'].isin(['işletme', 'işletme ', 'işletme bölümü', 'işletme/işletmecilik', 'işletmecilik', 'İngilizce işletme','business administration', 'School of business administration', 'school of business administration', 'MBA(Master of business administration)', 'business administrative  (english)', 'Master of business administration (MBA)', 'Master of business administration', "İşletme"]), 'fields_of_study'] = 'Business Administration'

#Electrical and Electronics Engineering
df.loc[df['fields_of_study'].isin(['Department of Electrical and Electronics Engineering', 'Electrical-Electronics Engineer', 'Electrical and Electronical Engineering', 'Electrical and Electronic Engineer', 'Electrical and electronics engineering', 'Electric and Electronic Engineer', 'Electric and Electronic engineering', 'Electric and Electronic Engineering (Eng.)', 'Electric and Electronics Engineering, EEE', 'Electric and Electronics Engineering (MS)', 'Electric and Electronics engineering', 'Elektrik ve Elektronik Mühendisliği (İngilizce)', 'Elektrik ve Elektronik Mühendisliği(İngilizce)', 'Elektrik ve Elektronik Mühendisliği, Medya ve Görsel Sanatlar', 'Elektrik ve Elektronik Mühendisliği (ÇAP)', 'Elektrik ve Elektronik Mühendisliği & Fizik(3,89)', 'Elektrik ve Elektronik Mühendisliği(English)', 'Elektrik ve Elektronik Mühendisliği Tezli', 'Electrical and Electronics Engineering, (Elektrik ve Elektronik Mühendisi)', 'Elektrik ve Elektronik Mühendisliği Tam Burslu', 'Elektrik ve Elektronik Mühendisliği Yüksek Lisansı', 'Elektrik ve Elektronik Mühendisliği(Örgün Öğretim,İngilizce)', 'Fen Bilimleri Enstitüsü- Elektrik ve Elektronik Mühendisliği Ana Bilim Dalı (Tezli)', 'Teknoloji Fakültesi Elektrik ve Elektronik Mühendisliği', 'Elektrik ve Elektronik Mühendisliği, Bilgisayar Mühendisliği', 'Elektrik ve Elektronik Mühendisliği, 2.84/4.00', 'ELEKTRİK VE ELEKTRONİK MÜHENDİSLİĞİ', 'Elektrik ve Elektronik Mühendisliği / Elektronik ve Haberleşme Dalı', 'Elektrik ve Elektronik Mühendisliği Bölümü', 'Elektrik ve Elektronik Mühendisliği Yüksek Lisans', 'Elektrik ve Elektronik Mühendisliği (ingilizce)', 'Elektrik ve Elektronik Mühendisliği - 3.31/4', 'Elektrik ve Elektronik Mühendisliği Tezli YL', 'Elektrik ve Elektronik Mühendisliği (ing)', 'Elektrik ve Elektronik Mühendisliği, Bilgisayar Mühendisliği (Minor Program)', 'Elektrik ve Elektronik Mühendisliği, Bilişim Teknolojileri', 'Elektrik ve Elektronik Mühendisliği, 3.61', 'İngilizce Elektrik ve Elektronik Mühendisliği', 'Teknoloji Fakültesi - Elektrik ve Elektronik Mühendisliği', 'Elektrik ve Elektronik Mühendisliği (Tezli)( Burslu)', 'Elektrik ve Elektronik Mühendisliği ( %100 Scholarship )', 'Elektrik ve Elektronik Mühendisliği/ Electronic Electronic Engineer', 'Elektrik ve Elektronik Mühendisliği (%30 İngilizce)', 'BSc Elektrik ve Elektronik Mühendisliği', 'Elektrik ve Elektronik Mühendisliği(ingilizce)', 'Elektrik ve Elektronik Mühendisliği(in', 'Elektrik ve Elektronik Mühendisliği/İngilizce', 'Elektrik Elektronik Mühendisliği', 'Elektrik elektronik mühendisliği', 'Elektrik Elektronik Mühendisliği %100 Burslu', 'Elektrik Elektronik Mühendisi', 'Elektrik Elektronik Mühendisliği (İngilizce)', 'Elektrik Elektronik Müh. Yuksek Lisans', 'Electrical and Electronic Engineering', 'Electrical & Electronics Engineering', 'Electrical-Electronics Engineering', 'Electrical Electronics Engineering', 'Electrical&Electronics Engineering', 'Electric and Electronic Engineering', 'Electrical and Electronics Engineer', 'Electrical - Electronics Engineering', 'Electric and Electronics Engineering', 'Electrical & Electronics Engineer', 'Elektrik-Elektronik Mühendisliği', 'electrical and electronic engineering', 'electrical and electronics engineering',]), 'fields_of_study'] = "Electrical and Electronics Engineering"

#Chemical Engineering
df.loc[df['fields_of_study'].isin(['Kimya Mühendisliği', 'Chemical Engineering', 'Chemical Engineer', 'Chemical engineering', 'Chemical engineer', 'Department of Chemical Engineering', 'Chemical Process Technology', 'Chemical Engineering ', 'Chemical Engineering, Polymer', 'Chemical Engineering-Transition to Computer Engineering/Drop out', 'Chemical Engineering/Chemical Technologics', 'Chemical Engineering, 3.55/4, Graduated with third degree', 'Chemical Engineering, 3.97', 'Chemical Engineering (%100 English)', 'Chemical Engineering, 3.64', 'Chemical Technologies', 'Chemical Engineering Department', 'Chemical Engineering / Master', 'Chemical Engineering', 'Chemical Engineering Process and Reactor Design', 'Chemical Engineering and Applied Chemistry %100 Scholarship', 'Master Student (thesis stage), Chemical Engineering', 'B.S., Chemical Engineering', 'B.Sc Chemical Engineering', 'Chemical and Biochemical Engineering', "Chemical Engineering Master's Programme", 'Chemical and Bioprocess Engineering', 'B.Sc.,Chemical Engineering', 'M.Sc. Chemical Engineering', 'M.Sc., Chemical Engineering', 'BSc, Chemical Engineering', 'MSc, Chemical Engineering', 'Kimya Mühendisliği - Chemical Engineering', 'Chemical and Process Engineering', 'B.Sc. Chemical Engineering', 'Chemical Enginnering', 'chemical engineering', 'chemical engineer', 'chemical',]), 'fields_of_study'] = 'Chemical Engineering'

#Mathematical Engineering
df.loc[df['fields_of_study'].isin(['Matematik Mühendisliği', 'Matematik Mühendisliği ', 'Matematik Mühendisi', 'Matematik Mühendisligi', 'Matematik Mühendisliği (İngilizce)', 'Matematik Mühendisliği(İngilizce)', 'Matematik Mühendisliği, 3.39', 'Matematik Mühendisliği (İng)', 'Matematik Mühendisliği/ Mathematical Engineering', 'Matematik Mühendisliği 3,22', 'Matematik Mühendisligi (%100 Ingilizce)', 'Department of Mathematical Engineering / Matematik Mühendisliği', 'Matematik Mühendisliği (%100 İngilizce)', 'Matematik Mühendisliği (Ingilizce) ', 'Kimya Metalurji fakültesi, Matematik Mühendisliği', 'Matematik Mühendisliği Yüksek Lisans', 'Matematik mühendisliği', 'Matematik mühendisliği ', 'Matematik mühendisliği 3.33', 'matematik mühendisliği', 'matematik mühendisi', 'matematik mühendisliği / mathematical engineering', 'mathematical enginering', 'mathematical Engineering', 'mathematical engineering', 'Mathematical Engineer', 'Mathematical Engineering (English)', 'Mathematical Engineering(%100 ENG)', 'Mathematical engineering', 'Mathematical Engineering(%100 English)', 'B.S, Mathematical Engineering', 'Bachelor of Science (BS), Mathematical Engineering', 'Mathematical Engineering , Applied Mathematics', 'Bachelor of Science, Mathematical Engineering', 'Mathematical Engineering (100% Eng)', 'Mathematical Engineering (Informatics)', 'Mathematical Engineering(100% English)', 'Mathematical Engineering Department', 'BSc in Mathematical Engineering', 'Mathematical Engineering(ENG)', 'Bachelor of Science (B.Sc.), Mathematical Engineering', "Bachelor's Degree,Mathematical Engineer, Result : 3,16/4", 'Applied Mathematics - Department of Mathematical Engineering', "Bachelor's degree Mathematical Engineering", 'Mathematical Engineering (Double Major Program)', 'Math. Eng.', "Mathematics Engineering", "BSc, Mathematics Engineering", 'Maths Engineering', 'Mathematics Engineer', 'Faculty of Science and Literature – Mathematics Engineering', 'Mathematics engineering', 'mathematics engineering', 'Mathematics Enginner', 'mathematics engineer', 'math engineer', 'Math Engineering', 'Maths Engineer']), 'fields_of_study'] = 'Mathematical Engineering'

#Mathematics and Science
df.loc[df['fields_of_study'].isin(['Math-Science', 'Math&Science', 'Math/Science', 'Math/Science Curriculum', 'MF (Math-Science)', 'Math-Science (MF)', 'Math-Science Applications', 'Math Science', 'Math Science Based Course', 'Math-Sciences', 'Matematik-Fen', 'Matematik Fen', 'Matematik/Fen', 'Matematik Fen Bölümü', 'Sayısal(Matematik-Fen)', 'Matematik-Fen (MF)', 'Matematik/Fen/Mathematic-Science', 'Matematik-Fen / Maths & Science', 'Matematik-Fen Bilimleri', 'Fen-Matematik', 'Fen Matematik', 'Fen/Matematik', 'Fen-Matematik Bölümü', 'Science-Mathematics', 'Science-Math', 'Science Math', 'Science/Mathematics', 'Science&Math', 'Science-Math Track', 'Science Mathematic', 'Science/Math Track of MEB Program', 'Science/Mathematics Student', 'Science-Math Major', 'Science&Mathematics', 'Science-Math Class', 'Science-Maths', 'Science/Math', 'Science and Maths', 'Science & Maths', 'Science - Maths', 'Maths - Science', 'Maths & Science', 'Maths, Science', 'Maths and Science (MS)', 'Antalya; Maths-Science', 'Science and Mathematics', 'Math and Science', 'Math & Science', 'Science & Mathematics', 'Mathematics & Science', 'Science and Math', 'Science & Math', 'Mathematics - Science', 'Maths and Science', 'Mathematics-Science', 'Mathematics, Science', 'Math - Science', 'Science and Mathematics Department', 'Mathematisc and Science Education', 'Mathematics and Sciences', 'Math / Science', 'Math & Science Track', 'Science & Math (Education in German)', 'Advanced Math & Science Program', 'Science &  Math Track', 'Math and Sience', 'Science-Math Track, German', 'Science and Math Class / Anatolian High School', 'Studied English, math and science extensively', 'Math and Science, 83.64/100', 'Math /Science', 'Math & Science Division', 'Graduate, Science and Math Based Program',]), 'fields_of_study'] = "Mathematics and Science"

#Mathematics
df.loc[df['fields_of_study'].isin(['Math', 'Maths', "Mathematics (English)", 'matematik', 'matematik(ingilizce)', 'Department of Mathematics']), 'fields_of_study'] = "Mathematics"

#Statistics
df.loc[df['fields_of_study'].isin(['Statistic', 'fields_of_study', 'Statictics', 'fields_of_study', 'BSc, Statistics', 'fields_of_study', 'B.Sc., Statistics', 'fields_of_study', 'Statistic ', 'fields_of_study', 'Statictics', 'fields_of_study', 'Statistics (English)', 'fields_of_study', 'Department of Statistics', 'fields_of_study', 'İstatistik (Ana Dal)', 'Istatistik', 'İstatistik Lisans', 'İstatistik Bölümü']), 'fields_of_study'] = 'Statistics'

#Computer Science
df.loc[df['fields_of_study'].isin(['Computer Science  (2 year)', 'Computer Science (Cyber Security)', 'Computer Science  (4 year)', "Computer Sciences", "Computer Science,Science", "Computer Science Department", "Computer Science (Bilgisayar Bilimleri)", "Bilgisayar Bilimleri (Tezli)", 'Bilgisayar Bilimi']), 'fields_of_study'] = 'Computer Science'

#Computer Engineering
df.loc[df['fields_of_study'].isin(['Computer Engineer', "Bilgisayar Mühendisliği(İngilizce)", "Bilgisayar mühendisi", "Bilgisayar mühendisliği", "Bilgisayar Mühendisliği Bölümü", "Bilgisayar Mühendisliği (Computer Engineering)", "Bilgisayar Mühendisliği / Computer Engineering",'Computer Engineering / Bilgisayar Mühendisliği', 'Mühendislik Fakültesi, Bilgisayar Mühendisliği', 'Bilgisayar Mühendisliği (%100 İngilizce)', 'Bilgisayar Mühendisligi - Bilişim Teknolojileri', 'Bilgisayar Mühendiliği', "Computer Engineering - Bilgisayar Muhendisligi", 'Bilgisayar Muhendisligi', 'Bilgisayar Muhasebe', 'Bilgisayar Muhendisi', 'Bilgisayar muhendisligi', 'Bilgisayar Muhendisligi / Computer Engineering', 'Computer Engineering - Bilgisayar Muhendisligi', 'Bilgisayar Muh.', "Computer Engineering MSc", "MSc,Computer Engineering", 'M.Sc. Computer Engineering (M-CE)', 'Computer Enginnering M.Sc.', 'Computer Engineer (English)', 'bilgisayar bilimci', 'Department of Computer Engineering', 'Department of Computer Engineer', 'Department Of Computer Engineering', 'Faculty of Engineering, Department of Computer Engineering', 'Department of Computer Engineering (Full Scholarship)', 'Department of Computer Engineering / Information Technology', 'Engineering Department of Computer Engineering', 'Department of Computer Engineering, Information Technology', 'Graduate School of Science and Engineering, Department of Computer Engineering', 'e-Information Technology (Department of Computer Engineering)', 'BSc, Computer Engineering', 'MSc Computer Engineering', 'Computer Engineering - Faculty of Engineering', 'Computer engineering', 'Computer engineer', 'Computer engineering ', 'Computer engineerig', 'Computer engineering and natural sciences faculty', 'COMPUTER ENGINEERING', '( ENG. FAC.) / COMPUTER ENGINEERING', 'COMPUTER ENGINEER', 'COMPUTER ENGİNEER', 'COMPUTER ENGİNEERİNG', 'Computer Eng.', 'Computer Eng. and Information Sci.', 'Bilgisayar Müh (Comp. Eng.)', 'Comp. Eng.', 'Bilgisayar Mühendisliği (3.02)', 'Bilgisayar Mühendisliği - Computer Engineering', 'Bilgisayar Mühendisliği(%100 ingilizce)', 'Bilgisayar Mühendisliği  100% İngilizce', 'Bilgisayar Müh', 'Bİlgisayar Mühendisliği', 'Bilgisayar Mühendisliği(%100 İngilizce)',
'Bilgisayar Mühendisliği (English)', 'Bilgisayar Mühendisliği (ingilizce)', 'Computer Engineer - Bilgisayar Mühendisliği', 'Bilgisayar Mühendisliği (Master)', 'Fen Bilimleri Enstitüsü, Bilgisayar Mühendisliği (Türkçe) - Yüksek Lisans', 'Bilgisayar Mühendisliği(Tezli Yüksek Lisans)', 'Bilgisayar Mühendisliği /Computer Engineering', 'Bilgisayar Mühendisliği Yuksek Lisans', 'Bilgisayar Müh.', 'Bilgisayar Mühendisliği A.B.D', 'Bilgisayar Mühendisliği(Double Major)', 'Bilgisayar Mühendisliği (1. Anadal)', 'Computer Engineering (English)', 'Computer Enginnering', 'Computer Engineering Department', 'Computer Enginering', 'Computer Engineering (100% English)', 'Computer Engineering, 3.74/4', 'Computer Enginner', 'Computer Engineering (%100 English)', 'Computer Engineering (Double Major)', 'Master of Computer Engineering', 'Master of Science (MSc), Computer Engineering', 'Control and Computer Engineering', 'Informatics and Computer Engineering', "Bachelor's Degree, Computer Engineering", 'Computer Engineering & Information Science', 'Computer Engineering (Double Major with Management Engineering )', 'Control & Computer Engineering', 'Computer Engineering and Information Science', 'Master of Computer Applications (M.C.A.), Computer Engineering', 'Electronics & Computer Engineering', 'Computer Engineering (Eng)', 'Computer Sciences Department  / Master of Computer Engineering', 'Bachelor of Science (B.Sc.), Computer Engineering', 'Control&Computer Engineering', "Master's Degree Computer Engineering", 'Computer Engineering and Informatics', 'Computer Engineering/Science', 'Engineering Faculty Computer Engineering', 'Computer Engineering, MSc', 'Bachelor of Science (BSc), Computer Engineering', 'Bachelor of Science (BS), Computer Engineering', 'Computer Engineering, Cyber Security', 'Bachelor of Engineering (B.E.), Computer Engineering', 'Computer Engineering (Full Scholarship)', 'Computer Engineering Dept', 'Computer Engineering with 100% scholarship', 'Computer Engineering,%75 Scholarship']), 'fields_of_study'] = "Computer Engineering"

#Management Information Systems
df.loc[df['fields_of_study'].isin(['YÖNETİM BİLİŞİM SİSTEMLERİ', 'Yönetim Bilişim Sistemleri, Genel', 'Yönetim bilişim sistemleri', 'Yönetim Bilişim Sistemleri (MIS)', 'Yönetim Bilişim Sistemleri ve Mühendisliği', 'İşletme Yönetimi , Yönetim Bilişim Sistemleri', 'Yönetim Bilişim sistemleri', 'MIS - Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri Mühendisliği', 'Yönetim Bilişim Sistemleri Yüksek Lisans', 'Yönetim Bilişim Sistemleri(MIS)', 'MBIS-Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri (Management of Information Systems-MIS)', 'Yönetim Bilişim Sistemleri/Açık ve Uzaktan Eğitim Fakültesi', 'Yönetim Bilişim Sistemleri ve Mühendisliği (İngilizce)', 'Yönetim Bilişim Sistemleri, Yüksek Lisans', 'Yönetim Bilişim Sistemleri (Almanca)', 'Yönetim Bilişim Sistemleri ve Mühendisliği(İngilizce)', 'Yüksek Lisans (Master), Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri Tezsiz Yüksek Lisans', 'Yönetim Bilisim Sistemleri', 'Yönetim Bilişim Sistemleri (Alm.)', 'Mühendislik Fakültesi, MIS - Yönetim Bilişim Sistemleri (Yüksek Lisans)', 'M.Sc, Yönetim Bilişim Sistemleri', 'Yüksek Lisans - Yönetim Bilişim Sistemleri', 'Yönetim Bİlişim Sİstemleri', 'Yönetim Bilişim Sistemeleri', 'MIS Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri - Yüksek Lisans', 'MIS - Yönetim Bilişim Sistemleri (Computer science)', 'Yönetim Bilişim Sistemleri Lisans', 'Yönetim Bilişimleri Sistemi', '(MIS) Yönetim Bilişim Sistemleri', 'Açıköğretim Fakültesi - Yönetim Bilişim Sistemleri', 'Sosyal Bilimler Enstitüsü, MIS - Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri - MIS (Almanca)', 'Yönetim Bilişim Sistemler', 'Yönetim Bilişim Sistemleri (Açık ve Uzaktan Eğitim Fakültesi)', 'Yönetim Bilişim Sistemleri - Uzaktan Öğretim', 'İktisadi ve İdari Bilimler Fakültesi - Yönetim Bilişim Sistemleri', 'MIS-Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemeri', 'Yönetim Bilişim Sistemleri-MIS', 'Bilişim Teknolojileri - Veritabanı Yönetim Bilişim Sistemleri', 'Mühendislik Fakültesi Yönetim Bilişim Sistemleri', 'Yönetim Bilişim Sistemleri, GPA: 3.35/4.00', 'Business Informatics / Wirtschaftsinformatik / Yönetim Bilişim Sistemleri', 'Management Information Systems ( Yönetim Bilişim Sistemleri )', 'Yönetim Bilişim Sistemleri(Managemet Information Systems)', 'Yönetim Bilişim Sistemleri - MIS Yüksek Lisans', 'Yönetim Bilişim Sİstemleri', 'Yönetim Bilişim Sistemleri | MIS', 'Yönetim bilişim sistemleri(MIS)', 'Yönetim Bilişim Sistremleri']), 'fields_of_study'] = "Management Information Systems"

#Software Engineering        
df.loc[df['fields_of_study'].isin(['Yazilim Muhendisligi', "yazilim muh", "Software Engineering, Honor Student", "Software Engineer", "software engineering", "M.Sc. in Software Engineering", 'software engineer', 'MSc, Software Engineering', 'MS, Software Engineering', 'yazılım mühendisliği', 'yazılım mühendisliği ', 'yazılım mühendiliği', 'yazılım mühendisligi', 'yazılım mühendisi', 'Yazılım mühendisliği', 'Yazılım mühendisliği ', 'Yazılım mühendisi', 'Yazılım Mühendisi', 'Yazılım Mühendisliği ', 'Yazılım Mühendislik', ' Yazılım Mühendisliği', 'Yazılım Mühendisliği / Software Engineering', 'Yazılım Mühendisliği (Çift Anadal)', 'Yazılım Mühendisliği Yüksek Lisans Programı', 'Yazılım Mühendisliği ve Veri Bilimi', 'Yazılım Mühendisliği(İngilizce)', 'Yazılım Mühendisliği Software Engineering', 'Yazılım Mühendisliği MS', 'Yazılım Mühendisliği (UOLP)', 'Yazılım Mühendisliği (%100 Burslu)', 'Yazılım Mühendisliği(Software Engineering)', 'Bilgisayar Bilimleri Fakültesi, Yazılım Mühendisliği', 'Software Engineering - Yazılım Mühendisliği', 'Yazılım Müh.', 'Yazılım Mühendisliği (Uzaktan)', 'Yazılım Mühendisliği , Ortalama :  3.1 / 4', 'Yazılım Mühendisliği ( Ingilizce )', 'Yazılım Mühendisliği U.O.L.P (ingilizce)', 'Yazılım Mühendisliği (EN/TR)', 'Yazılım Mühendisliği(EN/TR), Comprehensive Scholarship (%100)', 'Yazılım Mühendisliği(English)', 'Bilgisayar Yazılım Mühendisliği', 'Yazılım Mühendisligi', 'Yazılım Mühendisliği; Bilgisayar Tek. ve Prog', 'Bilgisayar ve Yazılım Mühendisliği', 'Yazılım Mühendisliğ', 'Yazılım Mühendisliği (Software Engineering)', 'Sofware Developer', 'Sofware Engineer', 'Sofware Engineering', 'YAZILIM MÜHENDİSLİĞİ', 'yazılım Mühendisliği', 'yazilim mühendisliği yandal', 'Yazilim Mühendisliği', 'Yazilim Mühendisligi', 'yazilim mühendisliği', 'Software Engineering, 3.52', 'Software Engineering (English)', 'Software Enginnering', 'Software Engineering , Graduated in 3rd place']), 'fields_of_study'] = "Software Engineering"

#Electronics and Communication Engineering
df.loc[df['fields_of_study'].isin(['Electronics and Communication Engineering', 'Electronics and Communication Engineer', 'Electronics And Communication Engineering', 'Master of Science (M.Sc.), Electronics and Communication Engineering', 'MSc, Electronics and Communication Engineering', 'Electronics Department / Electronics and Communication Engineering', 'Department of Electronics and Communication Engineering/Electronics', 'Faculty of Engineering, Electronics and Communication Engineering', 'M.Sc. in Electronics and Communication Engineering  Division.', 'B.Sc. in Electronics and Communication Engineering Division', 'Electrical, Electronics and Communication Engineering', 'BSc Electronics and Communication Engineering', 'Electronics and communication Engineer', 'Bachelor of Science (BS), Electronics and Communication Engineering', "Bachelor's Degree, Electronics and Communication Engineering", 'ELECTRONICS AND COMMUNICATION ENGINEERING', 'Faculty of Engineering-Electronics and Communication Engineering', 'B.Sc., Electronics and Communication Engineering', '3.15 / 4 - Bachelor of Science (BS), Electronics and Communication Engineering', 'Bachelor Degree of Electronics and Communication Engineering', 'Electronics and communication engineering', 'Electronics and Communication Engineering Program', 'Elektronik ve Haberlesme Muhendisligi, Electronics and Communication Engineering', 'Electronics and Communication Engineering (English)', 'Electronics and Communication Engineering │ Kocaeli, Turkey', 'Electronics Communication Engineering', 'Electronics & Communication Engineering', "Master's Degree, Electronics & Communication Engineering", 'Electronics & Communication Engineer', 'Elektronik ve haberleşme mühendisliği', 'Elektronik Ve Haberleşme Mühendisliği', 'Elektronik ve Haberleşme Mühendisliği, Bilgisayar Mühendisliği', 'Elektronik Ve Haberleşme Mühendisi', 'Elektronik ve haberleşme mühendisligi', 'Elektronik ve Haberleşme müh.', 'Elektronik ve haberleşme mühendisi', 'Lisans - Elektronik ve HAberleşme Mühendisliği', 'Elektronik ve Haberleşme Mühendisliği(%100 İngilizce)', 'elektronik ve Haberleşme müh', 'Elektronik ve Haberleşme Mühendisliği (%30 İngilizce)', 'Elektronik ve Haberleşme Mühendisliği - 2.81', 'Elektronik ve haberleşme Mühendisliği', 'Elektronik ve Haberleşme Mühendisliğ', 'Elektronik ve Haberleşme Mühedisliği', 'Elektronik ve Haberleşme Mühendisliği - Haberleşme (Tezli)', 'Elektronik ve Haberleşme Mühendisliği / Elektronik', 'Elektronik ve Haberleşme Mühendisliği / Hazırlık', 'Elektronik ve Haberleşme Mühendisliği (%100 İngilizce)', 'Elektronik ve Haberleşme Müh', 'Elektronik ve Haberleşme Mühendisl', 'Elektronik ve haberleşme Mühendisliği Yüksek Lisans', 'Elektronik ve Haberleşme Mühendisliği.', 'Elektronics and Communication Engineering', 'Elektronics and Communication Enginnering', 'Elektronics and Telecommunication Engineering', 'Elektronics and Communications Engineering', 'Elektronics and Communication engineering', 'electronics and communication engineering', 'electronics and Communication Engineer', 'electronics and commnication engineering', 'Electronics and Communications Engineering', 'elektronik ve haberleşme mühendisliği', 'elektronik ve haberleşme ', 'elektronik ve haberlesme muhendisligi', 'elektronik ve haberleșme mühendisliği', 'elektronik ve haberleşme mühendisi', 'elektronik ve haberleşme mühendisi ', 'elektronik ve haberleşme mühendisligi', 'ELEKTRONİK VE HABERLEŞME MÜHENDİSLİĞİ', 'ELEKTRONİK VE HABERLEŞME', 'ELEKTRONİK HABERLEŞME', 'ELEKTRONİK HABERLEŞME MÜHENDİSLİĞİ', 'ELEKTRONİK HABERLEŞME TEKNOLOJİSİ', 'Electronic and Communication Eng.', 'Electronics and Communication Eng.', 'Electronics&Communication Eng.', 'Electrical and Electronics Faculty, Electronics and Communication Eng. (Turkish)', 'Electronics & Telecommunications Engineering', 'Electrics and Electronics Engineering', 'Elektronik ve Haberleşme Mühendisliği', 'Elektronik ve Haberleşme Mühendisi', 'Elektronik ve Haberleşme Mühendisliği ', 'Elektronik ve Haberleşme', 'Elektronik ve Haberleşme mühendisliği', 'Elektronik ve Haberleşme Müh.', 'Elektronik ve Haberleşme Teknolojisi', 'Elektrik, Elektronik ve Haberleşme Mühendisliği', ' Elektronik ve Haberleşme Mühendisliği', 'Elektronik ve Habeleşme Mühendisliği', 'Elektronik ve Haberleşme Mühendisliği (İngilizce)', ' Elektronik ve Haberleşme Mühendisliği, Bilgisayar Mühendisliği', 'Elektronik ve Haberlesme Muhendisligi', 'Elektronik ve Haberleşme Mühendisligi', 'Elektronik ve Haberlesme Muh.', 'Lisans (BSc), Elektronik ve Haberleşme Mühendisliği', 'Yüksek Lisans (MSc), Elektronik ve Haberleşme Mühendisliği, Elektronik Programı',]), 'fields_of_study'] = "Electronics and Communication Engineering"

#Mechanical Engineering
df.loc[df['fields_of_study'].isin(['Makina Mühendisliği', 'Makina Mühendisi', 'makina mühendisliği (İng)', 'makina mühendisi', 'Mechanical Engineering', 'Mechanical Engineer', 'Mechanical Engineering - Heat Process', 'MECHANICAL ENGINEERING', 'Mechanical engineer', 'Mechanical engineering', 'Mechanical Engineering Department', 'Mechanical Engineering - Thermodynamics', "Bachelor's Degree, Mechanical Engineering", 'mechanical engineering', 'Makine Mühendisliği/Makine Teknolojisi/Teknisyen', 'Makine Mühendisliği İle İlgili Teknolojiler', 'Makine Mühendisliği Yüksek Lisans', 'Makine Mühendisliği İle İlgili Teknolojiler/Teknisyen', 'Makine Mühendisi', 'Makine Mühendisliği Konstrüksiyon ve İmalat Ana Bilim Dalı', 'Makine Mühendisliği / Malzeme ve İmalat', 'Makine Mühendisliği, (Terk)', 'Makine Mühendisliği/Sistem Dinamiği ve Kontrol', 'Mühendislik Mimarlık Fakültesi Makine Mühendisliği Bölümü', 'Makine Mühendisliği(İngilizce Eğitim Dili)', 'Makine Mühendisliği (İngilizce)', 'Makine Mühendisliği - Yüksek Lisans', 'Makine Mühendisliği/Makine Teknolojisi', 'Makine Mühendisliği/ Isı Proses', 'Makine mühendisliği', 'Makine Mühendisliği / Mechanical Engineering', 'Makine Mühendisliği, 3.62', 'Makine Mühendisliği Isı Proses', 'makine mühendisliği', 'Fen Bilimleri Enstitüsü Makine Mühendisliği', 'Makine Mühendisliği Bölümü - Otomotiv Mühendisliği Programı (ING)', 'Makine Mühendisliği Otomotiv Programı']), 'fields_of_study'] = "Mechanical Engineering"

#Computer Programming
df.loc[df['fields_of_study'].isin(['Bilgisayar Programlama/Programcı', 'Bilgisayar Programciligi', 'Bilgisayar Programlamcılığı', 'Bilgisayar Programcılğı', 'Bilgisayar Programcılığı ve Teknolojisi', 'Bilgisayar Programcılığı ve Sistem Analistliği', 'Bilgisayar Programcısı', 'Bilişim Teknolojileri, Bilgisayar Programlama', 'Teknik Bilimler Meslek Yüksek Okulu - Bilgisayar Programcılığı', 'Bilgisayar Programcılığı ve Teknolojileri', 'Bilgisayar Programcığı', 'Bilgisayar Programlama/Programcı, Yazılım Geliştirme Veritabanı Yönetim Uzmanı', 'Bilgisayar Programcılığı / Yazılım', 'Bilgisayar Programlama (Uzaktan Eğitim)', 'Bilgisayar programcılığı.', 'Bilgisayar Programlama ve Teknolojileri', 'Bilgisayar Programcılığı - Computer Programming', 'Bilgisayar Programlama Bölümü', 'Bilgisayar Programcılığı ( Computer Programming )', 'Bİlgisayar Programcılığı', 'Bilgisayar Programlama,', 'Bilgisayar Programcılığı / Computer Programmer', 'Bilgisayar Programcılığı Mezunu', 'Bilgisayar Programcılığı Bölümü', 'Bilgisayar Prog.', 'Bilgisayar Programcılığı (Uzaktan Eğitim)', 'Bilgisayar Programlama/Programcı, Uzaktan Eğitim', 'Bilgisayar Progrmcılığı', 'Önlisans - Bilgisayar Programcılığı', 'Bilgisayar Programcılığı, GPA: 3.72/4.00', 'Bilgisayar Programcılığı, GPA: 3.35/4.00 - Onur Öğrencisi olarak mezun oldum.', 'Bilgisayar programcılıgı', 'Bilgisayar Programcılığı  U.E.', 'Bilgisayar Programcılığı(İngilizce)', 'Bilgisayar Programclığı', 'Bilgisayar Programlama', 'bilgisayar programcılığı', 'Bilgisayar Programcılığı', 'bilgilsayar programcılığı', 'BİLGİSAYAR PROGRAMCILIĞI', 'BİLGİSAYAR PROGRAMCILIĞI - ÖN LİSANS', 'Bilgisayar Programcılığı / Bilişim', 'Bilgisayar programcılıgı', 'Bilgisayar Programcılığı  U.E.', 'Bilgisayar Programlama 3.19 / 4.0', 'Önlisans, Bilgisayar Programlama', 'Bilgisayar Programcılıgı', 'Computer Programmer', 'Computer programming', 'Computer Programing', "Computer Programming/Programmer, General", "Computer Programming / Programmer, General", "Computer Programming/Programmer", 'Computer programmer', 'Commputer Programmer', 'Computer Programming,Programmer', 'COMPUTER PROGRAMMER', 'Computer Programming/Programmer,']), 'fields_of_study'] = "Computer Programming"

#Control and Automation Engineering
df.loc[df['fields_of_study'].isin(['Kontrol ve Otomasyon Mühendisliği', 'Kontrol Ve Otomasyon Mühendisliği', 'Kontrol ve otomasyon mühendisliği', 'Kontrol ve Otomasyon mühendisliği', 'Kontrol ve Otomasyon Mühendisliği (İngilizce)', 'Kontrol ve Otomasyon Mühendisi', 'kontrol ve otomasyon mühendisliği,3.25', 'Kontrol ve Otomasyon müh.', 'Kontrol ve Otomasyon Mühendisliği (%100 İngilizce)', 'kontrol ve otomasyon mühendisliği', 'Control and Automation Engineer',
'Control And Automation Engineering', 'Control and Automation Engineering (EN)', 'Control and Automation Enginering', "Bachelor's Degree, Control and Automation Engineering", 'Control and Automation Engineering  ( English )', 'Control and automation engineering', 'control and automation engineering', 'Master of Science (M.Sc.), Control and Automation Engineering', 'Electrical and Electronic Faculty/Control And Automation Engineering(Honor List)', 'Control and Automation Engineering ( %100 English )', 'Control and Automation Engineering (English)', 'Control and Automation Engineering (Double Major)', 'Master of Science (MSc), Control and Automation Engineering', 'Control and Automation Engineering (100% English Program)', 'Control and Automation Enigineering', 'Bachelor of Science (BSc), Control and Automation Engineering', 'Doctor of Philosophy (PhD), Control and Automation Engineering', 'Control and Automation Enginnering',]), 'fields_of_study'] = "Control and Automation Engineering"

#Civil Engineering
df.loc[df['fields_of_study'].isin(['İnşaat Mühendisliği', 'İnşaat Mühendisliği Teknolojisi/Teknisyen', 'İnşaat Müh.', 'İnşaat Mühendisliği (EN)', 'İnşaat Mühen', 'Civil Engineer / İnşaat Mühendisliği', 'İnşaat Mühendisliği Teknolojileri/Teknisyen', 'İnşaat Mühendisliği(ing)', 'İnşaat Mühendisliği B.S.C.E', 'İnşaat Mühendisliği Yapı', 'İNŞAAT MÜHENDİSİ', 'İnşaat Mühendisliği Malzeme Anabilim Dalı Master Programı', 'İnşaat Mühendisi', 'İnşaat Mühendisliği M.S.C.E','İnşaat Mühendisliği/Ulaştırma', 'İnşaat Mühendisliği (İngilizce)', 'İnşaat mühendisliği', 'Civil Engineer / İnşaat Mühendisliği', 'Civil Engineering Depertmant', 'Civil Engineering (100% English)', 'Bachelor of Science (B.Sc), Civil Engineering', 'civil engineering', 'Civil Engineer',]), 'fields_of_study'] = "Civil Engineering"

#Mechatronics Engineering
df.loc[df['fields_of_study'].isin(['Mechatronics Engineer', 'Mechatronics engineering', 'Mechatronics Enginnering', 'Mechatronics Engineering | High Honor Student', 'Mechatronics Engineering | Graduated With The First Degree', 'Mechatronics Engineering (Full Scholarship)', 'Mechatronics Engineering and Computer Science', 'Mechatronics Engineering (English)', 'Mechatronics Engineering (Eng.)', 'Mechatronics Engineering  (Double Major Program )', 'Mekatronik Mühendisliği', 'Mekatronik mühendisliği', 'mekatronik mühendisliği', 'Mekatronik Mühendisliği (İngilizce)', 'Mekatronik Mühendisliği Yüksek Lisans Programı', 'Mekatronik Mühendisi', 'Mekatronik Mühendislği', 'Mekatronik Mühendisliği Anabilim Dalı', 'mekatronik mühendisi', 'Teknoloji Fakültesi- Mekatronik Mühendisliği (M.T.O.K)', 'Yüksek Lisans (Master) - Mekatronik Mühendisliği', 'Mekatronik Mühendisliği (çift ana dal)', 'mekatronik mühendisi,3.03/4', 'Mekatronik Mühendisliği (%50 Burslu, %100 İngilizce)']), 'fields_of_study'] = "Mechatronics Engineering"

#Food Engineering
df.loc[df['fields_of_study'].isin(['Food Engineering', 'Food Engineer', 'Food engineering', 'Food Engineer Departmant', 'food engineering', 'FOOD ENGINEER', 'Food Engineering (50% Schoolar ship)', 'Food engineer', 'BSc, Food Engineering (100% English)', 'MSc, Food Engineering', 'Food Engineering-Nutrition', 'Engineering faculty, Food Engineering Department', 'Food Engineering Faculty', 'Food Engineering / Food Technology', 'Food Engineering - Double Major', 'Chemical Food Engineering', 'FOOD ENGINEERING', 'Food Engineering Department', 'Food Engineering (Yarıda Bırakıldı,Abandoned)', 'Department of Food Engineering', 'Gıda Mühendisliği (Food Engineering)', 'Food Engineering-Food Quality Control', 'İzmir ,Faculty of Engineering ,Department of Food Engineering', 'MSc. Food Engineering, Erasmus Student', '3,27/ Food Engineering', 'Food Engineering-100%  English', 'Bachelor of Science (BSc), Food Engineering', 'BSc. Food Engineering', 'MSc. Food Engineering', 'Food Engineering (English)', 'Master of Food Engineering', 'Gıda Mühendisliği / Food engineering', 'Gıda Mühendisliği', 'Gıda Mühendisi', 'Gıda mühendisliği', 'gıda mühendisliği', 'GIDA MÜHENDİSLİĞİ', 'Gıda mühendisi', 'Mühendislik Fakültesi Gıda Mühendisliği', 'Gida Mühendisliği', 'Gıda Mühendissliği', 'Gıda Mühendsiliği', 'Gıda Mühendisliği Bölümü', 'Gıda Mühendisliği / Food engineering', 'gıda mühendisi']), 'fields_of_study'] = 'Food Engineering'

#Mechatronics, Robotics and Automation Engineering
df.loc[df['fields_of_study'].isin(['Mechatronics, Robotics, and Automation Engineering', 'Mechatronics, Robotics and Automation Engineering', 'Mechatronics, Robotics, and Automation', 'Mechatronics, Robotics and Automation Engineering  3.03/4', 'Mechatronics, Robotics and Automation', 'Mechatronics, Robotics, Automation', 'Mechatronics Robotics and Automation Engineering', 'Mekatronik, Robotik ve Otomasyon Mühendisliği']), 'fields_of_study'] = 'Mechatronics, Robotics and Automation Engineering'

#Information Systems Engineering
df.loc[df['fields_of_study'].isin(['Information Systems Engineering', 'Information Systems Engineer', 'İnformation Systems Engineer', 'Information systems engineering', 'information systems engineering', 'Information Systems Enginerring', 'Faculty of Computer and Informatics  /\tInformation Systems Engineering', 'Information Systems Engineering - %100 English', 'Information Systems Engineering(DDP)', 'Information Systems Engineering(SUNY)', 'Bachelor of Science in Information Systems Engineering', 'Information Systems Enginnering', 'information Systems Engineering', 'information systems enginneer', 'information systems engineer', 'İnformation systems engineer', 'Bilişim Sistemleri Mühendisliği', 'Bilişim sistemleri mühendisliği', 'bilişim sistemleri mühendisliği', 'Bilişim Sistemleri Mühendisi', 'Bilgisayar ve Bilişim Sistemleri Mühendisliği', 'Bilişim Sistemleri Mühendisliği (Tezli)', 'Bilişim Sistemleri mühendisliği', 'Bilişim Sistemleri Müh.', 'Bilişim Sİstemleri Mühendisliği', 'Bilişim Sistemleri Mühendisliği  ABD Yüksek Lisans', 'BİLİŞİM SİSTEMLERİ MÜHENDİSLİĞİ', 'Bilişim Sistemleri Mühendislik', 'Bilişim Sistemleri Mühensiliği']), 'fields_of_study'] = 'Information Systems Engineering'

#Physics Engineering
df.loc[df['fields_of_study'].isin(['Physics Engineer', 'Faculty of Engineering  Department of Physics Engineering.', 'geophysics engineering', 'Geophysics Engineering', 'Physics Engineering (%100 English)', 'Physics Engineerng', 'Faculty of Engineering, Physics Engineering', 'Fizik Mühendisliği / Physics Engineering', 'Physics Eng. Msc.', "Fizik Mühendisliği"]), 'fields_of_study'] = 'Physics Engineering'

#Electrical, Electronics and Communications Engineering
df.loc[df['fields_of_study'].isin([["Bachelor's degree, Electrical, Electronics and Communications Engineering", 'Electrical, Electronics and Communications Engineering, Wireless Communication', 'Electrical, Electronics and Communications Engineering (BSc)', 'Electrical, Electronics and Communications Engineer', "Electrical, Electronics and Communications Engineering - Bachelor's Level", 'Elektrik, Elektronik ve İletişim Mühendisliği', 'Elektrik, Elektronik ve İletişim Mühendisliği Teknolojileri/Teknisyen', "Elektrik, Elektronik ve İletişim Mühendisliği"]]), 'fields_of_study'] = 'Electrical, Electronics and Communications Engineering'

#Electrical Engineering
df.loc[df['fields_of_study'].isin(['Electrical Engineer', 'Electrical Engineering Technologies/Technicians', 'Electrical Enginnering', 'Electrical engineer', 'Electrical engineering', 'electrical engineering', 'Electrical Engineering, B.Sc.', 'Electric-Electrical Engineering', 'Electrical Eng.']), 'fields_of_study'] = 'Electrical Engineering'

df.loc[df['fields_of_study'] == '-', 'fields_of_study'] = np.nan
df.loc[df['fields_of_study'] == '--', 'fields_of_study'] = np.nan

df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: x.replace('Eng.', 'Engineering'))
df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: x.replace('eng.', 'Engineering'))
df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: x.replace('Sci.', 'Science'))

In [14]:
study_keywords = ['ingilizce', 'mühendis', 'matematik', 'kimya', 'fizik', 'istatisik', 'biyoloji', 'yönetim', 'bilişim', 'sistem', 'yazılım', 'veri', 'bilgisayar', 'bilim', 'endüstri', 'gıda', 'elektrik', 'ekonomi']

study_translated = dict()
for kw in study_keywords:
    print(f'keyword: {kw}')
    for i in tqdm(df.loc[df['fields_of_study'].astype(str).str.contains(kw, case = False), 'fields_of_study'].unique()):
        if i not in study_translated.keys():
            try:
                study_translated[i] = translator.translate(i)
            except:
                study_translated[i] = i

for key in study_translated.keys():
    df.loc[df['fields_of_study'] == key, 'fields_of_study'] = study_translated[key]

df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: x.lower())
df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: translation(x))

print(f'education data shape: {df.shape}')
print(f'degree classes: {df["degree"].nunique()}')
print(f'school_name classes: {df["school_name"].nunique()}')
print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
df.to_csv('../../../datasets/garanti-bbva-data-camp/clean_education_v2.csv', index = False)

keyword: ingilizce


100%|██████████| 110/110 [01:24<00:00,  1.30it/s]


keyword: mühendis


100%|██████████| 521/521 [05:45<00:00,  1.51it/s]


keyword: matematik


100%|██████████| 85/85 [00:50<00:00,  1.68it/s]


keyword: kimya


100%|██████████| 48/48 [00:24<00:00,  1.96it/s]


keyword: fizik


100%|██████████| 40/40 [00:14<00:00,  2.72it/s]


keyword: istatisik


0it [00:00, ?it/s]


keyword: biyoloji


100%|██████████| 24/24 [00:13<00:00,  1.77it/s]


keyword: yönetim


100%|██████████| 236/236 [02:39<00:00,  1.48it/s]


keyword: bilişim


100%|██████████| 229/229 [02:13<00:00,  1.71it/s]


keyword: sistem


100%|██████████| 184/184 [01:06<00:00,  2.77it/s]


keyword: yazılım


100%|██████████| 109/109 [01:10<00:00,  1.54it/s]


keyword: veri


100%|██████████| 105/105 [00:30<00:00,  3.49it/s]


keyword: bilgisayar


100%|██████████| 462/462 [03:01<00:00,  2.55it/s]


keyword: bilim


100%|██████████| 263/263 [01:53<00:00,  2.31it/s]


keyword: endüstri


100%|██████████| 70/70 [00:35<00:00,  1.99it/s]


keyword: gıda


100%|██████████| 13/13 [00:05<00:00,  2.18it/s]


keyword: elektrik


100%|██████████| 130/130 [00:59<00:00,  2.18it/s]


keyword: ekonomi


100%|██████████| 32/32 [00:10<00:00,  2.98it/s]


education data shape: (142575, 4)
degree classes: 2833
school_name classes: 8630
fields_of_study classes: 8198


In [15]:
#study_translated = dict()
#for i in tqdm(df['fields_of_study'].dropna().unique()):
#    try:
#        study_translated[i] = translator.translate(i)
#    except:
#        study_translated[i] = i
#for key in study_translated.keys():
#    df.loc[df['fields_of_study'] == key, 'fields_of_study'] = study_translated[key]
#df.loc[df['fields_of_study'].notnull(), 'fields_of_study'] = df.loc[df['fields_of_study'].notnull(), 'fields_of_study'].apply(lambda x: x.lower())
#    
#print(f'education data shape: {df.shape}')
#print(f'degree classes: {df["degree"].nunique()}')
#print(f'school_name classes: {df["school_name"].nunique()}')
#print(f'fields_of_study classes: {df["fields_of_study"].nunique()}')
#df.head()

In [16]:
#df.to_csv(output_path, index = False)

In [17]:
#df.loc[df['fields_of_study'].astype(str).str.contains('Math', case=False, regex=False), 'fields_of_study'].value_counts().keys().tolist()

In [18]:
#df.loc[df['fields_of_study'].astype(str).str.contains('computer', case = False), 'fields_of_study'].value_counts().keys().tolist()[:20]

In [19]:
#for i in df.loc[df['school_name'].astype(str).str.contains('Lisesi'), 'school_name'].value_counts()[:50].keys():
#    translated = GoogleTranslator(source='auto', target='en').translate(i)
#    if df.loc[df['school_name'] == translated].shape[0] != 0:
#        print(f'df.loc[df["school_name"] == "{i}", "school_name"] = "{translated.title()}"')

In [20]:
#a = pd.DataFrame({'text': ['highschool', 'High school', 'High-school', 'High*School']})
#
#a.loc[a['text'].str.contains('high[\W\s]school', case=False, regex=True)]

In [21]:
#import re
#
#text = "The quick-brown fox jumps over the lazy dog."
#
#re.search(r'quick[\W\s]+fox', text)
#
#
#

In [22]:
#df['degree'].value_counts()[:20]

In [23]:
#s = 'Ümer'
#s_encoded = s.encode('utf-8')
#print(s_encoded)