In [40]:
import numpy as np
import random
import torch
import torch.nn as nn
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
import pandas as pd

In [96]:
data = pd.read_csv("data1.csv")

In [97]:
data.head()

Unnamed: 0,STUDENT ID,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,COURSE ID,GRADE
0,STUDENT1,2,2,3,3,1,2,2,1,1,...,1,1,3,2,1,2,1,1,1,1
1,STUDENT2,2,2,3,3,1,2,2,1,1,...,1,1,3,2,3,2,2,3,1,1
2,STUDENT3,2,2,2,3,2,2,2,2,4,...,1,1,2,2,1,1,2,2,1,1
3,STUDENT4,1,1,1,3,1,2,1,2,1,...,1,2,3,2,2,1,3,2,1,1
4,STUDENT5,2,2,1,3,2,2,1,3,1,...,2,1,2,2,2,1,2,2,1,1


In [98]:
# 1-10 of the data are the personal questions, 11-16. questions include family questions, 
# and the remaining questions include education habits.

rename_dict = {
    '1': 'Age',
    '2': 'Sex',
    '3': 'Grad. Highschool type',
    '4': 'Scholarship',
    '5': 'Additional work',
    '6': 'Regular artistic or sports activity',
    '7': 'Partner',
    '8': 'Total salary if available',
    '9': 'Transport to university',
    '10': 'Accommodation type in Cyprus',
    '11': 'Mother-education',
    '12': 'Father-education',
    '13': '# of siblings',
    '14': 'Parental Status',
    '15': 'Mother-occupation',
    '16': 'Father-occupation',
    '17': 'Weekly hours study',
    '18': 'reading non-sci freq.',
    '19': 'reading sci freq.',
    '20': 'Attendance to the conferences',
    '21': 'Impact of your projects',
    '22': 'Class attendence',
    '23': 'Midterm study (alone / withfriends)',
    '24': 'Midterm study (close to exam / regularly)',
    '25': 'Taking notes in classes',
    '26': 'Listening in classes',
    '27': 'Discussion improves my interest and success',
    '28': 'Flip-classroom useful',
    '29': 'CGPA last semester',
    '30': 'Expected CGPA in grad.',
    
}
data.rename(columns=rename_dict, inplace=True)

In [99]:
mappings = {
    'Age': {1: '18-21', 2: '22-25', 3: '26+'},
    'Sex': {1: 'Female', 2: 'Male'},
    'Grad. Highschool type': {1: 'private', 2: 'state', 3: 'other'},
    'Scholarship': {1: 'None', 2: '25%', 3: '50%', 4: '75%', 5: 'Full'},
    'Additional work': {1: 'Yes', 2: 'No'},
    'Regular artistic or sports activity': {1: 'Yes', 2: 'No'},
    'Partner': {1: 'Yes', 2: 'No'},
    'Total salary if available': {1: 'USD 135-200', 2: 'USD 201-270', 3: 'USD 271-340', 4: 'USD 341-410', 5: 'above 410'},
    'Transport to university': {1: 'Bus', 2: 'Private car/taxi', 3: 'bicycle', 4: 'Other'},
    'Accommodation type in Cyprus': {1: 'rental', 2: 'dormitory', 3: 'with family', 4: 'Other'},
    'Mother-education': {1: 'primary school', 2: 'secondary school', 3: 'high school', 4: 'university', 5: 'MSc.', 6: 'Ph.D.'},
    'Father-education': {1: 'primary school', 2: 'secondary school', 3: 'high school', 4: 'university', 5: 'MSc.', 6: 'Ph.D.'},
    '# of siblings': {1: '1', 2: '2', 3: '3', 4: '4', 5: '5 or above'},
    'Parental Status': {1: 'married', 2: 'divorced', 3: 'died - one of them or both'},
    'Mother-occupation': {1: 'retired', 2: 'housewife', 3: 'government officer', 4: 'private sector employee', 5: 'self-employment', 6: 'other'},
    'Father-occupation': {1: 'retired', 2: 'government officer', 3: 'private sector employee', 4: 'self-employment', 5: 'other'},
    'Weekly hours study': {1: 'None', 2: '<5 hours', 3: '6-10 hours', 4: '11-20 hours', 5: 'more than 20 hours'},
    'reading non-sci freq.': {1: 'None', 2: 'Sometimes', 3: 'Often'},
    'reading sci freq.': {1: 'None', 2: 'Sometimes', 3: 'Often'},
    'Attendance to the conferences': {1: 'Yes', 2: 'No'},
    'Impact of your projects': {1: 'positive', 2: 'negative', 3: 'neutral'},
    'Class attendence' : {1: 'always', 2: 'sometimes', 3: 'never'},
    'Midterm study (alone / withfriends)': {1: 'alone', 2: 'friends', 3: 'N/A'},
    'Midterm study (close to exam / regularly)': {1: 'close', 2: 'regularly', 3: 'never'},
    'Taking notes in classes': {1: 'never', 2: 'sometimes', 3: 'always'},
    'Listening in classes' : {1: 'never', 2: 'sometimes', 3: 'always'},
    'Discussion improves my interest and success': {1: 'never', 2: 'sometimes', 3: 'always'},
    'Flip-classroom useful': {1: 'not useful', 2: 'useful', 3: 'N/A'},
    'CGPA last semester': {1: '<2.00', 2: '2.00-2.49', 3: '2.50-2.99', 4: '3.00-3.49', 5: 'above 3.49'},
    'Expected CGPA in grad.': {1: '<2.00', 2: '2.00-2.49', 3: '2.50-2.99', 4: '3.00-3.49', 5: 'above 3.49'},
    'COURSE ID': {1: 'not useful', 2: 'useful', 3: 'N/A'},
    'GRADE': {0: 'Fail', 1: 'DD', 2: 'DC', 3: 'CC', 4: 'CB', 5: 'BB', 6: 'BA', 7: 'AA'}
}

# The code to apply these mappings would be similar to before:
for column, mapping in mappings.items():
    data[column] = data[column].map(mapping)

In [100]:
data.head()

Unnamed: 0,STUDENT ID,Age,Sex,Grad. Highschool type,Scholarship,Additional work,Regular artistic or sports activity,Partner,Total salary if available,Transport to university,...,Midterm study (alone / withfriends),Midterm study (close to exam / regularly),Taking notes in classes,Listening in classes,Discussion improves my interest and success,Flip-classroom useful,CGPA last semester,Expected CGPA in grad.,COURSE ID,GRADE
0,STUDENT1,22-25,Male,other,50%,Yes,No,No,USD 135-200,Bus,...,alone,close,always,sometimes,never,useful,<2.00,<2.00,not useful,DD
1,STUDENT2,22-25,Male,other,50%,Yes,No,No,USD 135-200,Bus,...,alone,close,always,sometimes,always,useful,2.00-2.49,2.50-2.99,not useful,DD
2,STUDENT3,22-25,Male,state,50%,No,No,No,USD 201-270,Other,...,alone,close,sometimes,sometimes,never,not useful,2.00-2.49,2.00-2.49,not useful,DD
3,STUDENT4,18-21,Female,private,50%,Yes,No,Yes,USD 201-270,Bus,...,alone,regularly,always,sometimes,sometimes,not useful,2.50-2.99,2.00-2.49,not useful,DD
4,STUDENT5,22-25,Male,private,50%,No,No,Yes,USD 271-340,Bus,...,friends,close,sometimes,sometimes,sometimes,not useful,2.00-2.49,2.00-2.49,not useful,DD
