# Path

In [1]:
import sys
import os

# Project root. The default is a machine-specific absolute path, so it can be
# overridden with the KAPSUL_PROJECT_PATH environment variable on other machines.
project_path = os.environ.get(
    'KAPSUL_PROJECT_PATH',
    r'/home/craxiss/Documents/projects/kapsul_etkinlik_v2',
)

# Make the project root the current working directory.
os.chdir(project_path)

# Make the project root importable (presumably where `helpers` and `config`
# live -- confirm). Guarded so re-running the cell does not add duplicates.
if project_path not in sys.path:
    sys.path.append(project_path)

# Imports

In [2]:
from helpers import column_formatter, first_letter_id_generator
from charset_normalizer import detect
from config import data_path as path
from string import ascii_lowercase
from json import load, dump
import pandas as pd
import openpyxl

# Read

In [3]:
# Applications workbook: read the 'Başvurular' and 'Dersler' sheets, both
# indexed by their 'Unnamed: 0' column.
with pd.ExcelFile(path + '/2-cleaned/basvuru.xlsx') as appeals_xlsx:
    appeals = pd.read_excel(appeals_xlsx, sheet_name='Başvurular', index_col='Unnamed: 0')
    lessons_all = pd.read_excel(appeals_xlsx, sheet_name='Dersler', index_col='Unnamed: 0')


# Roll workbook: the index of the 'General Info' sheet supplies the names of
# the sheets to load; each of them is stored in `rolls` under its sheet name.
with pd.ExcelFile(path + '/2-cleaned/yoklama.xlsx') as rolls_xlsx:
    rolls_info = pd.read_excel(rolls_xlsx, sheet_name='General Info', index_col='Unnamed: 0')

    rolls = {
        sheet_name: pd.read_excel(rolls_xlsx, sheet_name=sheet_name)
        for sheet_name in rolls_info.index
    }

# Concatenating

## Get Known Genders

In [4]:
# Map every name found on any roll sheet to the gender recorded next to it.
# Sheets are visited in `rolls` order, so a later sheet overrides an earlier
# one when the same name appears more than once.
genders = {}
for roll in rolls.values():
    genders.update(zip(roll['name'], roll['gender']))

## Concat Genders & Information

### Appeals

In [5]:
# name -> personal record; the first occurrence of a name is the one kept.
information = {}
# [kept record, repeated record] pairs for names that show up again.
duplicate_person = []

for row in zip(appeals['name'], appeals['phone'], appeals['mail']):
    name, phone, mail = row

    personal = {
        'name': name,
        'phone': phone,
        'mail': mail,
        # Gender is only known for people who also appear on a roll sheet.
        'gender': genders.get(name, float('NaN')),
    }

    if name in information:
        duplicate_person.append([information[name], personal])
        continue

    information[name] = personal

In [6]:
# Number of distinct names collected from the `appeals` frame so far.
len(information)

460

### Rolls

In [7]:
# Add people who appear on a roll sheet but are not in `information` yet.
# Their phone and mail are filled with NaN; names already present keep their
# existing record untouched.
for roll in rolls.values():
    for name in roll['name']:
        information.setdefault(name, {
            'name': name,
            'phone': float('NaN'),
            'mail': float('NaN'),
            'gender': genders[name],
        })


# Id

In [8]:
# NOTE(review): this definition shadows the `first_letter_id_generator` that the
# imports cell pulls in from `helpers` -- confirm which of the two is intended.
def first_letter_id_generator(name:str, hash_table:dict):
    """Build the next id for ``name`` from the code list of its first letter.

    The id is the literal prefix ``'22'`` followed by every code stored for the
    letter, each with its leading character removed (``[101, 1001]`` gives
    ``'2201001'``). Once the id is built, the second code of that letter is
    incremented in place, so the next name with the same letter gets a new id.

    Parameters
    ----------
    name : str
        Name to create an id for. An empty string is looked up under ``''``.
    hash_table : dict
        Maps a letter to a mutable list of codes whose second element acts as
        the running counter. The dict's lists are mutated by this call.

    Returns
    -------
    tuple
        ``(new_id, hash_table)``; ``hash_table`` is the same object passed in.

    Raises
    ------
    TypeError
        If ``name`` is not a string (for example a float NaN).
    KeyError
        If neither the first letter nor its lowercase form is a key of
        ``hash_table``. The table is left unchanged in that case.
    """
    if not isinstance(name, str):
        raise TypeError(f"name must be a str, got {type(name).__name__}")

    # An empty name has no first character, so it is its own lookup key.
    letter = name[0] if name else name

    # Fall back to the lowercase form (e.g. for the string "NaN").
    if letter not in hash_table:
        letter = letter.lower()
    if letter not in hash_table:
        raise KeyError(
            f"no id codes registered for first letter {letter!r} of name {name!r}"
        )

    codes = hash_table[letter]

    # Drop the leading character of every code: 101 -> '01', 1001 -> '001'.
    new_id = "22" + "".join(str(code)[1:] for code in codes)

    # Advance the counter only after the current value went into the id.
    codes[1] += 1

    return new_id, hash_table

In [None]:
# Lookup keys: the ASCII lowercase letters plus " " and "" (the empty string is
# the key used below for missing names).
lowercase_letters = list(ascii_lowercase)
lowercase_letters.extend([" ", ""])

# One [first code, running counter] pair per key. first_letter_id_generator
# drops the leading digit of each code, so 101 -> '01' and 1001 -> '001'.
letter_codes = [[101 + i, 1001 + i] for i in range(len(lowercase_letters))]

# The dict shares the list objects with `letter_codes`, so counter updates made
# by the generator are visible through both names.
id_dict = dict(zip(lowercase_letters, letter_codes))

id_information = dict()
for name, info in information.items():
    # Missing names are float NaN; isinstance also covers float subclasses
    # such as numpy.float64, which an exact type comparison would miss.
    lookup_name = "" if isinstance(name, float) else name
    # `person_id` instead of `id` so the builtin id() is not shadowed.
    person_id, id_dict = first_letter_id_generator(lookup_name, id_dict)
    id_information[person_id] = info

# A repeated id would silently overwrite an earlier person's record.
assert len(id_information) == len(information), "generated ids are not unique"

# DataFrame

In [None]:
# The ids are the outer keys, so after transposing each id is one row and the
# personal fields are the columns.
information_df = pd.DataFrame(id_information).transpose()

# Write

In [None]:
# Write the id-indexed table to the results folder. The path is joined with a
# separator: the read cell builds its paths as path + '/2-cleaned/...', so plain
# concatenation with '3-result/...' would not point inside `path`.
result_file = os.path.join(path, '3-result', 'information.xlsx')
with pd.ExcelWriter(result_file, mode='w') as writer:
    information_df.to_excel(writer, sheet_name='Information')