# Path

In [1]:
import sys
import os

# Root directory of the project. The KAPSUL_PROJECT_PATH environment variable
# overrides the location so the notebook can run from another checkout; the
# original absolute path remains the default.
project_path = os.environ.get(
    'KAPSUL_PROJECT_PATH',
    r'/home/craxiss/Documents/projects/kapsul_etkinlik_v2',
)

# Run the rest of the notebook from the project root.
os.chdir(project_path)

# Make modules stored in the project root importable. The membership check
# keeps sys.path free of duplicates when this cell is executed again.
if project_path not in sys.path:
    sys.path.append(project_path)

# Import

In [2]:
from helpers import column_formatter, utf8_convert
from charset_normalizer import detect
from config import data_path as path
from itertools import combinations 
from datetime import datetime
from json import dump, load
import plotly.express as px
import pandas as pd
import warnings
import openpyxl

# Read

In [3]:
# Workbooks in the 3-result folder; each one keeps its index in the
# 'Unnamed: 0' column.
result_dir = path + '/3-result/'
information, events_per_person, events, ohe = (
    pd.read_excel(result_dir + workbook, index_col='Unnamed: 0')
    for workbook in (
        'information.xlsx',
        'events_per_person.xlsx',
        'events.xlsx',
        'event_per_person_ohe.xlsx',
    )
)

# Two sheets of the cleaned application workbook.
with pd.ExcelFile(f'{path}/2-cleaned/basvuru.xlsx') as appeals_xlsx:
    appeals, lessons_all = (
        pd.read_excel(appeals_xlsx, sheet_name=sheet_name, index_col='Unnamed: 0')
        for sheet_name in ('Başvurular', 'Dersler')
    )

# The 'General Info' sheet lists (as its index) the names of the remaining
# sheets of the cleaned roll workbook; each of them is loaded into `rolls`.
with pd.ExcelFile(f'{path}/2-cleaned/yoklama.xlsx') as rolls_xlsx:
    rolls_info = pd.read_excel(
        rolls_xlsx, sheet_name='General Info', index_col='Unnamed: 0')
    rolls = {
        sheet_name: pd.read_excel(rolls_xlsx, sheet_name=sheet_name)
        for sheet_name in rolls_info.index
    }

# Expose the index of `information` as a regular 'id' column as well.
information = information.assign(id=information.index)

# Appeals

In [4]:
# Values of the 'eğitim' column as a plain list, in row order.
events_list = [event_name for event_name in events['eğitim']]

In [5]:
# Every second column of `ohe`, starting with the first one. The labels are
# parsed below as 'b<number>' and the number (minus one) is used as the
# row/column position inside the cross table.
target_cols = ohe.columns[::2]
appeals_cross_df = pd.DataFrame(index=target_cols, columns=target_cols)

combs = list(combinations(target_cols, 2))

# Off-diagonal cells: number of rows flagged 1 in both columns, written
# symmetrically on either side of the diagonal.
for first_label, second_label in combs:
    row_pos = int(first_label.replace('b', '')) - 1
    col_pos = int(second_label.replace('b', '')) - 1
    flagged_in_both = (ohe[first_label] == 1) & (ohe[second_label] == 1)
    shared_count = ohe[flagged_in_both].shape[0]
    appeals_cross_df.iloc[row_pos, col_pos] = shared_count
    appeals_cross_df.iloc[col_pos, row_pos] = shared_count

# Diagonal cells: column total of each flag column.
for label in target_cols:
    diag_pos = int(label.replace('b', '')) - 1
    appeals_cross_df.iloc[diag_pos, diag_pos] = ohe[label].sum()

px.imshow(appeals_cross_df, text_auto=True, width=1000, height=1000)

# Rolls

In [6]:
# Every second column of `ohe`, starting with the second one. The labels are
# parsed below as 'k<number>' and the number (minus one) is used as the
# row/column position inside the cross table.
target_cols = ohe.columns[1::2]
rolls_cross_df = pd.DataFrame(index=target_cols, columns=target_cols)

combs = list(combinations(target_cols, 2))

# Off-diagonal cells: number of rows flagged 1 in both columns, written
# symmetrically on either side of the diagonal.
for first_label, second_label in combs:
    row_pos = int(first_label.replace('k', '')) - 1
    col_pos = int(second_label.replace('k', '')) - 1
    flagged_in_both = (ohe[first_label] == 1) & (ohe[second_label] == 1)
    shared_count = ohe[flagged_in_both].shape[0]
    rolls_cross_df.iloc[row_pos, col_pos] = shared_count
    rolls_cross_df.iloc[col_pos, row_pos] = shared_count

# Diagonal cells: column total of each flag column.
for label in target_cols:
    diag_pos = int(label.replace('k', '')) - 1
    rolls_cross_df.iloc[diag_pos, diag_pos] = ohe[label].sum()

px.imshow(rolls_cross_df, text_auto=True, width=1000, height=1000)

# Ratio

In [7]:
# Pair the two cross tables cell by cell: every cell of `pre_ratio` holds
# [rolls_cross_df value, appeals_cross_df value] for the same position.
# The size is taken from the cross table instead of a hard-coded 24 so the
# cell keeps working when the number of events changes.
n_events = len(rolls_cross_df)
pre_ratio = pd.DataFrame(columns=range(n_events), index=range(n_events))

for i in range(n_events):
    for j in range(n_events):
        pre_ratio.iloc[i, j] = [rolls_cross_df.iloc[i, j], appeals_cross_df.iloc[i, j]]

In [8]:
# Divide the first element of every `pre_ratio` cell by the second one.
# The size comes from `pre_ratio` itself rather than a hard-coded 24.
# NOTE(review): a zero denominator is not handled here.
n_events = len(pre_ratio)
perc_ratio = pd.DataFrame(columns=range(n_events), index=range(n_events))
for i in range(n_events):
    for j in range(n_events):
        roll_count, appeal_count = pre_ratio.iloc[i, j]
        perc_ratio.iloc[i, j] = roll_count / appeal_count

# Label rows and columns 'r1' .. 'r<n>'; later cells parse the number back
# out of these labels.
ratio_labels = [f'r{number}' for number in range(1, n_events + 1)]
perc_ratio.columns, perc_ratio.index = list(ratio_labels), list(ratio_labels)

# Same table expressed as percentages.
query_perc_ratio = perc_ratio.astype(float) * 100

# The heatmap shows the percentages truncated to integers.
px.imshow(query_perc_ratio.astype(int), text_auto=True, width=1000, height=1000)

# Get All

In [9]:
# All unordered pairs of ratio labels ('r<number>').
combs = list(combinations(query_perc_ratio.columns, 2))

# Turn each label pair into zero-based positions shared by `events_list`
# and `query_perc_ratio`.
pair_positions = [
    (int(left.replace('r', '')) - 1, int(right.replace('r', '')) - 1)
    for left, right in combs
]

e1 = [events_list[x] for x, _ in pair_positions]
e2 = [events_list[y] for _, y in pair_positions]
ratio = [query_perc_ratio.iloc[x, y] for x, y in pair_positions]

# One row per pair: both event names and the percentage of that pair.
ratio_comb_df = pd.DataFrame({'ders1': e1, 'ders2': e2, 'oran': ratio})

In [10]:
# All unordered pairs of roll labels ('k<number>').
combs = list(combinations(rolls_cross_df.columns, 2))

# The counts are collected in `roll_counts` rather than `rolls`, so the
# `rolls` dict of attendance sheets built in the read cell is not overwritten.
e1, e2, roll_counts = [], [], []
for first_label, second_label in combs:
    x = int(first_label.replace('k', '')) - 1
    y = int(second_label.replace('k', '')) - 1
    e1.append(events_list[x])
    e2.append(events_list[y])
    roll_counts.append(rolls_cross_df.iloc[x, y])

# One row per pair: both event names and the `rolls_cross_df` value of the pair.
roll_comb_df = pd.DataFrame({'ders1': e1, 'ders2': e2, 'katılım': roll_counts})

## Freq

In [11]:
def _freq_label(ratio_value):
    """Return the 10-point bin label '[lower, upper)' of a percentage.

    Values in [0, 100) are mapped to '[0, 10)', '[10, 20)', ... '[90, 100)'.
    Missing values and values outside [0, 100) get NaN.
    """
    if pd.isna(ratio_value) or not 0 <= ratio_value < 100:
        return float('NaN')
    lower = int(ratio_value // 10) * 10
    return f'[{lower}, {lower + 10})'


# Bin label of every pair. Rows that fall into no bin stay NaN; the previous
# placeholder list left the row number as the label of such rows and
# hard-coded the number of rows (276).
ratio_comb_df['freq'] = ratio_comb_df['oran'].map(_freq_label)

# Lookup table with one column per event name: the rows are the original
# columns of `events` (the 'eğitim' column itself is kept).
events_kw = events.set_index('eğitim', drop=False).T

## Get Other Configs

In [12]:
# Six parallel lists: day1, day2, hour1, hour2, duration1, duration2.
other_configs = [[] for _ in range(6)]

# Row positions inside an `events_kw` column holding the day, the hour and
# the duration of an event (in that order).
config_positions = (1, 3, 5)

for name1, name2 in zip(ratio_comb_df['ders1'], ratio_comb_df['ders2']):
    event1, event2 = events_kw[name1], events_kw[name2]
    for slot, position in enumerate(config_positions):
        # `.iloc` makes the positional lookup explicit. Plain `series[1]` on a
        # label-indexed Series relies on a deprecated positional fallback
        # (assumes the labels are not integers — TODO confirm).
        other_configs[2 * slot].append(event1.iloc[position])
        other_configs[2 * slot + 1].append(event2.iloc[position])


# Transpose so every list becomes a column.
other_configs_df = pd.DataFrame(other_configs).T
other_configs_df.columns = ['gün1', 'gün2', 'saat1', 'saat2', 'süre1', 'süre2']

# Attach the six columns to both pair tables (rows are aligned on the index).
ratio_comb_df[other_configs_df.columns] = other_configs_df
roll_comb_df[other_configs_df.columns] = other_configs_df

# Group By

In [13]:
# Row-wise flags: does the pair share the same day / hour / duration?
day_equals = ratio_comb_df['gün1'].eq(ratio_comb_df['gün2'])
hour_equals = ratio_comb_df['saat1'].eq(ratio_comb_df['saat2'])
duration_equals = ratio_comb_df['süre1'].eq(ratio_comb_df['süre2'])

# Keep the pairs that agree on at least one of the three attributes.
shares_any_attribute = day_equals | hour_equals | duration_equals
groupby_df = ratio_comb_df.loc[shares_any_attribute]

In [14]:
groups = ['freq', 'gün1', 'gün2', 'saat1', 'saat2', 'süre1', 'süre2']

# Every non-empty subset of the grouping columns, smallest subsets first.
combs = [
    subset
    for size in range(1, len(groups) + 1)
    for subset in combinations(groups, size)
]

# NOTE(review): the grouping runs on the full `ratio_comb_df`; the filtered
# `groupby_df` is not used here — confirm this is intended.
groupby = dict()
for comb in combs:
    # Short key per subset, later used as an Excel sheet name. The first two
    # letters plus the last character keep 'saat1' ('sa1') and 'süre1' ('sü1')
    # apart; with first letter + last character both became 's1' and the later
    # grouping overwrote the earlier one. The longest key is 27 characters,
    # within Excel's 31-character sheet-name limit.
    key = ' '.join(name[:2] + name[-1] for name in comb)
    grouped = ratio_comb_df.groupby(by=list(comb))

    # Per group: number of pairs and mean percentage. Only the needed columns
    # are aggregated instead of summing every column of the frame.
    groupby[key] = pd.DataFrame({
        'ders combinasyonu': grouped['ders1'].count(),
        'ortalama oran': grouped['oran'].mean(),
    })

In [15]:
# Write every grouping to its own sheet of one workbook. The path now has the
# '/' separator before '3-result', matching every other path built from
# `path` in this notebook.
with pd.ExcelWriter(path + '/3-result/groupby_correlation.xlsx', mode='w') as writer:
    for group, df in groupby.items():
        df.to_excel(writer, sheet_name=group)

In [16]:
# Save both pair tables to the 3-result folder.
for frame, filename in (
    (ratio_comb_df, 'ratio_combinations.xlsx'),
    (roll_comb_df, 'roll_combinations.xlsx'),
):
    frame.to_excel(path + '/3-result/' + filename)

In [17]:
# Pairs ordered from the highest to the lowest percentage. The result gets its
# own name so the builtin `sorted` is not shadowed for the rest of the session.
ratio_comb_sorted = ratio_comb_df.sort_values(by='oran', ascending=False)

# Visualize

In [18]:
# Split the flag columns: even positions are the 'b' columns, odd positions
# the 'k' columns (same slicing as the cross-table cells).
df1, df2 = ohe[ohe.columns[::2]], ohe[ohe.columns[1::2]]

# Per row of `ohe`: total of the 'b' flags, total of the 'k' flags and their
# ratio k / b.
df = pd.DataFrame({'b': df1.sum(axis=1), 'k': df2.sum(axis=1)})
df['ratio'] = df['k'] / df['b']

# Rows with a ratio of at least 0.5, ordered by descending 'b' total and cut
# to the first 49 (the reason for 49 is not visible in this notebook).
target_df = (
    df[df['ratio'] >= 0.5]
    .sort_values(by='b', ascending=False)
    .head(49)
)

# Keep only the selected rows, in the order of `target_df`. Direct row
# selection replaces the transpose / select / transpose round trip.
# `ohe` is deliberately reassigned: the following cell reads the filtered frame.
ohe = ohe.loc[target_df.index]

In [19]:
# Column totals of every second column of `ohe` (from the second one on), ascending.
roll_flag_columns = ohe.columns[1::2]
ohe.loc[:, roll_flag_columns].sum().sort_values()

k24     0
k22     0
k23     0
k6      1
k10     2
k12     3
k7      4
k8      4
k2      4
k20     4
k13     4
k3      4
k1      5
k16     6
k17     6
k18     6
k21     6
k14     8
k4      8
k9      8
k5      9
k19     9
k15     9
k11    15
dtype: int64

In [20]:
# Event names paired with their one-based number.
[(number, event_name) for number, event_name in enumerate(events_list, 1)]

[(1, 'Hikaye Anlatıcılığı 101'),
 (2, 'Finans 101'),
 (3, 'Büyük Veriye Giriş 101'),
 (4, 'Proje Döngüsü Yönetimi 101'),
 (5, 'Girişimcilik 101'),
 (6, 'Arduino 101'),
 (7, 'İşletim Sistemleri 101'),
 (8, 'IOT 101'),
 (9, 'Zihin Haritalama 101'),
 (10, 'Blockchain 101'),
 (11, 'Siber Güvenlik 101'),
 (12, 'Eleştirel Düşünce 101'),
 (13, 'Yapay Zeka 101'),
 (14, 'Python 101'),
 (15, 'İnovasyon 101'),
 (16, 'Machine Learning 101'),
 (17, 'Dijital Şehir 101'),
 (18, 'Dijital Dünyada Bilinç 101'),
 (19, 'Tasarım Odaklı Düşünce 101'),
 (20, 'Araştırma Yöntemleri 101'),
 (21, 'Tasarım İlkeleri 101'),
 (22, 'Tasarım 101'),
 (23, 'Metaverse 101'),
 (24, 'Programlama ve Algoritma 101')]