<a href="https://colab.research.google.com/github/nerudxlf/contribution-of-departments/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re

import pandas as pd

from pandas import DataFrame
from google.colab import files

# Trigger the google.colab file-upload helper. Presumably this is how the Excel
# workbooks read later in the notebook get into the runtime — TODO confirm.
files.upload()

{}

In [None]:
def merge_two_dicts(x: dict, y: dict) -> dict:
    """
    Return a new dictionary holding the entries of both arguments.

    :param x: base dictionary; it is copied, never modified
    :param y: dictionary whose entries are laid over the copy of x, so on a
        key clash the value from y wins
    :return: the merged copy
    """
    merged = x.copy()
    for key, value in y.items():
        merged[key] = value
    return merged


class DataCalculation:
  """
  Base class for computing per-department indicators.

  The employees table is expected to provide the columns "Подразделение"
  (department), "ФИО" (full name) and "Ставка" (rate); the dictionary table
  provides "Сотрудник" (employee) and "names" (searchable spellings).

  :function find_in_dictionary: look an author spelling up in the dictionary
  :function __get_sum_dict_npr: static helper summing every rate held by one
  person across departments
  :function __get_current_value_proportion_npr: static helper turning each rate
  into the person's share of their own total
  :function get_employees_dict: build {department: {full name: share}}
  :function count_values: hook that subclasses override to compute the
  indicator per department
  """
  # Class-level fallback kept for backward compatibility only. __init__ gives
  # every instance its own dict so results cannot leak between calculators.
  department_values = {}

  def __init__(self, dictionary_df: DataFrame, employees_df: DataFrame, data_df: DataFrame):
    """
    :param dictionary_df: table with the columns "Сотрудник" and "names"
    :param employees_df: table with "Подразделение", "ФИО" and "Ставка"
    :param data_df: publication table; the column read from it is chosen by
    the subclass
    """
    self.dictionary = dict(zip(dictionary_df["Сотрудник"].to_list(), dictionary_df["names"].to_list()))
    self.employees = employees_df
    self.data_df = data_df
    self.department_values = {}

  def find_in_dictionary(self, names: str):
    """
    Return the first dictionary key whose "names" text contains the query.

    The query is lower-cased before the substring test; the stored text is
    used as is.

    :param names: author spelling to look up
    :return: the matching "Сотрудник" value, or None when nothing matches
    """
    if not isinstance(names, str):
      return None
    needle = names.lower()
    # An empty needle is a substring of every string and would wrongly match
    # the first dictionary entry.
    if not needle.strip():
      return None
    for employee, spellings in self.dictionary.items():
      # Empty spreadsheet cells arrive as NaN (a float), not as a string.
      if isinstance(spellings, str) and needle in spellings:
        return employee
    return None

  @staticmethod
  def __get_sum_dict_npr(dictionary: dict) -> dict:
    """
    Sum the rates of every person over all departments.

    :param dictionary: {department: {name: rate}}
    :return: {name: total rate}
    """
    totals = {}
    for shares in dictionary.values():
      for person, rate in shares.items():
        if person in totals:
          totals[person] += rate
        else:
          totals[person] = rate
    return totals

  @staticmethod
  def __get_current_value_proportion_npr(dictionary_tmp: dict, current_dict: dict) -> dict:
    """
    Replace every rate in current_dict by rate / total, in place.

    :param dictionary_tmp: {name: total rate}
    :param current_dict: {department: {name: rate}}; modified and returned
    :return: current_dict with rates converted to shares
    """
    for shares in current_dict.values():
      for person, rate in shares.items():
        total = dictionary_tmp[person]
        # A zero total would raise ZeroDivisionError; such a person simply
        # contributes nothing.
        shares[person] = rate / total if total else 0.0
    return current_dict

  # Single-underscore spellings reachable from subclasses: the double-underscore
  # names above are name-mangled and therefore invisible outside this class.
  _get_sum_dict_npr = __get_sum_dict_npr
  _get_current_value_proportion_npr = __get_current_value_proportion_npr

  def get_employees_dict(self) -> dict:
    """
    Build {department: {full name: share}} from the employees table.

    Only rows whose department text contains "Кафедра" are used. A share is
    the person's rate in that department divided by the sum of their rates
    over all retained departments.

    NOTE(review): when the same person appears twice in one department the
    later row overwrites the earlier one; confirm that is intended.
    """
    departments = self.employees["Подразделение"].to_list()
    full_names = self.employees["ФИО"].to_list()
    rates = self.employees["Ставка"].to_list()
    result = {}
    for department, full_name, rate in zip(departments, full_names, rates):
      # Skip empty cells (NaN/None) and every unit that is not a department.
      if not isinstance(department, str) or "Кафедра" not in department:
        continue
      result.setdefault(department, {})[full_name] = rate
    totals = self.__get_sum_dict_npr(result)
    return self.__get_current_value_proportion_npr(totals, result)

  def count_values(self):
    """Hook for subclasses; the base implementation does nothing and returns None."""
    pass

class DataCalculationScopus(DataCalculation):
  """Department indicator computed from the "Авторы" column, authors separated by ", "."""

  def count_values(self):
    """
    Add up, per department, the shares of every recognised author.

    Each author entry of each row is looked up with find_in_dictionary; for a
    recognised employee, that employee's share is added to every department
    listing them in get_employees_dict().

    :return: {department: accumulated share}; also stored in
    self.department_values
    """
    employees_dict = self.get_employees_dict()
    totals = {}
    for authors in self.data_df["Авторы"].to_list():
      # An empty spreadsheet cell is read as NaN (a float) and has no split().
      if not isinstance(authors, str):
        continue
      for author in authors.split(", "):
        name = self.find_in_dictionary(author)
        if not name:
          # Skip only this author. Stopping at the first unknown co-author
          # would make the result depend on the order of the author list.
          continue
        for department, shares in employees_dict.items():
          share = shares.get(name)
          if share:
            totals[department] = totals.get(department, 0) + share
    # Start from a fresh dict on every call so that calling twice does not
    # double the totals.
    self.department_values = totals
    return self.department_values

class DataCalculationWoS(DataCalculation):
  """Department indicator computed from the "Authors" column, authors separated by "; "."""

  def count_values(self):
    """
    Add up, per department, the shares of every recognised author.

    Each author entry of each row is looked up with find_in_dictionary; for a
    recognised employee, that employee's share is added to every department
    listing them in get_employees_dict().

    :return: {department: accumulated share}; also stored in
    self.department_values
    """
    employees_dict = self.get_employees_dict()
    totals = {}
    for authors in self.data_df["Authors"].to_list():
      # An empty spreadsheet cell is read as NaN (a float) and has no split().
      if not isinstance(authors, str):
        continue
      for author in authors.split("; "):
        name = self.find_in_dictionary(author)
        if not name:
          # Skip only this author. Stopping at the first unknown co-author
          # would make the result depend on the order of the author list.
          continue
        for department, shares in employees_dict.items():
          share = shares.get(name)
          if share:
            totals[department] = totals.get(department, 0) + share
    # Start from a fresh dict on every call so that calling twice does not
    # double the totals.
    self.department_values = totals
    return self.department_values


class DataCalculationElibrary(DataCalculation):
  """
  Department indicator computed from the "Авторы" column (authors separated by
  ", "), matching people by abbreviated names of the form "Surname N." or
  "Surname N.P.".
  """

  @staticmethod
  def _abbreviate(full_name):
    """
    Shorten "Surname Name [Patronymic]" to "Surname N." / "Surname N.P.".

    :return: the abbreviated name, or None when the value is not a string or
    has fewer than two words
    """
    parts = full_name.split() if isinstance(full_name, str) else []
    if len(parts) == 2:
      return f"{parts[0]} {parts[1][0]}."
    if len(parts) >= 3:
      return f"{parts[0]} {parts[1][0]}.{parts[2][0]}."
    return None

  def __get_name(self, name: str):
    """
    Normalise one author entry to the abbreviated form used as lookup key.

    An entry containing Cyrillic letters is returned unchanged; any other
    entry is resolved through find_in_dictionary and the employee name found
    there is abbreviated.

    :return: the lookup key, or None when the author cannot be resolved
    """
    if re.search('[а-яА-Я]', name):
      return name
    employee = self.find_in_dictionary(name)
    return self._abbreviate(employee) if employee else None

  def get_employees_dict(self):
    """
    Build {department: {abbreviated name: share}} from the employees table.

    Only rows whose department text contains "Кафедра" are used. A share is
    the rate in that department divided by the sum of the rates recorded under
    the same abbreviated name over all retained departments.
    """
    departments = self.employees["Подразделение"].to_list()
    full_names = self.employees["ФИО"].to_list()
    rates = self.employees["Ставка"].to_list()
    result = {}
    for department, full_name, rate in zip(departments, full_names, rates):
      # Skip empty cells (NaN/None) and every unit that is not a department.
      if not isinstance(department, str) or "Кафедра" not in department:
        continue
      short_name = self._abbreviate(full_name)
      if short_name is None:
        continue
      # Record every employee of the department, not only the first one.
      result.setdefault(department, {})[short_name] = rate

    # The parent's summing/normalising helpers are name-mangled and cannot be
    # reached from a subclass, so the same two passes are done here.
    totals = {}
    for shares in result.values():
      for person, rate in shares.items():
        if person in totals:
          totals[person] += rate
        else:
          totals[person] = rate
    for shares in result.values():
      for person, rate in shares.items():
        total = totals[person]
        # A zero total would raise ZeroDivisionError; such a person simply
        # contributes nothing.
        shares[person] = rate / total if total else 0.0
    return result

  def count_values(self):
    """
    Add up, per department, the shares of every recognised author.

    :return: {department: accumulated share}; also stored in
    self.department_values
    """
    employees_dict = self.get_employees_dict()
    totals = {}
    for authors in self.data_df["Авторы"].to_list():
      # An empty spreadsheet cell is read as NaN (a float) and has no split().
      if not isinstance(authors, str):
        continue
      for author in authors.split(", "):
        name = self.__get_name(author)
        if not name:
          # Skip only this author. Stopping at the first unknown co-author
          # would make the result depend on the order of the author list.
          continue
        for department, shares in employees_dict.items():
          share = shares.get(name)
          if share:
            totals[department] = totals.get(department, 0) + share
    # Start from a fresh dict on every call so that calling twice does not
    # double the totals.
    self.department_values = totals
    return self.department_values

In [None]:
# Inputs: name dictionary, employees table and the publication export.
dictionary_df = pd.read_excel("dictionary.xlsx")
employees_df = pd.read_excel("Сотрудники.xls")
data_df = pd.read_excel("Scopus 2021.xlsx")

# Alternative calculator, currently disabled:
# dc_wos = DataCalculationWoS(dictionary_df, employees_df, data_df)
# count_values = dc_wos.count_values()
dc_scopus = DataCalculationScopus(dictionary_df, employees_df, data_df)
count_values = dc_scopus.count_values()

# One row per department, written without the index column.
result_df = pd.DataFrame(
    {
        "Кафедра": list(count_values.keys()),
        "Значения": list(count_values.values()),
    }
)
result_df.to_excel("result Scopus 2021 AR.xlsx", index=False)
