# Unit tests for psychotyping

In [1]:
import sys
sys.path.append("../../")

In [4]:
import warnings
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import json
import numpy as np
import random
from collections import defaultdict, Counter
from analytics_lib.nlp_texts.psychotype import dict_with_feature_range_psyh, df_with_sample_ranges_and_psych

In [5]:
# Load the three pickled inputs; the paths are relative, so they resolve against the
# notebook's current working directory.
# NOTE(review): pd.read_pickle unpickles the files — only use with data files you trust.
public_df = pd.read_pickle("../data/public_df.pkl")
public_semantic_role_df = pd.read_pickle("../data/public_semantic_role_df.pkl")
morph_df = pd.read_pickle("../data/morph_df.pkl")

# Table consumed by dict_with_features_by_psychotype below, which expects a
# "Название характеристики" column plus one column per psychotype.
# NOTE(review): the exact layout comes from analytics_lib — confirm there.
df_with_ranges = df_with_sample_ranges_and_psych(public_df, public_semantic_role_df, morph_df)

In [81]:
def dict_with_features_by_psychotype(psychotype, df_with_ranges):
    """Generate one synthetic test case for the given psychotype.

    ``df_with_ranges`` is re-indexed by its "Название характеристики" column.
    For every feature (index label) the cell in the ``psychotype`` column is
    read as text of the form ``"<label>: <pair>"``:

    * no ":" in the cell -> the feature value is recorded as -1;
    * the pair's two endpoints differ by less than 1e-5 -> the feature is
      skipped (no value is recorded for it here);
    * otherwise a value is drawn uniformly between the two endpoints, and
      every psychotype whose cell for this feature carries the same label
      gets its counter incremented.

    Args:
        psychotype: Name of the column to sample the feature values from.
        df_with_ranges: Table with a "Название характеристики" column and
            one column per psychotype.

    Returns:
        The dict built by ``processed_dict_with_features`` with an extra
        "psychotype" key mapping psychotype name -> non-zero match count.
    """
    psychotype_names = (
        "Истероид", "Гипертим", "Шизоид", "Параноял",
        "Эпилептоид", "Эмотив", "Тревожный",
    )
    match_counts = dict.fromkeys(psychotype_names, 0)
    indexed = df_with_ranges.set_index("Название характеристики")
    sampled = {}

    for feature in indexed.index.values:
        cell = indexed.at[feature, psychotype]
        colon_pos = cell.find(":")
        if colon_pos == -1:
            # No "<label>: <pair>" structure in this cell.
            sampled[feature] = -1
            continue

        size_label = cell[:colon_pos]
        # Skip the ":" and the character right after it, then parse the rest.
        # NOTE(review): eval executes whatever text is stored in the table —
        # acceptable only while the table is produced by trusted code.
        bounds = eval(cell[colon_pos + 2:])
        if abs(bounds[0] - bounds[1]) < 1e-5:
            # Degenerate (near zero-width) interval: nothing to sample.
            continue

        # Count every psychotype that shares this feature's label.
        for name in psychotype_names:
            if size_by_feature_name_and_psychotype(indexed, feature, name) == size_label:
                match_counts[name] += 1

        sampled[feature] = random.uniform(bounds[0], bounds[1])

    result = processed_dict_with_features(sampled, indexed)
    # Keep only the psychotypes that matched at least once.
    result["psychotype"] = {name: hits for name, hits in match_counts.items() if hits}
    return result

In [82]:
def processed_dict_with_features(dict_with_all_features, df_with_ranges):
    """Arrange flat feature values into the nested "text_feat" layout.

    Args:
        dict_with_all_features: Mapping feature name -> value.
        df_with_ranges: Frame whose index labels are the feature names that
            go into the "Морфология" group.

    Returns:
        ``{"text_feat": {"Морфология": {...}, "Агенсность, Предикаты": {...}}}``.
        The first group has one entry per index label of ``df_with_ranges``;
        the second has the four fixed semantic-role names. A name missing
        from ``dict_with_all_features`` gets the value -1.
    """
    semantic_roles = (
        'Агенс',
        'Пациенс',
        'Внутренний предикат',
        'Внешний предикат',
    )
    morphology_group = {
        name: dict_with_all_features.get(name, -1)
        for name in df_with_ranges.index.values
    }
    role_group = {
        name: dict_with_all_features.get(name, -1)
        for name in semantic_roles
    }
    return {
        "text_feat": {
            "Морфология": morphology_group,
            "Агенсность, Предикаты": role_group,
        }
    }

In [83]:
def size_by_feature_name_and_psychotype(df, feature_name, psychotype):
    """Return the label stored before the first ":" in one cell of ``df``.

    Args:
        df: Frame indexed by feature name, with one column per psychotype.
        feature_name: Row label of the cell.
        psychotype: Column label of the cell.

    Returns:
        The cell text up to (not including) the first ":", or "-" when the
        cell contains no ":" at all.
    """
    label, colon, _ = df.at[feature_name, psychotype].partition(":")
    return label if colon else "-"

## Сохранение в json по 5 тестов на каждый психотип

In [88]:
%%time
# Write five generated test cases per psychotype to tests/<psychotype>_<i>.json.
# NOTE: the trailing [:1] limits the run to the first psychotype in the list.
# NOTE(review): the files are opened under the relative "tests/" directory — presumably it
# already exists; confirm before running.
for psych in ["Истероид", "Гипертим", "Шизоид", "Параноял", "Эпилептоид", "Эмотив", "Тревожный"][:1]:
    for i in range(5):
        with open(f'tests/{psych}_{i}.json', 'w', encoding = "utf8") as f:
            # ensure_ascii=False keeps the Cyrillic keys readable in the file.
            json.dump(dict_with_features_by_psychotype(psych, df_with_ranges), f, ensure_ascii=False)

CPU times: user 16.9 ms, sys: 2.36 ms, total: 19.2 ms
Wall time: 17.8 ms


In [89]:
def df_from_dict(dict_input):
    """Build a one-row DataFrame from a flat feature dict.

    Each key of ``dict_input`` becomes a column holding its single value, and
    a 'Текст' column with the placeholder text 'Проверочный текст' is added
    (or overwritten if the key is already present).

    The caller's dict is left untouched: the placeholder is written into a
    fresh mapping, so fixtures passed in are not modified as a side effect.

    Args:
        dict_input: Mapping column name -> scalar value.

    Returns:
        A pandas DataFrame with exactly one row.
    """
    # Wrap every value in a list so that each key yields a one-row column.
    single_row = {key: [value] for key, value in dict_input.items()}
    single_row['Текст'] = ['Проверочный текст']
    return pd.DataFrame.from_dict(single_row)

In [90]:
# Read the JSON test cases from tests/ back into one flat list, one dict per file.
list_of_all_tests = []
# NOTE: the trailing [:1] keeps only the first psychotype, so only its five files are read.
list_of_psychotype = ['Истероид', 'Гипертим', 'Шизоид', 'Параноял', 'Эпилептоид', 'Эмотив', 'Тревожный'][:1]
for psych in list_of_psychotype:
    for i in range(5):
        dict_temp = {}  # dictionary used for the check
        with open(f'tests/{psych}_{i}.json', encoding='utf-8') as json_file:
            dict_temp = json.load(json_file)
            list_of_all_tests.append(dict_temp)

In [92]:
# For every loaded test case: turn its two feature groups into one-row frames, run the
# psychotype computation on them, and compare with the psychotype counts stored in the case.
# NOTE(review): list_of_tests_res is created but never filled or read in the visible code.
list_of_tests_res = []
for dict_test in list_of_all_tests:
    person_public_df = df_from_dict(dict_test['text_feat']['Морфология'])
    person_public_semantic_role_df = df_from_dict(dict_test['text_feat']['Агенсность, Предикаты'])
    text = 'Проверочный текст'
    # NOTE(review): dict_with_psychotype_by_text is not among the names imported at the top
    # and its only visible definition is commented out in the last cell — confirm it is in
    # scope, otherwise this line raises NameError.
    dict_psych = dict(dict_with_psychotype_by_text(text, person_public_df, person_public_semantic_role_df, public_df, public_semantic_role_df))

    # Plain copy of the expected psychotype counts from the test case.
    dict_test_psych = {k: v for k, v in dict_test["psychotype"].items()}

    # The computed counts must equal the expected ones exactly.
    assert dict_test_psych == dict_psych, "Несовпадение"

In [53]:
# def dict_with_psychotype_by_text(text, person_public_df, person_public_semantic_role_df, public_df, public_semantic_role_df):
#     df = df_with_features_ranges_psyh_by_text(text, person_public_df, person_public_semantic_role_df, pd.DataFrame(), public_df, public_semantic_role_df, pd.DataFrame())
#     dict_res = Counter(np.concatenate(df['Психотип']))
#     return dict_res

# def df_with_features_ranges_psyh_by_text(text, person_public_df, person_public_semantic_role_df, person_morph_df, public_df, public_semantic_role_df, morph_df):
#     res_df = pd.DataFrame()
#     list_of_text_features = list(df_with_ranges.set_index("Название характеристики").index.values)
#     list_of_semantic_role_features = ['Агенс', 'Пациенс', 'Внутренний предикат', 'Внешний предикат']
#     list_of_morph_features = ["Прошедшее, %", "Настоящее, %", "Будущее, %", "Абсолютное, %"]
#     list_of_text_features = [item for item in list_of_text_features if item not in list_of_semantic_role_features + list_of_morph_features]
#     #list_of_morph_features = ["Прошедшее, %", "Настоящее, %", "Будущее, %", "Абсолютное, %"]
#     for feature_name in list_of_text_features:
#         dict_text = dict_with_feature_range_psyh(text, feature_name, person_public_df, public_df)
#         res_df = res_df.append(dict_text, ignore_index=True)
#     for feature_name in list_of_semantic_role_features:
#         dict_semantic = dict_with_feature_range_psyh(text, feature_name, person_public_semantic_role_df, public_semantic_role_df)
#         res_df = res_df.append(dict_semantic, ignore_index=True)
#     # for feature_name in list_of_morph_features:
#     #     dict_morph = dict_with_feature_range_psyh(text, feature_name, person_morph_df, morph_df)
#     #     res_df = res_df.append(dict_morph, ignore_index=True)
#     cols = ["Название характеристики", "Значение", "Количество", "Диапазон", "Психотип"]
#     res_df = res_df[cols]
#     return res_df