# Сводные таблицы и тест хи-квадрат

In [7]:
import pandas as pd
# Load the listings table: the file is semicolon-delimited and its first
# column becomes the row index.
data = pd.read_csv(
    "extended_data.csv",
    sep=';',
    index_col=0,
)

In [8]:
import numpy as np

# Colour buckets used for the contingency tables. A compound name such as
# 'grey_silver' covers each of its underscore-separated parts
# ('grey' and 'silver'); 'other' is the fallback bucket.
color_groups = ['black', 'white', 'grey_silver', 'blue_brown', 'red', 'other']


def color_group(x):
    """Map a raw colour name to one of the buckets in ``color_groups``.

    Parameters
    ----------
    x : str or missing value
        Raw colour name, e.g. ``'grey'`` or ``'blue'``.

    Returns
    -------
    str or float
        The first bucket whose full name or one of whose underscore-separated
        parts equals ``x``; ``'other'`` when nothing matches; ``np.nan`` when
        ``x`` is missing.
    """
    if pd.isna(x):
        return np.nan
    for group in color_groups:
        # Compare whole tokens. A plain substring test (`x in group`) would
        # also accept '' or fragments like 'e' / 'lack' and put them into a
        # real colour bucket, and would raise TypeError for non-string values.
        if x == group or x in group.split('_'):
            return group
    return 'other'

# Bucket the raw colour names; color_group returns NaN for missing colours.
data['color_group'] = data['color'].apply(color_group)

# 'manual' -> 'MT', every other known gearbox value -> 'AT'.
# Missing gearbox values are kept as NaN instead of being counted as 'AT',
# so they drop out of the cross-tabulations rather than inflating the
# automatic column (same treatment as missing colours above).
data['transmission_group'] = (
    data['transmission']
    .eq('manual')
    .map({True: 'MT', False: 'AT'})
    .where(data['transmission'].notna())
)

In [11]:
from scipy.stats import chi2_contingency
from IPython.display import Latex

# Columns to cross-tabulate pairwise, and the Russian (instrumental case)
# label used for each of them in the printed conclusion, in the same order.
categorical_vars = ['manufacturer', 'transmission_group', 'color_group', 'petrol_type', 'is_sport_line', 'is_crossover', 'age']
# NOTE(review): the label for `age` used to read "пробегом" (mileage); it now
# reads "возрастом" (age) to match the column name. Confirm that `age` is not
# actually a binned mileage.
output_vars = ['производителем', "типом коробки передач", "цветом", "типом топлива", "принадлежностью к спортивной серии", "принадлежностью к кроссоверному типу", "возрастом"]
assert len(categorical_vars) == len(output_vars), "every variable needs a label"

ALPHA = 0.05              # significance level for each individual test
MIN_EXPECTED_FREQ = 5     # an expected cell count must exceed this value
MIN_PERCENT_ABOVE = 95    # required share (in %) of cells that exceed it
SEPARATOR = "------------------------------------------------------"

# NOTE(review): every pair is tested at the same ALPHA with no correction for
# multiple comparisons; keep that in mind when reading borderline p-values.
labelled_vars = list(zip(categorical_vars, output_vars))

for position, (i1, label1) in enumerate(labelled_vars):
    # Only pair with variables further down the list so each unordered pair
    # is tested exactly once.
    for i2, label2 in labelled_vars[position + 1:]:
        # Contingency table of observed counts for the two columns.
        contingency_table = pd.crosstab(data[i1], data[i2])

        display(Latex(f"Тест Хи-квадрат для переменных {i1},  {i2}:"))

        # chi2_contingency raises on an empty table, so report and move on.
        if contingency_table.size == 0:
            display(Latex("Не применим, т. к. таблица сопряженности пуста."))
            print(SEPARATOR)
            continue

        # Chi-square test of independence. SciPy applies Yates' continuity
        # correction by default when the table has one degree of freedom.
        chi2_stat, p_val, dof, expected = chi2_contingency(contingency_table)

        # Applicability check: share of cells whose expected frequency is
        # above the minimum. With this threshold a single small cell is
        # enough to skip the test for every table size produced here.
        percentage_above_5 = (expected > MIN_EXPECTED_FREQ).mean() * 100

        if percentage_above_5 < MIN_PERCENT_ABOVE:
            display(Latex("Не применим, т. к. не соблюдено условие наличие достаточного количества частот больше 5."))
            print(SEPARATOR)
            continue

        display(Latex(f"Chi2: {chi2_stat}"))
        display(Latex(f"P-value: {p_val}"))

        display(Latex("Таблица сопряженности:"))
        display(contingency_table)

        # A small p-value shows that an association is statistically
        # detectable, not that it is strong; a large one means H0 could not
        # be rejected, not that independence was proven.
        if p_val < ALPHA:
            display(Latex(f"Значение p-value < {ALPHA:.0%}, значит, существует статистически значимая связь между {label1} и {label2}. (гипотеза H0 отвергается в пользу H1)"))
        else:
            display(Latex(f"Значение p-value >= {ALPHA:.0%}, значит, статистически значимая связь между {label1} и {label2} не обнаружена. (нет оснований отвергнуть гипотезу H0)"))
        print(SEPARATOR)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

transmission_group,AT,MT
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1
Audi,2814,400
BMW,6044,278
Mercedes-Benz,5296,197


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

color_group,black,blue_brown,grey_silver,other,red,white
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Audi,1027,546,607,206,142,595
BMW,2420,976,1139,226,166,1290
Mercedes-Benz,2405,642,894,206,161,1099


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

petrol_type,diesel,petrol
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1
Audi,734,2404
BMW,3079,3111
Mercedes-Benz,1957,3462


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_sport_line,False,True
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1
Audi,3133,81
BMW,2909,103
Mercedes-Benz,2126,275


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_crossover,False,True
manufacturer,Unnamed: 1_level_1,Unnamed: 2_level_1
Audi,2108,1106
BMW,3055,3267
Mercedes-Benz,3130,2363


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

color_group,black,blue_brown,grey_silver,other,red,white
transmission_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AT,5666,1983,2454,517,412,2900
MT,186,181,186,121,57,84


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

petrol_type,diesel,petrol
transmission_group,Unnamed: 1_level_1,Unnamed: 2_level_1
AT,5611,8262
MT,159,715


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_sport_line,False,True
transmission_group,Unnamed: 1_level_1,Unnamed: 2_level_1
AT,7476,451
MT,692,8


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_crossover,False,True
transmission_group,Unnamed: 1_level_1,Unnamed: 2_level_1
AT,7430,6724
MT,863,12


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

petrol_type,diesel,petrol
color_group,Unnamed: 1_level_1,Unnamed: 2_level_1
black,2435,3323
blue_brown,809,1311
grey_silver,1019,1555
other,152,473
red,92,368
white,1167,1766


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_sport_line,False,True
color_group,Unnamed: 1_level_1,Unnamed: 2_level_1
black,2958,197
blue_brown,1246,62
grey_silver,1479,80
other,417,22
red,259,18
white,1660,67


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_crossover,False,True
color_group,Unnamed: 1_level_1,Unnamed: 2_level_1
black,3200,2652
blue_brown,1132,1032
grey_silver,1551,1089
other,396,242
red,317,152
white,1514,1470


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_sport_line,False,True
petrol_type,Unnamed: 1_level_1,Unnamed: 2_level_1
diesel,2916,37
petrol,5129,420


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_crossover,False,True
petrol_type,Unnamed: 1_level_1,Unnamed: 2_level_1
diesel,2248,3522
petrol,5879,3098


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

age,less_average,medium,more_average,new,oldest,young
petrol_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
diesel,506,3113,296,7,64,1784
petrol,1642,3765,1286,6,626,1652


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

is_crossover,False,True
is_sport_line,Unnamed: 1_level_1,Unnamed: 2_level_1
False,4625,3543
True,205,254


<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

------------------------------------------------------


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

age,less_average,medium,more_average,new,oldest,young
is_crossover,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,1379,3891,1214,3,684,1122
True,784,3088,369,10,10,2475


<IPython.core.display.Latex object>

------------------------------------------------------
