In [1]:
import pandas as pd
import seaborn as sns
from data_preprocessor import DataPreprocessor

In [4]:
# Load the Titanic dataset through seaborn. If loading fails (for example when
# the dataset cannot be downloaded), fall back to a small hand-made sample so
# the remaining cells can still be executed.
try:
    df = sns.load_dataset('titanic')
except Exception as load_error:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate to the kernel.
    # Report the fallback explicitly: the sample below has only 10 rows and
    # 5 columns, so every later output differs from the real dataset.
    print(f"Не удалось загрузить датасет 'titanic' ({load_error!r}); используется резервная выборка.")
    data = {
        'age': [22, 38, 26, 35, None, None, 54, 2, 27, 14],
        'fare': [7.25, 71.28, 7.92, 53.1, 8.05, 8.45, 51.86, 21.07, 11.13, 30.07],
        'class': ['Third', 'First', 'Third', 'First', 'Third', 'Third', 'First', 'Third', 'Second', 'Second'],
        'deck': [None, 'C', None, 'C', None, None, 'E', None, None, None],  # mostly missing values
        'embarked': ['S', 'C', 'S', 'S', 'S', 'Q', 'S', 'S', 'S', 'C']
    }
    df = pd.DataFrame(data)

In [5]:
# Show the first rows of the raw frame.
print("=== Исходные данные (первые 5 строк) ===")
raw_preview = df.head()
print(raw_preview)

# Count missing values per column before any preprocessing is applied.
print("\n=== Информация о пропусках до обработки ===")
missing_per_column = df.isnull().sum()
print(missing_per_column)

=== Исходные данные (первые 5 строк) ===
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  

=== Информация о пропусках до обработки ===
survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0


In [6]:
# Wrap the loaded frame in the project preprocessor and run its pipeline.
# NOTE(review): presumably `threshold` is the missing-value share above which a
# column is dropped and `method` selects the scaling scheme — confirm against
# DataPreprocessor.fit_transform.
preprocessor = DataPreprocessor(df)
processed_df = preprocessor.fit_transform(threshold=0.4, method='minmax')

# Preview of the transformed frame.
print("\n\n=== Данные после обработки (первые 5 строк) ===")
processed_preview = processed_df.head()
print(processed_preview)

# Compare the shape of the input frame with the shape of the result.
print("\n=== Проверка размерности ===")
shape_before, shape_after = df.shape, processed_df.shape
print(f"Было: {shape_before}, Стало: {shape_after}")

# Summarize what the preprocessor recorded in its history mapping.
print("\n=== История преобразований ===")
print("Удаленные столбцы:", preprocessor.history['dropped_columns'])
filled_sample = list(preprocessor.history['filled_values'].items())[:3]  # first three entries only
print("Заполненные значения (пример):", filled_sample)



=== Данные после обработки (первые 5 строк) ===
   survived  pclass       age  sibsp  parch      fare  adult_male  alone  \
0       0.0     1.0  0.271174  0.125    0.0  0.014151        True  False   
1       1.0     0.0  0.472229  0.125    0.0  0.139136       False  False   
2       1.0     1.0  0.321438  0.000    0.0  0.015469       False   True   
3       1.0     0.0  0.434531  0.125    0.0  0.103644       False  False   
4       0.0     1.0  0.434531  0.000    0.0  0.015713        True   True   

   sex_female  sex_male  ...  class_Second  class_Third  who_child  who_man  \
0         0.0       1.0  ...           0.0          1.0        0.0      1.0   
1         1.0       0.0  ...           0.0          0.0        0.0      0.0   
2         1.0       0.0  ...           0.0          1.0        0.0      0.0   
3         1.0       0.0  ...           0.0          0.0        0.0      0.0   
4         0.0       1.0  ...           0.0          1.0        0.0      1.0   

   who_woman  emba

In [7]:
# Report the value range of the 'age' column, if it is still present after
# preprocessing.
if 'age' in processed_df.columns:
    age_values = processed_df['age']
    print(f"\nMin age: {age_values.min()}, Max age: {age_values.max()}")

# List every column of the processed frame.
print("\nСписок столбцов после One-Hot Encoding:")
column_names = processed_df.columns.tolist()
print(column_names)


Min age: 0.0, Max age: 1.0

Список столбцов после One-Hot Encoding:
['survived', 'pclass', 'age', 'sibsp', 'parch', 'fare', 'adult_male', 'alone', 'sex_female', 'sex_male', 'embarked_C', 'embarked_Q', 'embarked_S', 'class_First', 'class_Second', 'class_Third', 'who_child', 'who_man', 'who_woman', 'embark_town_Cherbourg', 'embark_town_Queenstown', 'embark_town_Southampton', 'alive_no', 'alive_yes']
