## Wykrywanie outlierów

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import font

# Font setup comes from the project-local `font` module
# (presumably switches matplotlib to the Inter typeface — confirm in font.py).
font.set_inter_font()

# 'Kod' is forced to str so pandas does not parse it as a number
# (presumably to keep leading zeros of the codes — confirm against the data).
df = pd.read_csv('complete.csv', dtype={'Kod': str})

columns = ['Populacja', 'Produkcyjny', 'Bezrobocie', 'Wynagrodzenia', 'Mediana', 'Sprzedaz', 'Powierzchnia']
years = sorted(df['Rok'].unique())


def _iqr_fences(values):
    """Return ``(q1, q3, lower, upper)`` for a numeric Series.

    ``lower``/``upper`` are the Tukey fences ``q1 - 1.5 * IQR`` and
    ``q3 + 1.5 * IQR``; observations outside them are treated as outliers.
    For an empty Series all four values are NaN.
    """
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    return q1, q3, q1 - 1.5 * iqr, q3 + 1.5 * iqr


# One PNG per column: a vertical stack of scatter plots (one row per year)
# with typical values in green, IQR outliers in red and the quartile/fence lines.
for column in columns:
    fig = plt.figure(figsize=(20, 2 * len(years)))

    for i, year in enumerate(years, 1):
        df_year = df[df['Rok'] == year]

        # Missing values are dropped first: NaN fails both the ">= lower" and
        # "<= upper" comparisons, so negating the "typical" mask would count
        # every missing observation as an outlier and inflate the statistics.
        values = df_year[column].dropna()

        q1, q3, lower_bound, upper_bound = _iqr_fences(values)

        is_outlier = (values < lower_bound) | (values > upper_bound)
        typical = values[~is_outlier]
        outliers = values[is_outlier]

        # Counts refer to non-missing observations only.
        n_outliers = len(outliers)
        n_total = len(values)
        percent_outliers = 100 * n_outliers / n_total if n_total > 0 else 0.0

        ax = fig.add_subplot(len(years), 1, i)
        # The x coordinate is the row's index label in the source DataFrame.
        ax.scatter(typical.index, typical,
                   color='green', alpha=0.6, label='Wartości typowe')
        ax.scatter(outliers.index, outliers,
                   color='red', alpha=0.7,
                   label=f'Outliery: {n_outliers}/{n_total} ({percent_outliers:.1f}%)')
        ax.axhline(y=q1, color='k', linestyle='--', label=f'Q1 = {q1:.2f}')
        ax.axhline(y=q3, color='k', linestyle='--', label=f'Q3 = {q3:.2f}')
        ax.axhline(y=lower_bound, color='#7a7a7a', linestyle='--', label=f'Q1 - 1.5IQR = {lower_bound:.2f}')
        ax.axhline(y=upper_bound, color='#7a7a7a', linestyle='--', label=f'Q3 + 1.5IQR = {upper_bound:.2f}')

        # Title above the first subplot only; it acts as the figure title.
        if i == 1:
            ax.text(0.5, 1.22, f'{column} - rozkład wartości z kwartylami',
                    transform=ax.transAxes, fontsize=32, fontweight='bold', ha='center', va='bottom')

        # Year label to the right of the axes, in axes coordinates.
        ax.text(
            1.02, 1, str(year),
            transform=ax.transAxes,
            fontsize=32,
            fontweight='bold',
            ha='left',
            va='top'
        )

        # Legend sits below the year label, outside the plotting area.
        # frameon=False already hides the legend frame, so no further frame
        # styling is needed.
        ax.legend(
            loc='upper left',
            bbox_to_anchor=(1.02, 0.75),
            borderaxespad=0.,
            frameon=False
        )

        ax.grid(False)

    # rect keeps the right 15% of the figure free for the year labels and legends;
    # the top value above 1 is presumably hand-tuned for the title spacing.
    fig.tight_layout(rect=[0, 0, 0.85, 1.08])
    fig.savefig(f'outliery_{column}.png', bbox_inches='tight', dpi=300)
    # Close explicitly so figures do not accumulate in memory across columns.
    plt.close(fig)
