# Exploratory Data Analysis (EDA)
---

In [3]:
# Import Libraries
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import os
from IPython.display import display, Markdown
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from ydata_profiling import ProfileReport
from rapidfuzz import process
from sklearn.impute import KNNImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

# Set Options
pd.set_option("display.max_rows", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)

In [5]:
# Import Data
DATA_PATH = '../data/'
DATA_FOLDER = "Talent_Academy_Case_DT_2025.xlsx"

FILE_PATH = os.path.join(DATA_PATH, DATA_FOLDER)
df = pd.read_excel(FILE_PATH)

In [7]:
########## CONVERTION ##########
# Convert integer
def conver_to_integer(dataframe: pd.DataFrame, cols: list, values: list) -> pd.DataFrame:

    for col in cols:
        for value in values:
            dataframe[col] = dataframe[col].astype(str).str.replace(value, "")
        dataframe[col] = dataframe[col].astype(int)


conver_to_integer(df,["TedaviSuresi", "UygulamaSuresi"], ["Seans", "Dakika"])

# Convert object
df["HastaNo"] = df["HastaNo"].astype(object)

In [8]:
# Summary Dataframe
def summary_dataframe(dataframe, observation_num=5):
    # First observations
    display(Markdown(f"## First {observation_num} Observation\n---"))
    display(dataframe.head(observation_num))

    # Last observations
    display(Markdown(f"## Last {observation_num} Observation\n---"))
    display(dataframe.tail(observation_num))

    # Columns names
    display(Markdown(f"## Columns Names\n---"))
    print(dataframe.columns)

    # Dataframe information
    display(Markdown(f"## Dataframe Information\n---"))
    print(dataframe.info())

    # Summary Statistics
    display(Markdown(f"## Summary Statistics\n---"))
    display(dataframe.describe(percentiles=[0.25, 0.5 ,0.75, 0.90, 0.95, 0.99]).T)

    # Dataframe shape
    display(Markdown(f"## Dataframe Shape\n---"))
    print(f"Observation: {dataframe.shape[0]}\nColumns: {dataframe.shape[1]}")

    # Missing values
    display(Markdown(f"## Missing Values\n---"))
    print(dataframe.isnull().sum(), f"\n{'#'*30}\nTotal Missing Values: {dataframe.isnull().sum().sum()}")

    # Duplicated
    display(Markdown(f"## Duplicate Rows\n---"))
    print(f"Number of duplicate rows (Total): {dataframe.duplicated().sum()}")

summary_dataframe(df)

## First 5 Observation
---

Unnamed: 0,HastaNo,Yas,Cinsiyet,KanGrubu,Uyruk,KronikHastalik,Bolum,Alerji,Tanilar,TedaviAdi,TedaviSuresi,UygulamaYerleri,UygulamaSuresi
0,145134,60,Kadın,0 Rh+,Türkiye,"Becker Musküler Distrofisi, Hiportiroidizm, Kalp yetmezliği","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",TOZ,Ayak bileği ve ayağın yüzeysel yaralanması,Ayak Bileği,5,Ayak Bileği,20
1,145135,28,Erkek,0 Rh+,Türkiye,"Duchenne Musküler Distrofisi, Myastenia gravis, Becker Musküler Distrofisi, Hipertansiyon","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",,"Omuzun darbe sendromu,DORSALJİ, DİĞER, SERVİKOTORASİK BÖLGE",Dorsalji -Boyun+trapez+skapular,15,Boyun,20
2,145135,28,Erkek,0 Rh+,Türkiye,"Duchenne Musküler Distrofisi, Myastenia gravis, Becker Musküler Distrofisi, Hipertansiyon","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",,"Omuzun darbe sendromu,DORSALJİ, DİĞER, SERVİKOTORASİK BÖLGE",Dorsalji -Boyun+trapez+skapular,15,"Boyun,Sırt",20
3,145135,28,Erkek,0 Rh+,Türkiye,"Duchenne Musküler Distrofisi, Myastenia gravis, Becker Musküler Distrofisi, Hipertansiyon","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",,"Omuzun darbe sendromu,DORSALJİ, DİĞER, SERVİKOTORASİK BÖLGE",Dorsalji -Boyun+trapez+skapular,15,Boyun,5
4,145135,28,Erkek,0 Rh+,Türkiye,"Duchenne Musküler Distrofisi, Myastenia gravis, Becker Musküler Distrofisi, Hipertansiyon","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",,"Omuzun darbe sendromu,DORSALJİ, DİĞER, SERVİKOTORASİK BÖLGE",Dorsalji -Boyun+trapez+skapular,15,"Boyun,Sırt",20


## Last 5 Observation
---

Unnamed: 0,HastaNo,Yas,Cinsiyet,KanGrubu,Uyruk,KronikHastalik,Bolum,Alerji,Tanilar,TedaviAdi,TedaviSuresi,UygulamaYerleri,UygulamaSuresi
2230,145536,48,Erkek,,Türkiye,"Aritmi, Myastenia gravis, Fascioscapulohumeral Distrofi, Hiportiroidizm","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi","ARVELES,CORASPIN",Eklemin kontraktürü,Ekstansör tendon Rehabilitasyon,15,Sol El Bilek Bölgesi,10
2231,145536,48,Erkek,,Türkiye,"Aritmi, Myastenia gravis, Fascioscapulohumeral Distrofi, Hiportiroidizm","Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi","ARVELES,CORASPIN",Eklemin kontraktürü,Ekstansör tendon Rehabilitasyon,15,Sol El Bilek Bölgesi,20
2232,145537,33,Kadın,A Rh+,Türkiye,,"Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",Sucuk,"Aşil tendon yaralanması,​Algonörodistrofi,​ ayak ve ayak bileği",Aşil Rüptürü Rehabilitasyonu,15,Sol Ayak Bileği Bölgesi,20
2233,145537,33,Kadın,A Rh+,Türkiye,,"Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",Sucuk,"Aşil tendon yaralanması,Algonörodistrofi, ayak ve ayak bileği",Aşil Rüptürü Rehabilitasyonu,15,Sol Ayak Bileği Bölgesi,15
2234,145537,33,Kadın,A Rh+,Türkiye,,"Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",Sucuk,"Aşil tendon yaralanması,Algonörodistrofi, ayak ve ayak bileği",Aşil Rüptürü Rehabilitasyonu,15,Sol Ayak Bileği Bölgesi,5


## Columns Names
---

Index(['HastaNo', 'Yas', 'Cinsiyet', 'KanGrubu', 'Uyruk', 'KronikHastalik',
       'Bolum', 'Alerji', 'Tanilar', 'TedaviAdi', 'TedaviSuresi',
       'UygulamaYerleri', 'UygulamaSuresi'],
      dtype='object')


## Dataframe Information
---

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2235 entries, 0 to 2234
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   HastaNo          2235 non-null   object
 1   Yas              2235 non-null   int64 
 2   Cinsiyet         2066 non-null   object
 3   KanGrubu         1560 non-null   object
 4   Uyruk            2235 non-null   object
 5   KronikHastalik   1624 non-null   object
 6   Bolum            2224 non-null   object
 7   Alerji           1291 non-null   object
 8   Tanilar          2160 non-null   object
 9   TedaviAdi        2235 non-null   object
 10  TedaviSuresi     2235 non-null   int64 
 11  UygulamaYerleri  2014 non-null   object
 12  UygulamaSuresi   2235 non-null   int64 
dtypes: int64(3), object(10)
memory usage: 227.1+ KB
None


## Summary Statistics
---

Unnamed: 0,count,mean,std,min,25%,50%,75%,90%,95%,99%,max
Yas,2235.0,47.327069,15.208634,2.0,38.0,46.0,56.0,68.0,74.0,83.0,92.0
TedaviSuresi,2235.0,14.570917,3.725322,1.0,15.0,15.0,15.0,17.0,20.0,25.0,37.0
UygulamaSuresi,2235.0,16.573154,6.268635,3.0,10.0,20.0,20.0,20.0,20.0,28.3,45.0


## Dataframe Shape
---

Observation: 2235
Columns: 13


## Missing Values
---

HastaNo              0
Yas                  0
Cinsiyet           169
KanGrubu           675
Uyruk                0
KronikHastalik     611
Bolum               11
Alerji             944
Tanilar             75
TedaviAdi            0
TedaviSuresi         0
UygulamaYerleri    221
UygulamaSuresi       0
dtype: int64 
##############################
Total Missing Values: 2706


## Duplicate Rows
---

Number of duplicate rows (Total): 928


In [9]:
# Categorical Variable Summary
def categorical_variable_summary(dataframe, plot_categorical=False, observation_num=10):
    display(Markdown(f"## Categorical Variables\n---"))
    categorical_cols = [col for col in dataframe.columns if dataframe[col].dtype in ["object", "category"]]
    print("Categorical Columns:", categorical_cols)

    display(Markdown(f"## Categorical Variables Analysis\n---"))
    for col in categorical_cols:
        # Change NaN to "Missing"
        series = dataframe[col].fillna("Missing")
        counts = series.value_counts()
        ratios = 100 * counts / len(series)
        summary_df = pd.DataFrame({col: counts.index,
                                   "Count": counts.values,
                                   "Ratio (%)": ratios.values})
        display(Markdown(f"**{col} (Unique: {series.nunique()})**"))
        display(summary_df.head(observation_num))
        if plot_categorical:
            fig = px.bar(summary_df, x=col, y="Count", title=f"{col} Distribution")
            fig.show()

# Usage
categorical_variable_summary(df, plot_categorical=True)

## Categorical Variables
---

Categorical Columns: ['HastaNo', 'Cinsiyet', 'KanGrubu', 'Uyruk', 'KronikHastalik', 'Bolum', 'Alerji', 'Tanilar', 'TedaviAdi', 'UygulamaYerleri']


## Categorical Variables Analysis
---

**HastaNo (Unique: 404)**

Unnamed: 0,HastaNo,Count,Ratio (%)
0,145267,27,1.208054
1,145307,26,1.163311
2,145343,20,0.894855
3,145453,18,0.805369
4,145201,17,0.760626
5,145231,15,0.671141
6,145391,14,0.626398
7,145317,14,0.626398
8,145481,13,0.581655
9,145257,12,0.536913


**Cinsiyet (Unique: 3)**

Unnamed: 0,Cinsiyet,Count,Ratio (%)
0,Kadın,1274,57.002237
1,Erkek,792,35.436242
2,Missing,169,7.561521


**KanGrubu (Unique: 9)**

Unnamed: 0,KanGrubu,Count,Ratio (%)
0,Missing,675,30.201342
1,0 Rh+,579,25.90604
2,A Rh+,540,24.161074
3,B Rh+,206,9.217002
4,AB Rh+,80,3.579418
5,B Rh-,68,3.042506
6,A Rh-,53,2.371365
7,0 Rh-,26,1.163311
8,AB Rh-,8,0.357942


**Uyruk (Unique: 5)**

Unnamed: 0,Uyruk,Count,Ratio (%)
0,Türkiye,2173,97.225951
1,Tokelau,27,1.208054
2,Arnavutluk,13,0.581655
3,Azerbaycan,12,0.536913
4,Libya,10,0.447427


**KronikHastalik (Unique: 221)**

Unnamed: 0,KronikHastalik,Count,Ratio (%)
0,Missing,611,27.337808
1,Myastenia gravis,38,1.700224
2,Aritmi,36,1.610738
3,Fascioscapulohumeral Distrofi,36,1.610738
4,"Limb-Girdle Musküler Distrofi, Astım",34,1.521253
5,Hipertiroidizm,34,1.521253
6,Kalp yetmezliği,33,1.47651
7,Hipertansiyon,31,1.387025
8,Astım,30,1.342282
9,Becker Musküler Distrofisi,29,1.297539


**Bolum (Unique: 11)**

Unnamed: 0,Bolum,Count,Ratio (%)
0,"Fiziksel Tıp Ve Rehabilitasyon,Solunum Merkezi",2045,91.498881
1,Ortopedi Ve Travmatoloji,88,3.93736
2,İç Hastalıkları,32,1.431767
3,Nöroloji,17,0.760626
4,Kardiyoloji,11,0.49217
5,Missing,11,0.49217
6,Göğüs Hastalıkları,8,0.357942
7,Laboratuar,7,0.313199
8,Genel Cerrahi,6,0.268456
9,Tıbbi Onkoloji,6,0.268456


**Alerji (Unique: 39)**

Unnamed: 0,Alerji,Count,Ratio (%)
0,Missing,944,42.237136
1,Polen,198,8.85906
2,POLEN,134,5.995526
3,Toz,119,5.324385
4,"ARVELES,CORASPIN",102,4.563758
5,Sucuk,91,4.071588
6,NOVALGIN,90,4.026846
7,"Polen,Yer Fıstığı",83,3.713647
8,TOZ,74,3.310962
9,SUCUK,73,3.266219


**Tanilar (Unique: 368)**

Unnamed: 0,Tanilar,Count,Ratio (%)
0,"DORSALJİ, DİĞER, LUMBOSAKRAL BÖLGE",149,6.666667
1,Omuzun darbe sendromu,128,5.727069
2,"İntervertebral disk bozuklukları, tanımlanmamış",116,5.190157
3,"DORSALJİ, DİĞER, SERVİKOTORASİK BÖLGE",96,4.295302
4,Missing,75,3.355705
5,Eklem ağrısı,51,2.281879
6,"DORSALJİ, DİĞER, SERVİKAL BÖLGE",49,2.192394
7,Radikülopati,37,1.655481
8,"Dorsalji, tanımlanmamış",32,1.431767
9,"Menisküs yırtığı, şimdiki",30,1.342282


**TedaviAdi (Unique: 244)**

Unnamed: 0,TedaviAdi,Count,Ratio (%)
0,Dorsalji -Boyun+trapez,231,10.33557
1,İV DİSK BOZUKLUĞU-BEL,200,8.948546
2,Dorsalji 1,140,6.263982
3,Dorsalji-Bel,120,5.369128
4,Gonartroz-Meniskopati,95,4.250559
5,SAĞ OMUZ İMPİNGEMENT,70,3.131991
6,Boyun-Trapezz,60,2.684564
7,Dorsalji-Dorsal,56,2.505593
8,Sol omuz İmpingement,50,2.237136
9,Alt ekstremite atrofi-Bilateral,37,1.655481


**UygulamaYerleri (Unique: 38)**

Unnamed: 0,UygulamaYerleri,Count,Ratio (%)
0,Bel,528,23.624161
1,Boyun,363,16.241611
2,Missing,221,9.888143
3,Diz,177,7.919463
4,Sol Omuz Bölgesi,137,6.129754
5,Sağ Omuz Bölgesi,127,5.682327
6,Sol El Bilek Bölgesi,83,3.713647
7,Sırt,75,3.355705
8,Sağ Ayak Bileği Bölgesi,61,2.729306
9,Tüm Vücut Bölgesi,58,2.595078


In [10]:
# Numerical Variable Summary
def numerical_variables_summary(dataframe, col_name, plot=False, corr=False):
    # Define quantiles
    quantiles = [0.05, 0.25, 0.50, 0.75, 0.95, 0.99]
    display(Markdown(f"## Numerical Variables Describe\n---"))
    display(dataframe.describe(percentiles=quantiles).T)

    display(Markdown(f"## Numerical Variables\n---"))
    numeric_cols = [col for col in df.columns if df[col].dtype in ["int64", "float64"]]
    print(numeric_cols)
    display(Markdown(f"## {col_name} Describe\n---"))
    display(dataframe[col_name].describe(quantiles).to_frame().T)
    display(Markdown(f"\n### {col_name} ###"))

    if plot:
        # Histogram
        fig_hist = px.histogram(dataframe, x=col_name, nbins=30, title=f"Histogram of {col_name}")
        fig_hist.show()

        # Boxplot
        fig_box = px.box(dataframe, y=col_name, title=f"Boxplot of {col_name}")
        fig_box.show()

    if corr:
        corr_matrix = dataframe.corr(numeric_only=True)
        display(Markdown("\nCorrelations:"))
        display(corr_matrix)
        # Heatmap
        fig_corr = px.imshow(
            corr_matrix,
            text_auto=True,
            color_continuous_scale="RdBu_r",
            title=f"Correlation Heatmap "
        )
        fig_corr.show()
# Usage
numerical_variables_summary(df, "UygulamaSuresi", plot=True, corr=True)

## Numerical Variables Describe
---

Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,95%,99%,max
Yas,2235.0,47.327069,15.208634,2.0,22.0,38.0,46.0,56.0,74.0,83.0,92.0
TedaviSuresi,2235.0,14.570917,3.725322,1.0,7.0,15.0,15.0,15.0,20.0,25.0,37.0
UygulamaSuresi,2235.0,16.573154,6.268635,3.0,5.0,10.0,20.0,20.0,20.0,28.3,45.0


## Numerical Variables
---

['Yas', 'TedaviSuresi', 'UygulamaSuresi']


## UygulamaSuresi Describe
---

Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,95%,99%,max
UygulamaSuresi,2235.0,16.573154,6.268635,3.0,5.0,10.0,20.0,20.0,20.0,28.3,45.0



### UygulamaSuresi ###


Correlations:

Unnamed: 0,Yas,TedaviSuresi,UygulamaSuresi
Yas,1.0,-0.013252,0.000244
TedaviSuresi,-0.013252,1.0,0.077701
UygulamaSuresi,0.000244,0.077701,1.0


In [11]:
# Feature Visualization
def feature_visualization(dataframe: pd.DataFrame, column: str, categorical_threshold: int = 20):
    display(Markdown(f"## Feature Visualization\n---"))
    display(Markdown(f"**Features Name**\n"))
    print(dataframe.columns)

    if dataframe[column].dtype == "O":  # categorical
        df_counts = dataframe[column].fillna("NaN").value_counts().reset_index()
        df_counts.columns = [column, "count"]
        # Check threshold
        if len(df_counts) > categorical_threshold:
            df_counts = df_counts.head(categorical_threshold)

        fig_bar = px.bar(df_counts, x=column, y="count", text="count")
        fig_pie = px.pie(df_counts, names=column, values="count", hole=0.3)
        fig = make_subplots(rows=1, cols=2, subplot_titles=("Bar Chart", "Pie Chart"),
                            specs=[[{"type": "xy"}, {"type": "domain"}]])
        for trace in fig_bar.data:
            trace.showlegend = False
            fig.add_trace(trace, row=1, col=1)

        for trace in fig_pie.data:
            fig.add_trace(trace, row=1, col=2)
    else:  # numerical
        series = dataframe[column].dropna()
        fig_hist = px.histogram(series, x=series.name)
        fig_box = px.box(series, y=series.name, points="outliers")
        fig = make_subplots(rows=1, cols=2, subplot_titles=("Histogram", "Boxplot"))

        for trace in fig_hist.data:
            trace.showlegend = False
            fig.add_trace(trace, row=1, col=1)

        for trace in fig_box.data:
            trace.showlegend = False
            fig.add_trace(trace, row=1, col=2)
    fig.update_layout(title_text=f"Feature Visualization: {column}", height=500, width=900)
    fig.show()

feature_visualization(df, "Cinsiyet")

## Feature Visualization
---

**Features Name**


Index(['HastaNo', 'Yas', 'Cinsiyet', 'KanGrubu', 'Uyruk', 'KronikHastalik',
       'Bolum', 'Alerji', 'Tanilar', 'TedaviAdi', 'TedaviSuresi',
       'UygulamaYerleri', 'UygulamaSuresi'],
      dtype='object')


## Y-Data Profiling
---

In [13]:
profile = ProfileReport(df, title="YData Profiling Report")
profile.to_notebook_iframe()
profile.to_file("../Pusula_Data_Profiling.html")

Summarize dataset:  39%|███▉      | 7/18 [00:00<00:00, 74.36it/s, Describe variable: UygulamaSuresi]
100%|██████████| 13/13 [00:00<00:00, 1264.93it/s]00:00, 71.30it/s, Describe variable: UygulamaSuresi]
Summarize dataset: 100%|██████████| 39/39 [00:01<00:00, 26.59it/s, Completed]                             
Generate report structure: 100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  2.71it/s]


Export report to file: 100%|██████████| 1/1 [00:00<00:00, 301.49it/s]


# Data Preprocessing
---

In [14]:
# Lower all values
df = df.applymap(lambda x: x.lower() if isinstance(x, str) else x)

In [15]:
# Drop noisy data
noisy_data = ["deneme ", "onur", "xx"]
df = df[~df["TedaviAdi"].isin(noisy_data)]

In [16]:
# Replace Tanilar
df["Tanilar"] = df["Tanilar"].str.replace(",, ", ",", regex=False).str.replace(",,", ",", regex=False).str.replace(" ,", ",", regex=False).str.replace(", ", ",", regex=False)
df["Tanilar"] = df["Tanilar"].str.strip()

In [17]:
# Fill HastaNo based KanGrubu
df["KanGrubu"] = df.groupby("HastaNo")["KanGrubu"].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill"))

# Fill HastaNo based KronikHastalik
df["KronikHastalik"] = df.groupby("HastaNo")["KronikHastalik"].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill"))

# Fill HastaNo based Cinsiyet
df["Cinsiyet"] = df.groupby("HastaNo")["Cinsiyet"].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill"))

# Fill HastaNo based Alerji
df["Alerji"] = df["Alerji"].fillna("yok")

In [18]:
# Fill Cinsiyet (NaN)
df["Cinsiyet"]= df["Cinsiyet"].fillna("Bilinmiyor")

# Fill KronikHastalik (NaN)
df["KronikHastalik"] = df["KronikHastalik"].fillna("yok")

# Fill Tanilar (NaN)
df["Tanilar"] = df["Tanilar"].fillna("bilinmiyor")

# Fill KanGrubu (NaN)
df["KanGrubu"] = df["KanGrubu"].fillna("bilinmiyor")

# Fill UygulamaYerleri (NaN)
df["UygulamaYerleri"] = df["UygulamaYerleri"].fillna("bilinmiyor")

In [19]:
def regulation_tanilar_first_per_hasta(df, group_col="HastaNo", target_col="Tanilar", threshold=80):
    # Hasta based group
    for hasta, group in df.groupby(group_col):
        # First value non NaN
        first_val = group[target_col].dropna().iloc[0] if not group[target_col].dropna().empty else np.nan
        if pd.isna(first_val):
            continue
        # Check All Value
        def replace_val(val):
            if pd.isna(val):
                return val
            score = process.extractOne(val, [first_val])[1]
            if score >= threshold:
                return first_val
            return val

        df.loc[group.index, target_col] = group[target_col].apply(replace_val)
    return df

df = regulation_tanilar_first_per_hasta(df, group_col="HastaNo", target_col="Tanilar", threshold=70)

In [20]:
# Replace Alerji
df["Alerji"] = df["Alerji"].str.replace(
    "gri̇pi̇n","gripin",regex=False).str.replace(
    "novalgin,novalgin","novalgin", regex=False).str.replace(
    "toz,gri̇pi̇n","toz,gripin", regex=False).str.replace(
    "toz,toz", "toz", regex=False).str.replace(
    "volteren","voltaren", regex=False).str.replace(
    "volteren,gri̇pi̇n", "voltaren,gripin"
    )

# Feature Engineering
---

In [21]:
# Unique KronikHastalik
unique_hastalik_per_patient = df.groupby("HastaNo")["KronikHastalik"].unique().apply(lambda x: ", ".join(x)).reset_index()
df = df.merge(unique_hastalik_per_patient, on="HastaNo", suffixes=("", "_Total"))

In [22]:
# Unique Alerji
unique_alerji_per_patient = df.groupby("HastaNo")["Alerji"].unique().apply(lambda x: ", ".join(x)).reset_index()
df = df.merge(unique_alerji_per_patient, on="HastaNo", suffixes=("", "_Total"))

In [23]:
# Unique Tanilar
unique_tanilar_per_patient = df.groupby("HastaNo")["Tanilar"].unique().apply(lambda x: ", ".join(x)).reset_index()
df = df.merge(unique_tanilar_per_patient, on="HastaNo", suffixes=("", "_Total"))

In [24]:
# Unique UygulamaYerleri
unique_uygulama_yerleri_per_patient = df.groupby("HastaNo")["UygulamaYerleri"].unique().apply(lambda x: ", ".join(x)).reset_index()
df = df.merge(unique_uygulama_yerleri_per_patient, on="HastaNo", suffixes=("", "_Total"))

In [25]:
# Unique TedaviAdi Count
df["TedaviAdi_Count"] = df.groupby("HastaNo")["TedaviAdi"].transform("nunique")

In [26]:
# Unique Bolum Count
df["Bolum_Count"] = df.groupby("HastaNo")["Bolum"].transform("nunique")

In [27]:
# Hasta Based feature
hasta_based_features = df.groupby("HastaNo").agg(
    HASTA_SESSION_COUNT=("HastaNo", "count"),
    AVERAGE_UYGULAMA_SURESI_DURATION=("UygulamaSuresi", "mean")
).reset_index()

df = df.merge(hasta_based_features, on="HastaNo", how="left")

# Tani count
df["TANI_COUNT"] = df["Tanilar_Total"].apply(lambda x: len(str(x).split(",")) if pd.notna(x) else 0)

# Uygulama yeri count
df["UYGULAMA_YERI_COUNT"] = df["UygulamaYerleri_Total"].apply(lambda x: len(str(x).split(",")) if pd.notna(x) else 0)

# Yas categorize
# Q1, Q2, Q3
Q1 = df["Yas"].quantile(0.25)
Q2 = df["Yas"].quantile(0.50)
Q3 = df["Yas"].quantile(0.75)

# IQR
IQR = Q3 - Q1

# low - up limit
lower_limit = Q1 - 1.5 * IQR
upper_limit = Q3 + 1.5 * IQR

bins = [0, 11, 38, 47, 56, 100]
labels = ["0-11", "12-38", "39-47", "48-56", "56+"]
df["YAS_BIN"] = pd.cut(df["Yas"], bins=bins, labels=labels, right=True)

# Kronik hastalık count
df["KRONIK_HASTALIK_COUNT"] = df["KronikHastalik_Total"].apply(
    lambda x: len(str(x).split(",")) if pd.notna(x) and str(x).strip().lower() != "yok" else 0
)

# Kronik hastalık count
df["Alerji_COUNT"] = df["Alerji_Total"].apply(
    lambda x: len(str(x).split(",")) if pd.notna(x) and str(x).strip().lower() != "yok" else 0
)

In [28]:
### KNN IMPUTER (Bolum) ###
# Label encoding
le = LabelEncoder()
bolum_non_null = df["Bolum"].dropna()
le.fit(bolum_non_null)
df["Bolum_enc"] = df["Bolum"].map(lambda x: le.transform([x])[0] if pd.notna(x) else None)

# KNN Imputer (k=3)
imputer = KNNImputer(n_neighbors=3)
df["Bolum_enc"] = imputer.fit_transform(df[["Bolum_enc"]])

# Bolum filled
df["Bolum_filled"] = df["Bolum_enc"].round().astype(int).map(lambda x: le.inverse_transform([x])[0])

# Change
df["Bolum"] = df["Bolum_filled"]

# Clean
df.drop(["Bolum_enc", "Bolum_filled"], axis=1, inplace=True)

In [29]:
# Drop unnecessary columns
df = df.drop(["KronikHastalik", "Alerji", "Tanilar", "UygulamaYerleri", "KronikHastalik_Total", "Alerji_Total"], axis=1)

In [30]:
# One-Hot Encoding
df = pd.get_dummies(df, columns=["Cinsiyet", "KanGrubu", "Uyruk", "Bolum", "YAS_BIN"], drop_first=True)

# Preparation for Model
---

In [31]:
# One-hot columns
one_hot_cols = [col for col in df.columns if any(prefix in col for prefix in
                                                 ["Cinsiyet_", "KanGrubu_", "Uyruk_", "Bolum_", "YAS_BIN_"])]
# Aggregation dictionary
agg_dict = {
    "Yas": "mean",
    'TedaviAdi': lambda x: ', '.join(x.unique()),
    "TedaviSuresi": "mean",
    "UygulamaSuresi": "mean",
    "TedaviAdi_Count": "mean",
    "Bolum_Count": "mean",
    "HASTA_SESSION_COUNT": "mean",
    "AVERAGE_UYGULAMA_SURESI_DURATION": "mean",
    "TANI_COUNT": "mean",
    "UYGULAMA_YERI_COUNT": "max",
    "KRONIK_HASTALIK_COUNT": "mean",
    "Alerji_COUNT": "mean",
    "Tanilar_Total": lambda x: ", ".join(set(x)),
    "UygulamaYerleri_Total": lambda x: ", ".join(set(x))
}
# Add one-hot columns to aggregation dictionary
for col in one_hot_cols:
    agg_dict[col] = "max"
# Group by HastaNo
grouped_df = df.groupby("HastaNo", as_index=False).agg(agg_dict)

In [32]:
# Grouped df
grouped_df.head()

Unnamed: 0,HastaNo,Yas,TedaviAdi,TedaviSuresi,UygulamaSuresi,TedaviAdi_Count,Bolum_Count,HASTA_SESSION_COUNT,AVERAGE_UYGULAMA_SURESI_DURATION,TANI_COUNT,UYGULAMA_YERI_COUNT,KRONIK_HASTALIK_COUNT,Alerji_COUNT,Tanilar_Total,UygulamaYerleri_Total,Cinsiyet_erkek,Cinsiyet_kadın,KanGrubu_0 rh-,KanGrubu_a rh+,KanGrubu_a rh-,KanGrubu_ab rh+,KanGrubu_ab rh-,KanGrubu_b rh+,KanGrubu_b rh-,KanGrubu_bilinmiyor,Uyruk_azerbaycan,Uyruk_libya,Uyruk_tokelau,Uyruk_türkiye,Bolum_genel cerrahi,Bolum_göğüs hastalıkları,Bolum_i̇ç hastalıkları,Bolum_kalp ve damar cerrahisi,Bolum_kardiyoloji,Bolum_laboratuar,Bolum_nöroloji,Bolum_ortopedi ve travmatoloji,Bolum_tıbbi onkoloji,YAS_BIN_12-38,YAS_BIN_39-47,YAS_BIN_48-56,YAS_BIN_56+
0,145134,60.0,ayak bileği,5.0,20.0,1.0,1,1.0,20.0,1.0,1,3.0,1.0,ayak bileği ve ayağın yüzeysel yaralanması,ayak bileği,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True
1,145135,28.0,dorsalji -boyun+trapez+skapular,15.0,17.0,1.0,1,5.0,17.0,4.0,3,4.0,0.0,"omuzun darbe sendromu,dorsalji̇,di̇ğer,servi̇kotorasi̇k bölge","boyun, boyun,sırt",True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False
2,145136,60.0,parapleji,10.0,23.333333,1.0,1,3.0,23.333333,1.0,1,0.0,2.0,parapleji ve tetrapleji,bilinmiyor,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True
3,145137,65.0,"el rehabilitasyonu, el rehabilitasyonu-el rehabilitasyonu, kalça ağrısı+trokanterik bursit",15.0,15.833333,3.0,1,12.0,15.833333,4.0,3,4.0,1.0,"artroz,tanımlanmamış,el, trokanterik bursit","sol el bilek bölgesi, bilinmiyor, kalça",False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True
4,145138,61.0,gonartroz-meniskopati,18.0,15.0,1.0,1,5.0,15.0,1.0,1,4.0,0.0,eklem ağrısı,diz,False,True,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True


## TF-IDF Vectors
---

In [33]:
# TF-IDF vectorizer
tfidf_tanilar = TfidfVectorizer(max_features=100)
tfidf_uygulama = TfidfVectorizer(max_features=100)
tfidf_tedavi = TfidfVectorizer(max_features=100)  # TedaviAdi için

# Tanilar_Total TF-IDF
tfidf_tanilar_matrix = tfidf_tanilar.fit_transform(grouped_df["Tanilar_Total"].fillna(""))
tfidf_tanilar_df = pd.DataFrame(tfidf_tanilar_matrix.toarray(),
                                columns=[f"tfidf_tanilar_{w}" for w in tfidf_tanilar.get_feature_names_out()],
                                index=grouped_df.index)

# UygulamaYerleri_Total TF-IDF
tfidf_uygulama_matrix = tfidf_uygulama.fit_transform(grouped_df["UygulamaYerleri_Total"].fillna(""))
tfidf_uygulama_df = pd.DataFrame(tfidf_uygulama_matrix.toarray(),
                                 columns=[f"tfidf_uyg_{w}" for w in tfidf_uygulama.get_feature_names_out()],
                                 index=grouped_df.index)

# TedaviAdi TF-IDF
tfidf_tedavi_matrix = tfidf_tedavi.fit_transform(grouped_df["TedaviAdi"].fillna(""))
tfidf_tedavi_df = pd.DataFrame(tfidf_tedavi_matrix.toarray(),
                               columns=[f"tfidf_tedavi_{w}" for w in tfidf_tedavi.get_feature_names_out()],
                               index=grouped_df.index)

# Concat dataframes
df_final = pd.concat([grouped_df.reset_index(drop=True),
                      tfidf_tanilar_df.reset_index(drop=True),
                      tfidf_uygulama_df.reset_index(drop=True),
                      tfidf_tedavi_df.reset_index(drop=True)], axis=1)

In [34]:
# Drop text columns
df_final = df_final.drop(["TedaviAdi", "Tanilar_Total", "UygulamaYerleri_Total"], axis=1)
# Final df
df_final.head()

Unnamed: 0,HastaNo,Yas,TedaviSuresi,UygulamaSuresi,TedaviAdi_Count,Bolum_Count,HASTA_SESSION_COUNT,AVERAGE_UYGULAMA_SURESI_DURATION,TANI_COUNT,UYGULAMA_YERI_COUNT,KRONIK_HASTALIK_COUNT,Alerji_COUNT,Cinsiyet_erkek,Cinsiyet_kadın,KanGrubu_0 rh-,KanGrubu_a rh+,KanGrubu_a rh-,KanGrubu_ab rh+,KanGrubu_ab rh-,KanGrubu_b rh+,KanGrubu_b rh-,KanGrubu_bilinmiyor,Uyruk_azerbaycan,Uyruk_libya,Uyruk_tokelau,Uyruk_türkiye,Bolum_genel cerrahi,Bolum_göğüs hastalıkları,Bolum_i̇ç hastalıkları,Bolum_kalp ve damar cerrahisi,Bolum_kardiyoloji,Bolum_laboratuar,Bolum_nöroloji,Bolum_ortopedi ve travmatoloji,Bolum_tıbbi onkoloji,YAS_BIN_12-38,YAS_BIN_39-47,YAS_BIN_48-56,YAS_BIN_56+,tfidf_tanilar_algonörodistrofi,tfidf_tanilar_artrozu,tfidf_tanilar_ayak,tfidf_tanilar_ağrı,tfidf_tanilar_ağrısı,tfidf_tanilar_aşil,tfidf_tanilar_bacak,tfidf_tanilar_baş,tfidf_tanilar_başka,tfidf_tanilar_bel,tfidf_tanilar_bi,tfidf_tanilar_bilateral,tfidf_tanilar_bileği,tfidf_tanilar_bilinmiyor,tfidf_tanilar_birden,tfidf_tanilar_boynuzunun,tfidf_tanilar_boyun,tfidf_tanilar_bozuklukları,tfidf_tanilar_bozukluğu,tfidf_tanilar_burkulma,tfidf_tanilar_bölge,tfidf_tanilar_darbe,tfidf_tanilar_di,tfidf_tanilar_disk,tfidf_tanilar_diz,tfidf_tanilar_diğer,tfidf_tanilar_dorsalji,tfidf_tanilar_düzeyinde,tfidf_tanilar_eklem,tfidf_tanilar_eklemde,tfidf_tanilar_eklemin,tfidf_tanilar_ekleminin,tfidf_tanilar_eksikliği,tfidf_tanilar_ekstremite,tfidf_tanilar_el,tfidf_tanilar_epikondilit,tfidf_tanilar_fazla,tfidf_tanilar_fibromiyalji,tfidf_tanilar_gerilmesi,tfidf_tanilar_gonartroz,tfidf_tanilar_hastalık,tfidf_tanilar_hastalıklar,tfidf_tanilar_hemoraji,tfidf_tanilar_ile,tfidf_tanilar_implantı,tfidf_tanilar_inkontinans,tfidf_tanilar_kal,tfidf_tanilar_kalkaneal,tfidf_tanilar_kalp,tfidf_tanilar_kalça,tfidf_tanilar_kas,tfidf_tanilar_koksartroz,tfidf_tanilar_kondromalazia,tfidf_tanilar_kontraktürü,tfidf_tanilar_kotorasi,tfidf_tanilar_kırığı,tfidf_tanilar_lateral,tfidf_tanilar_lumbosakral,tfidf_tanilar_malign,tfidf_tanilar_medial,tfidf_tanilar_menisküs,tfidf_tanilar_menisküsün,tfidf_tanilar_muscular,tfidf_tanilar_neoplazmı,tfidf_tanilar_ntervertebral,tfidf_tanilar_omuzun,tfidf_tanilar_ortopedik,tfidf_tanilar_parmak,tfidf_tanilar_patella,tfidf_tanilar_patellanın,tfidf_tanilar_pelvik,tfidf_tanilar_posterior,tfidf_tanilar_primer,tfidf_tanilar_radikülopati,tfidf_tanilar_rden,tfidf_tanilar_sendromu,tfidf_tanilar_serebrovasküler,tfidf_tanilar_servi,tfidf_tanilar_servikal,tfidf_tanilar_spinal,tfidf_tanilar_spur,tfidf_tanilar_stenoz,tfidf_tanilar_tanımlanmamış,tfidf_tanilar_tanımlanmış,tfidf_tanilar_tendon,tfidf_tanilar_tendonunun,tfidf_tanilar_torasi,tfidf_tanilar_ve,tfidf_tanilar_veya,tfidf_tanilar_vitamin,tfidf_tanilar_yaralanması,tfidf_tanilar_yer,tfidf_tanilar_yerde,tfidf_tanilar_yerleşim,tfidf_tanilar_yetmezliği,tfidf_tanilar_yırtığı,tfidf_tanilar_çıkık,tfidf_tanilar_üriner,tfidf_tanilar_ğer,tfidf_tanilar_şimdiki,tfidf_uyg_ayak,tfidf_uyg_bel,tfidf_uyg_bilek,tfidf_uyg_bileği,tfidf_uyg_bilinmiyor,tfidf_uyg_boyun,tfidf_uyg_bölgesi,tfidf_uyg_dirsek,tfidf_uyg_diz,tfidf_uyg_el,tfidf_uyg_kalça,tfidf_uyg_omuz,tfidf_uyg_sağ,tfidf_uyg_sol,tfidf_uyg_sırt,tfidf_uyg_tüm,tfidf_uyg_vücut,tfidf_uyg_yüz,tfidf_tedavi_adezif,tfidf_tedavi_algonörodistrofi,tfidf_tedavi_alt,tfidf_tedavi_artroskopi,tfidf_tedavi_atrofi,tfidf_tedavi_ayak,tfidf_tedavi_ağrsıı,tfidf_tedavi_ağrısı,tfidf_tedavi_aşil,tfidf_tedavi_bağ,tfidf_tedavi_bel,tfidf_tedavi_beyin,tfidf_tedavi_bilateral,tfidf_tedavi_bileği,tfidf_tedavi_boyun,tfidf_tedavi_bozukluğu,tfidf_tedavi_bursit,tfidf_tedavi_di,tfidf_tedavi_disk,tfidf_tedavi_distorsiyonu,tfidf_tedavi_diz,tfidf_tedavi_dorsal,tfidf_tedavi_dorsalji,tfidf_tedavi_düşük,tfidf_tedavi_egzersizi,tfidf_tedavi_eklem,tfidf_tedavi_ekstremite,tfidf_tedavi_el,tfidf_tedavi_epikondilit,tfidf_tedavi_erken,tfidf_tedavi_fasiit,tfidf_tedavi_ftr,tfidf_tedavi_gonartroz,tfidf_tedavi_hemiparezi,tfidf_tedavi_hemipleji,tfidf_tedavi_humerus,tfidf_tedavi_impingement,tfidf_tedavi_implant,tfidf_tedavi_implantı,tfidf_tedavi_kalkaneal,tfidf_tedavi_kalça,tfidf_tedavi_kas,tfidf_tedavi_koksartroz1,tfidf_tedavi_kondromalezi,tfidf_tedavi_kontraktürü,tfidf_tedavi_kuvvetlendirme,tfidf_tedavi_kırığı,tfidf_tedavi_lusu,tfidf_tedavi_meni,tfidf_tedavi_meniskopati,tfidf_tedavi_menisküs,tfidf_tedavi_meralgia,tfidf_tedavi_mobilizasyon,tfidf_tedavi_mpi,tfidf_tedavi_mpingemen,tfidf_tedavi_mpingement,tfidf_tedavi_mplanti,tfidf_tedavi_mplantı,tfidf_tedavi_muskuler,tfidf_tedavi_ngement,tfidf_tedavi_omuz,tfidf_tedavi_op,tfidf_tedavi_parapleji,tfidf_tedavi_parmak,tfidf_tedavi_patella,tfidf_tedavi_plantar,tfidf_tedavi_post,tfidf_tedavi_postop,tfidf_tedavi_programı,tfidf_tedavi_protezi,tfidf_tedavi_ptr,tfidf_tedavi_radikülopati,tfidf_tedavi_reh,tfidf_tedavi_rehabilitasyon,tfidf_tedavi_rehabilitasyonu,tfidf_tedavi_ri,tfidf_tedavi_rüptürü,tfidf_tedavi_sağ,tfidf_tedavi_sendromu,tfidf_tedavi_servikal,tfidf_tedavi_sk,tfidf_tedavi_skolyoz,tfidf_tedavi_sküs,tfidf_tedavi_sol,tfidf_tedavi_spinal,tfidf_tedavi_spur,tfidf_tedavi_strain,tfidf_tedavi_sırt,tfidf_tedavi_tam,tfidf_tedavi_tami,tfidf_tedavi_tendinit,tfidf_tedavi_tendiniti,tfidf_tedavi_tendon,tfidf_tedavi_tenosinovit,tfidf_tedavi_trapez,tfidf_tedavi_trapezz,tfidf_tedavi_ve,tfidf_tedavi_yaralanması,tfidf_tedavi_yürüme,tfidf_tedavi_öçb
0,145134,60.0,5.0,20.0,1.0,1,1.0,20.0,1.0,1,3.0,1.0,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,0.0,0.0,0.573378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.516056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.372305,0.0,0.0,0.516056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.663306,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.748348,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,145135,28.0,15.0,17.0,1.0,1,5.0,17.0,4.0,3,4.0,0.0,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.268173,0.388991,0.274633,0.0,0.0,0.0,0.256229,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.398139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.383265,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.370067,0.0,0.343385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.274633,0.0,0.0,0.0,0.0,0.0,0.0,0.82044,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.571733,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.582217,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.467239,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.665365,0.0,0.0,0.0,0.0,0.0
2,145136,60.0,10.0,23.333333,1.0,1,3.0,23.333333,1.0,1,0.0,2.0,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,145137,65.0,15.0,15.833333,3.0,1,12.0,15.833333,4.0,3,4.0,1.0,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.873365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.487066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.478529,0.0,0.341712,0.0,0.247198,0.0,0.0,0.478529,0.491843,0.0,0.0,0.349623,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.215887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.268371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.636188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.202143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.660131,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,145138,61.0,18.0,15.0,1.0,1,5.0,15.0,1.0,1,4.0,0.0,False,True,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,0.0,0.0,0.0,0.0,0.685802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.727789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.702916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.711273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
