# In [376]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from ipywidgets import interact
from feature_engine.encoding import RareLabelEncoder
import warnings
warnings.filterwarnings("ignore")

# Load the training data and cast it to float64 in one step. errors="ignore"
# stops pandas from raising on data that cannot be converted (presumably the
# text columns then stay as loaded - confirm against the pandas version in use).
df = pd.read_csv("train.csv").astype(np.float64, errors="ignore")


def Gorsellestir(degisken1,hedefDegisken,lineSelect):
    """Show a combined bar/line chart of one prepared variable against the target.

    Reads the module-level ``df`` and expects it to already contain the helper
    column ``degisken1 + "?"`` (the grouped or binned version of ``degisken1``).

    * Bars (primary y-axis): share of all rows that fall into each group.
    * Line (secondary y-axis): for each group, the number of rows having a given
      target value divided by the number of rows in that group; only the series
      belonging to ``lineSelect`` is drawn.

    Args:
        degisken1: Name of the original column; used for the title and x-axis label.
        hedefDegisken: Name of the target column.
        lineSelect: Target value whose per-group rate is drawn as the line. If it
            matches none of the target values present, only the bars are drawn.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    group_col = degisken1 + "?"

    # Rows per group: feeds the bars and is the denominator for the line.
    group_counts = df.groupby([group_col]).size()

    # Bars: each group's share of all rows.
    bar_df = pd.DataFrame(group_counts, columns=["Toplam"]).reset_index()
    bar_df["Yüzde"] = bar_df.Toplam / bar_df.Toplam.sum()
    bar_fig = px.bar(
        bar_df,
        x=group_col,
        y="Yüzde",
        color_discrete_sequence=px.colors.qualitative.Set2,
        hover_data=[bar_df.Toplam],
    )

    # Lines: rows per (group, target value) divided by rows per group.
    pair_counts = df.groupby([group_col, hedefDegisken]).size()
    line_df = pd.DataFrame(pair_counts, columns=["Yüzde"]).reset_index()
    # Positional assignment: relies on the division keeping pair_counts' row order.
    line_df["Yüzde"] = (pair_counts / group_counts).values
    line_fig = px.line(
        line_df,
        x=group_col,
        y="Yüzde",
        color=hedefDegisken,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )

    # Position of the requested target value among the distinct target values.
    # Assumes plotly emits one trace per target value in that same order - confirm.
    target_values = line_df[hedefDegisken].unique()
    selected_traces = np.where(target_values == lineSelect)[0]

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Line trace(s) first on the secondary axis, then the bars on the primary axis.
    for trace_no in selected_traces:
        fig.add_trace(line_fig["data"][trace_no], secondary_y=True)
    for bar_trace in bar_fig["data"]:
        fig.add_trace(bar_trace)

    fig.update_layout(
        barmode="group",
        title_text=f"{degisken1}-{hedefDegisken}",
        legend_title_text=hedefDegisken,
        yaxis_tickformat='.1%',
        yaxis2_tickformat='.1%',
        yaxis_title="Toplam Oran",
        yaxis2_title="Oran",
        xaxis_title=f"{degisken1}",
    )

    fig.show()
    

def CategoricProcessing(degisken1,tol):
    """Add a helper column to ``df`` in which rare categories are merged.

    Missing values in ``df[degisken1]`` are first replaced by the string "NA";
    the column is then passed through a ``RareLabelEncoder`` configured to use
    the replacement label 'Others'. The result is written to the module-level
    ``df`` as column ``degisken1 + "?"``.

    Args:
        degisken1: Name of the column to process.
        tol: Forwarded as the encoder's ``tol`` (the UI describes it as the
            minimum frequency for the 'Others' grouping).

    Returns:
        None. ``df`` is modified in place.
    """
    # Double brackets keep a one-column DataFrame, which is what the encoder is fed.
    filled = df[[degisken1]].fillna("NA")

    encoder = RareLabelEncoder(
        tol=tol,
        n_categories=0,
        max_n_categories=None,
        replace_with='Others',
        variables=None,
        ignore_format=True,
    )

    df[degisken1 + "?"] = encoder.fit_transform(filled)


def nonCategoricProcessing(degisken1,q):
    """Bin a numeric column into ``q`` equal-width intervals with readable labels.

    The binned values are written to the module-level ``df`` as column
    ``degisken1 + "?"``. Each bin is labelled ``"<lower>-<upper>"``. Rows whose
    source value is missing stay missing, because ``pd.cut`` leaves NA as NA.

    Args:
        degisken1: Name of the numeric column to bin.
        q: Number of equal-width bins (passed to ``pd.cut`` as ``bins``).

    Returns:
        None. ``df`` is modified in place.

    Raises:
        ValueError: Propagated from pandas if the bin labels are still not
            unique at the highest precision tried (degenerate value range).
    """
    values = df[degisken1]

    # One pass yields both the bin assignment and the bin edges.
    binned, edges = pd.cut(values, bins=q, retbins=True)

    lower_edges = edges[:-1].copy()
    upper_edges = edges[1:]
    # pd.cut pads the lowest edge slightly below the minimum so that the smallest
    # value lands in the first bin. Label that bin with the real minimum instead
    # of the padded edge. Edges keep their sign, so negative ranges read correctly.
    lower_edges[0] = max(lower_edges[0], values.min())

    # Two decimals by default; use more only when rounding would make two bins
    # share a label, which pandas rejects.
    for decimals in range(2, 11):
        labels = [
            f"{lower:.{decimals}f}-{upper:.{decimals}f}"
            for lower, upper in zip(lower_edges, upper_edges)
        ]
        if len(set(labels)) == len(labels):
            break

    df[degisken1 + "?"] = binned.cat.rename_categories(labels)
    



def VeriGorsellestirme(degisken1,hedefDegisken,q,tol,line):
    """Prepare the chosen variable and draw its chart against the target.

    Does nothing while either selector still holds the placeholder "Seç".
    Otherwise it:

    1. sets ``lineWidget.options`` to the distinct values of the target column,
    2. builds the helper column ``degisken1 + "?"`` in the module-level ``df``
       (binning for numeric columns, rare-label grouping for everything else),
    3. draws the chart via ``Gorsellestir``,
    4. removes the helper column again, even if a previous step raised.

    Args:
        degisken1: Name of the column to analyse, or "Seç".
        hedefDegisken: Name of the target column, or "Seç".
        q: Number of bins; used only for numeric columns.
        tol: Rare-label threshold; used only for non-numeric columns.
        line: Target value forwarded to ``Gorsellestir`` as ``lineSelect``.

    Returns:
        None.
    """
    # Nothing to draw until both selectors hold a real column.
    if degisken1 == "Seç" or hedefDegisken == "Seç":
        return

    # Numeric columns are binned; every other dtype (object, string, category)
    # is treated as categorical rather than being sent to pd.cut.
    is_numeric = pd.api.types.is_numeric_dtype(df[degisken1])

    lineWidget.options = df[hedefDegisken].unique()

    try:
        if is_numeric:
            nonCategoricProcessing(degisken1, q)
        else:
            CategoricProcessing(degisken1, tol)
        Gorsellestir(degisken1, hedefDegisken, line)
    finally:
        # Always remove the temporary helper column. errors="ignore" makes this
        # a no-op if processing failed before the column was created.
        df.drop(columns=[degisken1 + "?"], inplace=True, errors="ignore")
   


# --- Interactive controls ---
# "Seç" heads both column lists as the "nothing chosen yet" placeholder that
# VeriGorsellestirme checks for before drawing anything.
degisken1 = widgets.Select(
    options=["Seç", *df.columns],
    description='İlk Değişken',
)

hedefDegisken = widgets.Select(
    options=["Seç", *df.columns],
    description='Hedef Değişken',
)

# Number of bins; passed on to nonCategoricProcessing for non-object columns.
q = widgets.IntSlider(
    value=10,
    min=1,
    max=50,
    step=1,
    description='Nümerik değişken dilim sayısı',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# Threshold passed on to CategoricProcessing for object columns.
tol = widgets.FloatSlider(
    value=0.05,
    min=0,
    max=1.0,
    step=0.001,
    description="Others'ın için minimum frekans (Sadece Kategorik)",
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.3f',
)

# Starts with only the placeholder; VeriGorsellestirme replaces the options with
# the distinct values of the selected target column.
lineWidget = widgets.Select(
    options=["Seç"],
    description='Değişken Değerleri',
    disabled=False,
)

# Re-run VeriGorsellestirme whenever a control changes. In a notebook the
# trailing semicolon keeps the call's return value from being echoed.
interact(
    VeriGorsellestirme,
    degisken1=degisken1,
    hedefDegisken=hedefDegisken,
    q=q,
    tol=tol,
    line=lineWidget,
);