# Table Data

<b>Age </b>(numeric)<br>
<b>Sex </b>(text: male, female)<br>
<b>Job </b>(numeric: 0 - unskilled and non-resident, 1 - unskilled and resident, 2 - skilled, 3 - highly skilled)<br>
<b>Housing</b> (text: own, rent, or free)<br>
<b>Saving accounts</b> (text - little, moderate, quite rich, rich)<br>
<b>Checking account </b>(numeric, in DM - Deutsche Mark)<br>
<b>Credit amount</b> (numeric, in DM)<br>
<b>Duration</b> (numeric, in months)<br>
<b>Purpose</b> (text: car, furniture/equipment, radio/TV, domestic appliances, repairs, education, business, vacation/others)<br>
<b>Risk </b> (target variable: good or bad risk)<br>

<a id="Librarys"></a> <br>


In [None]:

import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt

# Load the German credit dataset; the first CSV column is used as the row index.
DATA_PATH = "../input/german-credit-data-with-risk/german_credit_data.csv"
df_credit = pd.read_csv(DATA_PATH, index_col=0)

<a id="Known"></a> <br>
# Look at the data: 

In [None]:
 # DataFrame.info() writes its summary to stdout itself and returns None,
 # so wrapping it in print() only appended a stray "None" line to the output.
 df_credit.info()

In [None]:

# Number of distinct values per column, followed by a preview of the first rows.
print(df_credit.nunique(), df_credit.head(), sep="\n")

# Explorations: <a id="Explorations"></a> <br>



In [None]:

import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
import warnings
from collections import Counter

# Enable inline rendering of Plotly figures in the notebook output.
py.init_notebook_mode(connected=True)

# Count the rows of each risk class once, instead of filtering and calling
# value_counts() separately for the x and the y argument of every trace.
good_counts = df_credit.loc[df_credit["Risk"] == 'good', "Risk"].value_counts()
bad_counts = df_credit.loc[df_credit["Risk"] == 'bad', "Risk"].value_counts()

# One bar per risk class: x is the class label, y is the number of rows.
trace0 = go.Bar(x=good_counts.index.values, y=good_counts.values, name='Good credit')
trace1 = go.Bar(x=bad_counts.index.values, y=bad_counts.values, name='Bad credit')
data = [trace0, trace1]

# A single layout definition (the earlier empty go.Layout() was immediately overwritten).
layout = go.Layout(
    yaxis=dict(title='Count'),
    xaxis=dict(title='Risk Variable'),
    title='Target variable distribution',
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')

In [None]:
# Age values as plain Python lists: good-risk rows, bad-risk rows, and all rows.
is_good = df_credit["Risk"] == 'good'
is_bad = df_credit["Risk"] == 'bad'
df_good = df_credit.loc[is_good, 'Age'].values.tolist()
df_bad = df_credit.loc[is_bad, 'Age'].values.tolist()
df_age = df_credit['Age'].values.tolist()

# One probability-normalized histogram per group.
trace0 = go.Histogram(x=df_good, histnorm='probability', name="Good Credit")
trace1 = go.Histogram(x=df_bad, histnorm='probability', name="Bad Credit")
trace2 = go.Histogram(x=df_age, histnorm='probability', name="Overall Age")

# Grid: two panels on the top row, one panel spanning both columns below.
fig = tls.make_subplots(
    rows=2,
    cols=2,
    specs=[[{}, {}], [{'colspan': 2}, None]],
    subplot_titles=('Good','Bad', 'General Distribuition'),
)

# Place each histogram in its (row, col) cell of the grid.
placements = ((trace0, 1, 1), (trace1, 1, 2), (trace2, 2, 1))
for panel_trace, panel_row, panel_col in placements:
    fig.append_trace(panel_trace, panel_row, panel_col)

fig['layout'].update(showlegend=True, title='Age Distribuition', bargap=0.05)
py.iplot(fig, filename='custom-sized-subplot-with-subplot-titles')

In [None]:
# Split the data by risk class so the two age distributions can be overlaid.
df_good = df_credit[df_credit["Risk"] == 'good']
df_bad = df_credit[df_credit["Risk"] == 'bad']

# Two stacked panels: distribution overlay on top, per-age counts below.
fig, ax = plt.subplots(nrows=2, figsize=(12,8))
plt.subplots_adjust(hspace = 0.4, top = 0.8)

# Top panel: good-risk ages in green and bad-risk ages in red on the same axes.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here to match the API this notebook already uses — confirm the installed version.
g1 = sns.distplot(df_good["Age"], ax=ax[0], color="g")
g1 = sns.distplot(df_bad["Age"], ax=ax[0], color='r')
g1.set_title("Age Distribuition", fontsize=15)
g1.set_xlabel("Age")
# Fixed: this was a second set_xlabel call, which replaced "Age" on the x-axis
# and left the y-axis without the intended label.
g1.set_ylabel("Frequency")

# Bottom panel: number of rows per age value, split by risk class.
g2 = sns.countplot(x="Age",data=df_credit, palette="hls", ax=ax[1], hue = "Risk")
g2.set_title("Age Counting by Risk", fontsize=15)
g2.set_xlabel("Age")
# Fixed: same x/y label mix-up as in the top panel.
g2.set_ylabel("Count")
plt.show()