Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
127 lines (74 sloc) 2.67 KB
# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')
import warnings
get_ipython().magic("config InlineBackend.figure_format = 'png' #set 'png' here when working on notebook")
warnings.filterwarnings('ignore')
# Set some parameters to get good visuals - style to ggplot and size to 15,10
pd.set_option('display.precision',4)
pd.set_option('display.width',170, 'display.max_rows',200, 'display.max_columns',900)
# In[2]:
df = pd.read_csv("mortgage_arrears.csv")
# In[3]:
print ("\n\n---------------------")
print ("DATA SET INFORMATION")
print ("---------------------")
print ("Shape of training set:", df.shape, "\n")
print ("Column Headers:", list(df.columns.values), "\n")
print (df.dtypes)
# In[4]:
import re
missing_values = []
nonumeric_values = []
print ("DATA SET INFORMATION")
print ("========================\n")
for column in df:
# Find all the unique feature values
uniq = df[column].unique()
print ("'{}' has {} unique values" .format(column,uniq.size))
if (uniq.size > 10):
print("~~Listing up to 10 unique values~~")
print (uniq[0:10])
print ("\n-----------------------------------------------------------------------\n")
# Find features with missing values
if (True in pd.isnull(uniq)):
s = "{} has {} missing" .format(column, pd.isnull(df[column]).sum())
missing_values.append(s)
# Find features with non-numeric values
for i in range (1, np.prod(uniq.shape)):
if (re.match('nan', str(uniq[i]))):
break
if not (re.search('(^\d+\.?\d*$)|(^\d*\.?\d+$)', str(uniq[i]))):
nonumeric_values.append(column)
break
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
print ("Features with missing values:\n{}\n\n" .format(missing_values))
print ("Features with non-numeric values:\n{}" .format(nonumeric_values))
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
# ##### 26 in the Republic of Ireland and 6 in Northern Ireland .
# This project is about Republic of Ireland
# ##### State
# In[5]:
list(df.columns)
# In[23]:
df.describe()
# In[16]:
plt.boxplot(df['2017_LA_Arrears'])
# In[35]:
plt.subplot(1, 2, 1)
plt.boxplot(df['2016_Loans_Paid'])
plt. title("Loan Paid in 2016", y=1.08)
plt.subplot(1, 2, 2)
plt.boxplot(df['2016_Sum_of_Price'])
plt. title("Sum of House Prices in 2016", y=1.08)
plt.tight_layout()
plt.savefig("Boxplot")
# In[17]:
plt.boxplot(df['2017_Loans Approved'])
# In[ ]:
'2017_LA_Arrears',
'2017_Loans Approved',
You can’t perform that action at this time.