In [1]:
import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

This notebook shows some basics stats about securities labeled as NC1

## Load Data

In [2]:
df_dict = {}
for n in range(2,9):
    file_name = 'trimmed_'+ str(2000+n) +'_v2.csv'
    data = pd.read_csv(file_name)
    df_dict[str(2000+n)] = data

In [67]:
df_dict.keys()

dict_keys(['2002', '2003', '2004', '2005', '2006', '2007', '2008'])

In [68]:
for year in df_dict:
    print(year)
    

2002
2003
2004
2005
2006
2007
2008


In [70]:
#Get all rows with NC1
nc1 = {}
for year in df_dict:
    nc1[year] = df_dict[year].loc[df_dict[year]['Label'] == 'NC1(failing)']

## Data Validation

In [5]:
print('we have {} rows of data'.format(data.shape[0]))
print('we have {} rows of NC1'.format(nc1.shape[0]))
print('percentage: {:.2f}%'.format(nc1.shape[0]/data.shape[0]*100))

we have 4450 rows of data
we have 461 rows of NC1
percentage: 10.36%


In [6]:
#groupby label and check # unique value
print('List of all labels count:')
data.groupby('Label')['CUSIP'].nunique()

List of all labels count:


Label
0                                41
1.4                             133
FE                              397
IOfailing                         1
IOpassMED                       114
IOpassMEY                        79
MED                            1220
MEY                            1251
NC1(failing)                    461
NC2(z>1 nsf)                     24
NC3(z>1, not paid off, nsf)      24
NMEm                            638
NMEs                              7
Name: CUSIP, dtype: int64

In [7]:
valid = data.groupby('Label')['CUSIP'].nunique()['NC1(failing)'] == nc1.shape[0]
print('Does row of nc1 equals # of nc1 in the data? {}'.format(valid))

Does row of nc1 equals # of nc1 in the data? True


## Explore NC1

In [8]:
count = nc1.groupby('Prospectus')['CUSIP'].nunique().count()
print('There are ' + str(count) + ' of prospectus that have NC1')
print("On average {:.2f} NC1 per prospectus that have NC1".format(nc1.shape[0]/count))

There are 150 of prospectus that have NC1
On average 3.07 NC1 per prospectus that have NC1


In [9]:
show_top = 5
print('Top {} of the MTG_TRANCHE_TYP_LONG among NC1 are:'.format(show_top))
nc1.groupby('MTG_TRANCHE_TYP_LONG')['CUSIP'].nunique().sort_values(ascending=False).head(show_top)

Top 5 of the MTG_TRANCHE_TYP_LONG among NC1 are:


MTG_TRANCHE_TYP_LONG
SUB,CSTR,NAS    281
SUB,NAS         126
MEZ,FLT,STEP     10
SUB,CSTR,AS       7
SUB,CSTR          4
Name: CUSIP, dtype: int64

In [10]:
valid = (data.loc[data['Label'] != 'NC1(failing)'].shape[0]) == data.shape[0]-nc1.shape[0]
print("Is [data['Label'] != 'NC1(failing)'] valid? {}".format(valid))

Is [data['Label'] != 'NC1(failing)'] valid? True


In [11]:
print('Top {} of the MTG_TRANCHE_TYP_LONG among non-NC1s are:'.format(show_top))
data.loc[data['Label'] != 'NC1(failing)'].groupby('MTG_TRANCHE_TYP_LONG')['CUSIP'].nunique().sort_values(ascending=False).head(show_top)

Top 5 of the MTG_TRANCHE_TYP_LONG among non-NC1s are:


MTG_TRANCHE_TYP_LONG
SUB,CSTR,NAS        441
SEQ,AS              262
SUB,NAS             238
MEZ,FLT,STEP,IRC    227
CSTR,PT,AS          189
Name: CUSIP, dtype: int64

In 2004, MTG_TRANCHE_TYP_LONG among NC1 and non-NC1 data set, SUB (Subordinated), CSTR(Collateral Strip Rate), and NAS(Non-Accelerated Security) seems to be the top components. <br>

In 2005, MTG_TRANCHE_TYP_LONG among NC1 and non-NC1 data set, FLT(Floater), STEP(Stepped Rate Bond), and MEZ(Mezzanine) seems to be the top components. <br>

For NC1 in 2004, I couldn't really see big difference between both groups.<br>
For NC1 in 2005, there is really big difference between both groups.<br>

[See here for more info about MTG_TRANCHE_TYP](https://docs.google.com/spreadsheets/d/1MOwPnTr2owqPoJNy73U7UEc3z1RvtzELOCM0ZFxBJU8/edit?usp=sharing)

In [12]:
total = nc1['MTG ORIG AMT'].sum()
print('Sum of MTG ORIG AMT among NC1 = {:.2f}'.format(total))

Sum of MTG ORIG AMT among NC1 = 2113.59


In [13]:
print('Desciption of MTG ORIG AMT among NC1:')
nc1['MTG ORIG AMT'].describe()

Desciption of MTG ORIG AMT among NC1:


count    461.000000
mean       4.584784
std       29.141494
min        0.048000
25%        0.644000
50%        1.200900
75%        2.273000
max      560.470000
Name: MTG ORIG AMT, dtype: float64

## To do
- Look into Bloomberg (Paydown Infomation?)
- Look why payment just suddenly stops instead of gradually decreased and stoped
- Why is CUSIP duplicated?


## Why is CUSIP duplicated?
The folling shows that there are duplicate CUSIP in the data set

In [14]:
data.groupby('Label')['CUSIP'].count()

Label
0                                41
1.4                             136
FE                              400
IOfailing                         1
IOpassMED                       114
IOpassMEY                        81
MED                            1232
MEY                            1285
NC1(failing)                    461
NC2(z>1 nsf)                     24
NC3(z>1, not paid off, nsf)      24
NMEm                            644
NMEs                              7
Name: CUSIP, dtype: int64

In [15]:
data.groupby('Label')['CUSIP'].nunique()

Label
0                                41
1.4                             133
FE                              397
IOfailing                         1
IOpassMED                       114
IOpassMEY                        79
MED                            1220
MEY                            1251
NC1(failing)                    461
NC2(z>1 nsf)                     24
NC3(z>1, not paid off, nsf)      24
NMEm                            638
NMEs                              7
Name: CUSIP, dtype: int64

All the Falses show two are in different sizes

In [16]:
data.groupby('Label')['CUSIP'].count() == data.groupby('Label')['CUSIP'].nunique()

Label
0                               True
1.4                            False
FE                             False
IOfailing                       True
IOpassMED                       True
IOpassMEY                      False
MED                            False
MEY                            False
NC1(failing)                    True
NC2(z>1 nsf)                    True
NC3(z>1, not paid off, nsf)     True
NMEm                           False
NMEs                            True
Name: CUSIP, dtype: bool

These are what CUSIPs are duplicated.

In [17]:
data.groupby(by=['Label','CUSIP']).size().sort_values(ascending=False)

Label      CUSIP    
MEY        126671Y75    3
           126671Z74    3
MED        126671Y67    3
           1266712F2    3
           1266712B1    3
           1266712A3    3
MEY        126671Y83    3
MED        126671Z90    3
           126671Z82    3
MEY        126671Z66    3
           126671Z58    3
           126671Y91    3
           126671Z25    3
           126671Z33    3
           126671Z41    3
           576433QQ2    2
1.4        073879LU0    2
           073879LV8    2
MEY        576433QV1    2
FE         073879LT3    2
           57643MEZ3    2
IOpassMEY  57643MEJ9    2
1.4        57643MFA7    2
MEY        576433QS8    2
           576433QT6    2
           576433QU3    2
           576433QW9    2
           576433QR0    2
FE         576433RD0    2
MEY        576433RB4    2
                       ..
           61748HHG9    1
           61748HHF1    1
           61748HHE4    1
           61748HHD6    1
           61748HHC8    1
           61748HHB0    1
           61748H

In [18]:
nc1['CUSIP']

79      45660N7S8
80      45660N7T6
81      45660N7U3
82      45660N7V1
83      45660N7W9
156     885220GV9
157     885220GX5
201     55265WBX0
202     55265WBY8
217     61748HCT6
248     759950EX2
261     05946XNJ8
262     05946XNK5
263     05946XNL3
264     05946XNM1
275     61748HGM7
276     61748HGN5
277     61748HGP0
289     06051GBK4
290     06051GBL2
310     576433SS6
311     576433ST4
324     86358ENQ5
342     12667FNT8
343     12667FNU5
344     12667FNV3
418     07384MW24
419     07384MW32
461     05948KQG7
462     05948KQY8
          ...    
4103    61748HFR7
4114    76112BAH3
4125    86358EPK6
4158    45660N7E9
4159    45660N7G4
4166    07384MX56
4167    07384MX64
4168    07384MX72
4169    07384MX80
4197    317350AJ5
4262    12669FSQ7
4263    12669FSR5
4279    576434VH4
4280    576434VJ0
4281    576434VK7
4286    576434VQ4
4287    576434VR2
4316    949758AC0
4332    12667FN43
4333    12667FN50
4334    12667FN68
4335    12667FN76
4357    576434SA3
4358    576434SB1
4414    07

In [19]:
nc1

Unnamed: 0,Year,PID,Prospectus,Class,norm_class,Name,Current_Balance,Zero-Balance Payment Period Number,Sum Principle Paid,MTG ORIG AMT,...,CUSIP,MTG_TRANCHE_TYP_LONG,Moody Rating,Initial Moody Rating,Bloomberg Composite,HCLB,MTG INT SHRTFLL,HIST INTRST SHRTFLL,Label,NL_fail
79,2004,FC_1033,INDYMAC_1310078_0001125282-04-005953.txt,B2,B,INDX 2004-AR13 B2,0.0,1,2.0528,5.9600,...,45660N7S8,"SUB,CSTR,NAS",WR,A3,NR,,,0\0\0,NC1(failing),0
80,2004,FC_1033,INDYMAC_1310078_0001125282-04-005953.txt,B3,B,INDX 2004-AR13 B3,0.0,1,1.1893,3.7250,...,45660N7T6,"SUB,CSTR,NAS",WR,Baa3,NR,,,0\0\0,NC1(failing),0
81,2004,FC_1033,INDYMAC_1310078_0001125282-04-005953.txt,B4,B,INDX 2004-AR13 B4,0.0,1,0.9481,2.9800,...,45660N7U3,"SUB,NAS",,,NR,,,0\0\0,NC1(failing),0
82,2004,FC_1033,INDYMAC_1310078_0001125282-04-005953.txt,B5,B,INDX 2004-AR13 B5,0.0,1,0.5617,1.7900,...,45660N7V1,"SUB,NAS",,,NR,,,0\0\0,NC1(failing),0
83,2004,FC_1033,INDYMAC_1310078_0001125282-04-005953.txt,B6,B,INDX 2004-AR13 B6,0.0,1,0.3288,1.1906,...,45660N7W9,"SUB,NAS",,,NR,,,0\0\0,NC1(failing),0
156,2004,FC_1090,THORNBURG_1312521_0001125282-04-006454.txt,B5,B,TMST 2004-4 B5,0.0,1,0.5235,1.7000,...,885220GV9,"SUB,CSTR,NAS",WR,B2,NR,,,0\0\0,NC1(failing),0
157,2004,FC_1090,THORNBURG_1312521_0001125282-04-006454.txt,B6,B,TMST 2004-4 B6,0.0,1,0.8386,3.4034,...,885220GX5,"SUB,CSTR,NAS",NR,NR,NR,,,0\0\0,NC1(failing),0
201,2004,FC_1098,MASTR_1305066_0000950117-04-003502.txt,CB5,M,MSSTR 2004-1 CB5,0.0,1,0.0959,0.2940,...,55265WBX0,"SUB,CSTR,NAS",,,NR,,,0\0\0,NC1(failing),0
202,2004,FC_1098,MASTR_1305066_0000950117-04-003502.txt,CB6,M,MSSTR 2004-1 CB6,0.0,1,0.1207,0.5879,...,55265WBY8,"SUB,CSTR,NAS",,,NR,,,0\0\0,NC1(failing),0
217,2004,FC_110,MORGAN_STANLEY_1301834_0000950136-04-002821.htm,B4,B,MSM 2004-7AR B4,0.0,1,0.7051,3.2380,...,61748HCT6,"SUB,CSTR,NAS",,,NR,,,0\0\0,NC1(failing),0
