In [1]:
import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

This notebook shows some basic statistics about securities labeled as NC1.

## Load Data

In [2]:
# Load the trimmed 2005 securities extract; the later cells read from `data`.
source_csv = 'trimmed_2005_v2.csv'
data = pd.read_csv(source_csv)

In [3]:
# Keep only the rows whose Label is exactly 'NC1(failing)'.
is_nc1 = data['Label'].eq('NC1(failing)')
nc1 = data.loc[is_nc1]

## Data Validation

In [4]:
# Report how many rows were loaded and what share of them is NC1.
n_rows_all = len(data)
n_rows_nc1 = len(nc1)
print('we have {} rows of data'.format(n_rows_all))
print('we have {} rows of NC1'.format(n_rows_nc1))
nc1_share_pct = n_rows_nc1 / n_rows_all * 100
print('percentage: {:.2f}%'.format(nc1_share_pct))

we have 5015 rows of data
we have 1402 rows of NC1
percentage: 27.96%


In [5]:
# Number of distinct CUSIP values under each Label.
print('List of all labels count:')
unique_cusips_by_label = data.groupby('Label')['CUSIP'].nunique()
unique_cusips_by_label

List of all labels count:


Label
0                                59
1.4                              99
FE                              374
IOfailing                         1
IOpassMED                        46
IOpassMEY                        97
MED                             965
MEY                            1108
NC1(failing)                   1388
NC2(z>1 nsf)                     36
NC3(z>1, not paid off, nsf)      56
NMEm                            696
NMEs                             34
Name: CUSIP, dtype: int64

In [6]:
# Number of distinct Name values under each Label.
print('List of all labels count:')
unique_names_by_label = data.groupby('Label')['Name'].nunique()
unique_names_by_label

List of all labels count:


Label
0                                59
1.4                              99
FE                              374
IOfailing                         1
IOpassMED                        46
IOpassMEY                        97
MED                             965
MEY                            1108
NC1(failing)                   1388
NC2(z>1 nsf)                     36
NC3(z>1, not paid off, nsf)      56
NMEm                            696
NMEs                             34
Name: Name, dtype: int64

In [7]:
# Compare the distinct-CUSIP count of the NC1 label with the number of NC1
# rows; False means the two counts disagree (e.g. because a CUSIP repeats).
nc1_unique_cusips = data.groupby('Label')['CUSIP'].nunique().loc['NC1(failing)']
valid = nc1_unique_cusips == nc1.shape[0]
print('Does row of nc1 equals # of nc1 in the data? {}'.format(valid))

Does row of nc1 equals # of nc1 in the data? False


## Explore NC1

In [8]:
# Count the Prospectus groups present among the NC1 rows, then report the
# average number of NC1 rows per such prospectus.
# NOTE(review): the numerator is the NC1 row count, so a CUSIP that appears on
# more than one row is counted once per row — confirm that is intended.
nc1_cusips_by_prospectus = nc1.groupby('Prospectus')['CUSIP'].nunique()
count = nc1_cusips_by_prospectus.count()
print('There are ' + str(count) + ' of prospectus that have NC1')
nc1_rows_per_prospectus = nc1.shape[0] / count
print("On average {:.2f} NC1 per prospectus that have NC1".format(nc1_rows_per_prospectus))

There are 263 of prospectus that have NC1
On average 5.33 NC1 per prospectus that have NC1


In [9]:
# How many of the most common tranche types to list (also read by a later cell).
show_top = 5
print('Top {} of the MTG_TRANCHE_TYP_LONG among NC1 are:'.format(show_top))
# Distinct CUSIPs per tranche type, largest first, cut to the first show_top rows.
nc1_cusips_by_tranche = nc1.groupby('MTG_TRANCHE_TYP_LONG')['CUSIP'].nunique()
nc1_cusips_by_tranche.sort_values(ascending=False).iloc[:show_top]

Top 5 of the MTG_TRANCHE_TYP_LONG among NC1 are:


MTG_TRANCHE_TYP_LONG
SUB,CSTR,NAS        474
SUB,NAS             133
MEZ,FLT,STEP        130
MEZ,FLT,STEP,IRC     89
SUB,FLT,STEP,IRC     55
Name: CUSIP, dtype: int64

In [10]:
# Sanity check: the rows not labeled NC1 plus the NC1 rows must add up to all rows.
non_nc1_rows = data.loc[data['Label'].ne('NC1(failing)')]
valid = len(non_nc1_rows) == len(data) - len(nc1)
print("Is [data['Label'] != 'NC1(failing)'] valid? {}".format(valid))

Is [data['Label'] != 'NC1(failing)'] valid? True


In [11]:
# Same ranking as the NC1 cell, but over every row whose Label is not 'NC1(failing)'.
print('Top {} of the MTG_TRANCHE_TYP_LONG among non-NC1s are:'.format(show_top))
non_nc1 = data.loc[data['Label'] != 'NC1(failing)']
non_nc1_cusips_by_tranche = non_nc1.groupby('MTG_TRANCHE_TYP_LONG')['CUSIP'].nunique()
non_nc1_cusips_by_tranche.sort_values(ascending=False).head(show_top)

Top 5 of the MTG_TRANCHE_TYP_LONG among non-NC1s are:


MTG_TRANCHE_TYP_LONG
MEZ,FLT,STEP,IRC    237
FLT,STEP,IRC        221
FLT,STEP            186
MEZ,FLT,STEP        172
SEQ,AS              158
Name: CUSIP, dtype: int64

In 2004, among the NC1 and non-NC1 data sets, SUB (Subordinated), CSTR (Collateral Strip Rate), and NAS (Non-Accelerated Security) seem to be the top components of MTG_TRANCHE_TYP_LONG. <br>

In 2005, among the NC1 and non-NC1 data sets, FLT (Floater), STEP (Stepped Rate Bond), and MEZ (Mezzanine) seem to be the top components of MTG_TRANCHE_TYP_LONG. <br>

For NC1 in 2004, I couldn't really see a big difference between the two groups.<br>
For NC1 in 2005, there is a really big difference between the two groups.<br>

[See here for more info about MTG_TRANCHE_TYP](https://docs.google.com/spreadsheets/d/1MOwPnTr2owqPoJNy73U7UEc3z1RvtzELOCM0ZFxBJU8/edit?usp=sharing)

In [12]:
# Add up the 'MTG ORIG AMT' column over all NC1 rows.
nc1_orig_amt = nc1.loc[:, 'MTG ORIG AMT']
total = nc1_orig_amt.sum()
print('Sum of MTG ORIG AMT among NC1 = {:.2f}'.format(total))

Sum of MTG ORIG AMT among NC1 = 10417.03


In [13]:
# Summary statistics of 'MTG ORIG AMT' over the NC1 rows.
print('Desciption of MTG ORIG AMT among NC1:')
nc1_orig_amounts = nc1.loc[:, 'MTG ORIG AMT']
nc1_orig_amounts.describe()

Desciption of MTG ORIG AMT among NC1:


count    1402.000000
mean        7.430118
std         8.223513
min         0.000100
25%         2.160000
50%         5.038000
75%        10.025500
max       151.617000
Name: MTG ORIG AMT, dtype: float64

## To do
- Look into Bloomberg (Paydown Information?)
- Look into why payments suddenly stop instead of gradually decreasing and then stopping
- Why is CUSIP duplicated?


## Why is CUSIP duplicated?
The following shows that there are duplicate CUSIPs in the data set.

In [14]:
# Non-null CUSIP entries per Label (a repeated CUSIP is counted every time).
cusip_rows_by_label = data.groupby('Label')['CUSIP'].count()
cusip_rows_by_label

Label
0                                59
1.4                             100
FE                              374
IOfailing                         1
IOpassMED                        46
IOpassMEY                        98
MED                             974
MEY                            1126
NC1(failing)                   1402
NC2(z>1 nsf)                     39
NC3(z>1, not paid off, nsf)      61
NMEm                            701
NMEs                             34
Name: CUSIP, dtype: int64

In [15]:
# Distinct CUSIPs per Label, for comparison with the per-label entry counts.
distinct_cusips_by_label = data.groupby('Label')['CUSIP'].nunique()
distinct_cusips_by_label

Label
0                                59
1.4                              99
FE                              374
IOfailing                         1
IOpassMED                        46
IOpassMEY                        97
MED                             965
MEY                            1108
NC1(failing)                   1388
NC2(z>1 nsf)                     36
NC3(z>1, not paid off, nsf)      56
NMEm                            696
NMEs                             34
Name: CUSIP, dtype: int64

Each False below marks a label whose CUSIP count and unique-CUSIP count differ in size.

In [16]:
# True where a Label's CUSIP entry count equals its distinct-CUSIP count.
cusips_by_label = data.groupby('Label')['CUSIP']
cusips_by_label.count() == cusips_by_label.nunique()

Label
0                               True
1.4                            False
FE                              True
IOfailing                       True
IOpassMED                       True
IOpassMEY                      False
MED                            False
MEY                            False
NC1(failing)                   False
NC2(z>1 nsf)                   False
NC3(z>1, not paid off, nsf)    False
NMEm                           False
NMEs                            True
Name: CUSIP, dtype: bool

These are the CUSIPs that are duplicated.

In [17]:
# Count the rows for every (Label, CUSIP) pair and keep only the pairs that
# occur more than once, so the result lists the duplicated CUSIPs themselves
# rather than every pair in the data set.
rows_per_label_cusip = data.groupby(by=['Label', 'CUSIP']).size()
duplicated_cusips = rows_per_label_cusip[rows_per_label_cusip > 1]
duplicated_cusips.sort_values(ascending=False)

Label                        CUSIP    
MEY                          05948KXX2    2
                             949920AB8    2
MED                          94983NAV5    2
                             949920AJ1    2
                             949920AN2    2
NC1(failing)                 05948KXU8    2
                             05948KYE3    2
                             05948KYF0    2
                             05948KYG8    2
                             05948KYH6    2
                             05948KYJ2    2
MEY                          94983NAA1    2
                             94983NAB9    2
                             94983NAE3    2
                             94983NAG8    2
                             94983NAK9    2
                             94983NAM5    2
MED                          94983NAU7    2
                             94983NAJ2    2
                             94983NAH6    2
NC3(z>1, not paid off, nsf)  949920AG7    2
MEY                          05948KXQ

In [32]:
# Row count for every (Label, CUSIP, Prospectus) combination, largest first.
# NOTE(review): the saved output under this cell is indexed by (CUSIP, PID),
# which does not match these grouping keys — it looks stale; re-run to refresh.
rows_per_label_cusip_prospectus = data.groupby(by=['Label', 'CUSIP', 'Prospectus']).size()
rows_per_label_cusip_prospectus.sort_values(ascending=False)

CUSIP             PID    
BCC0LJX74         FC_752     1
12669GL58         FC_934     1
12669GK59         FC_934     1
12669GK67         FC_934     1
12669GK75         FC_934     1
12669GK83         FC_934     1
12669GL25         FC_934     1
12669GL33         FC_934     1
12669GL41         FC_934     1
12669GL66         FC_934     1
126694UG1         FC_1723    1
12669GL74         FC_934     1
12669GL82         FC_934     1
12700000          FC_440     1
12700000000000    FC_1537    1
1270000000000000  FC_1725    1
144531BR0         FC_906     1
144531BS8         FC_906     1
126694UH9         FC_1723    1
126694UF3         FC_1723    1
17307GXG8         FC_1659    1
126694TT5         FC_1723    1
126694RZ3         FC_1518    1
126694SA7         FC_1518    1
126694SB5         FC_1518    1
126694SC3         FC_1518    1
126694TQ1         FC_1723    1
126694TR9         FC_1723    1
126694TS7         FC_1723    1
126694TU2         FC_1723    1
                            ..
71085PDB6    