In [1]:
import pandas as pd
import numpy as np
import datetime
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

In [2]:
# Loading the data

data = pd.read_csv('learningSet.csv')

In [3]:
data

Unnamed: 0,ODATEDW,OSOURCE,TCODE,STATE,ZIP,MAILCODE,PVASTATE,DOB,NOEXCH,RECINHSE,...,TARGET_D,HPHONE_D,RFA_2R,RFA_2F,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,CLUSTER2,GEOCODE2
0,8901,GRI,0,IL,61081,,,3712,0,,...,0.0,0,L,4,E,X,X,X,39.0,C
1,9401,BOA,1,CA,91326,,,5202,0,,...,0.0,0,L,2,G,X,X,X,1.0,A
2,9001,AMH,1,NC,27017,,,0,0,,...,0.0,1,L,4,E,X,X,X,60.0,C
3,8701,BRY,0,CA,95953,,,2801,0,,...,0.0,1,L,4,E,X,X,X,41.0,C
4,8601,,0,FL,33176,,,2001,0,X,...,0.0,1,L,2,F,X,X,X,26.0,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,ASE,1,AK,99504,,,0,0,,...,0.0,0,L,1,G,X,X,X,12.0,C
95408,9601,DCD,1,TX,77379,,,5001,0,,...,0.0,1,L,1,F,X,X,X,2.0,A
95409,9501,MBC,1,MI,48910,,,3801,0,,...,0.0,1,L,3,E,X,X,X,34.0,B
95410,8601,PRV,0,CA,91320,,,4005,0,X,...,18.0,1,L,4,F,X,X,X,11.0,A


### Lab 1

In [4]:
# Keep only the object-dtype (string / categorical) columns

dfcat = data.select_dtypes(include='object')

In [5]:
dfcat.columns

Index(['OSOURCE', 'STATE', 'ZIP', 'MAILCODE', 'PVASTATE', 'NOEXCH', 'RECINHSE',
       'RECP3', 'RECPGVG', 'RECSWEEP', 'MDMAUD', 'DOMAIN', 'CLUSTER',
       'AGEFLAG', 'HOMEOWNR', 'CHILD03', 'CHILD07', 'CHILD12', 'CHILD18',
       'GENDER', 'DATASRCE', 'SOLP3', 'SOLIH', 'MAJOR', 'GEOCODE', 'COLLECT1',
       'VETERANS', 'BIBLE', 'CATLG', 'HOMEE', 'PETS', 'CDPLAY', 'STEREO',
       'PCOWNERS', 'PHOTO', 'CRAFTS', 'FISHER', 'GARDENIN', 'BOATS', 'WALKER',
       'KIDSTUFF', 'CARDS', 'PLATES', 'LIFESRC', 'PEPSTRFL', 'RFA_2', 'RFA_3',
       'RFA_4', 'RFA_5', 'RFA_6', 'RFA_7', 'RFA_8', 'RFA_9', 'RFA_10',
       'RFA_11', 'RFA_12', 'RFA_13', 'RFA_14', 'RFA_15', 'RFA_16', 'RFA_17',
       'RFA_18', 'RFA_19', 'RFA_20', 'RFA_21', 'RFA_22', 'RFA_23', 'RFA_24',
       'RFA_2R', 'RFA_2A', 'MDMAUD_R', 'MDMAUD_F', 'MDMAUD_A', 'GEOCODE2'],
      dtype='object')

In [6]:
# NaNs in categorical variables 

dfcat.isna().sum()

OSOURCE       0
STATE         0
ZIP           0
MAILCODE      0
PVASTATE      0
           ... 
RFA_2A        0
MDMAUD_R      0
MDMAUD_F      0
MDMAUD_A      0
GEOCODE2    132
Length: 74, dtype: int64

In [7]:
# Share of missing values per categorical column, expressed as a percentage of all rows

nulls_cat = dfcat.isna().sum().mul(100).div(len(dfcat)).to_frame(name='percentage')
nulls_cat.sort_values(by='percentage', ascending=False).head(30)

Unnamed: 0,percentage
GEOCODE2,0.138347
RFA_12,0.0
RFA_10,0.0
RFA_9,0.0
RFA_8,0.0
RFA_7,0.0
RFA_6,0.0
RFA_5,0.0
RFA_4,0.0
RFA_3,0.0


In [8]:
# Creating a list with the columns' names that I will drop

drop_list_cat = []

In [9]:
# Collect the high-cardinality columns: 50 or more distinct values, with NaN
# counted as a value (same as len(Series.unique())).
# The list is rebuilt from scratch instead of appended to, so re-running this
# cell cannot add the same column names twice.

drop_list_cat = [col for col in dfcat if dfcat[col].nunique(dropna=False) >= 50]

drop_list_cat

['OSOURCE',
 'STATE',
 'ZIP',
 'CLUSTER',
 'RFA_3',
 'RFA_4',
 'RFA_6',
 'RFA_7',
 'RFA_8',
 'RFA_9',
 'RFA_10',
 'RFA_11',
 'RFA_12',
 'RFA_13',
 'RFA_14',
 'RFA_16',
 'RFA_17',
 'RFA_18',
 'RFA_19',
 'RFA_20',
 'RFA_21',
 'RFA_22',
 'RFA_23',
 'RFA_24']

In [10]:
# STATE is kept even though it is high-cardinality.
# The membership check keeps the cell re-runnable: list.remove raises
# ValueError when the value is no longer in the list.
if 'STATE' in drop_list_cat:
    drop_list_cat.remove('STATE')

In [11]:
# Categorical columns whose share of NaNs exceeds 85% (there are none in this data set)

cols_to_delete_cat = nulls_cat.index[nulls_cat['percentage'].gt(85)].tolist()
cols_to_delete_cat

[]

In [12]:
# Drop the high-cardinality columns collected in drop_list_cat.
# DataFrame.drop returns a new frame, so dfcat itself is left untouched and this
# cell can be re-run safely. (Wrapping with pd.DataFrame(dfcat) does not copy the
# data, and deleting the columns one by one raises KeyError on a second run.)

dfcat_no_nulls = dfcat.drop(columns=drop_list_cat)

dfcat_no_nulls

Unnamed: 0,STATE,MAILCODE,PVASTATE,NOEXCH,RECINHSE,RECP3,RECPGVG,RECSWEEP,MDMAUD,DOMAIN,...,PEPSTRFL,RFA_2,RFA_5,RFA_15,RFA_2R,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,GEOCODE2
0,IL,,,0,,,,,XXXX,T2,...,X,L4E,S4E,S4E,L,E,X,X,X,C
1,CA,,,0,,,,,XXXX,S1,...,,L2G,A2G,,L,G,X,X,X,A
2,NC,,,0,,,,,XXXX,R2,...,X,L4E,S4E,S4F,L,E,X,X,X,C
3,CA,,,0,,,,,XXXX,R2,...,X,L4E,S4E,S4E,L,E,X,X,X,C
4,FL,,,0,X,X,,,XXXX,S2,...,,L2F,A2F,,L,F,X,X,X,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,AK,,,0,,,,,XXXX,C2,...,,L1G,,,L,G,X,X,X,C
95408,TX,,,0,,,,,XXXX,C1,...,,L1F,,,L,F,X,X,X,A
95409,MI,,,0,,X,,,XXXX,C3,...,X,L3E,S3E,N2E,L,E,X,X,X,B
95410,CA,,,0,X,,,,XXXX,C1,...,X,L4F,S4F,S4F,L,F,X,X,X,A


In [13]:
# Checking values for variable GENDER

dfcat_no_nulls['GENDER'].value_counts()

F    51277
M    39094
      2957
U     1715
J      365
C        2
A        2
Name: GENDER, dtype: int64

In [14]:
# Fill NaN genders with 'F'.
# NOTE: the value counts are identical before and after this cell -- the
# missing genders are blank strings, not NaN, so nothing is filled here.

dfcat_no_nulls['GENDER'] = dfcat_no_nulls['GENDER'].fillna(value='F')

In [15]:
# The counts are unchanged: missing values are stored as blank strings (' '), not NaN, so fillna had no effect

dfcat_no_nulls['GENDER'].value_counts()

F    51277
M    39094
      2957
U     1715
J      365
C        2
A        2
Name: GENDER, dtype: int64

In [16]:
dfcat['GENDER'].isna().sum()

0

In [17]:
# Function to clean variable GENDER

def clean_gender(x):
    """Map a raw GENDER code to 'Male', 'Female' or 'Other'.

    Parameters
    ----------
    x : object
        Raw value from the GENDER column. Strings are compared
        case-insensitively and ignoring surrounding whitespace.

    Returns
    -------
    str
        'Male' for 'M', 'Female' for 'F', 'Female' for a missing value
        (NaN / None / blank string -- missing entries are imputed with the
        most frequent category), and 'Other' for every other code.
    """
    # NaN / None have no .lower(); treat them like a blank entry instead of crashing
    if pd.isna(x):
        return 'Female'

    code = str(x).strip().lower()
    if code == 'm':
        return 'Male'
    # An empty code after stripping means the field was blank (missing)
    if code in ('f', ''):
        return 'Female'
    return 'Other'

In [18]:
# Normalise every GENDER value through clean_gender

dfcat_no_nulls['GENDER'] = dfcat_no_nulls['GENDER'].map(clean_gender)

In [19]:
# New value counts for GENDER after cleaning it

dfcat_no_nulls['GENDER'].value_counts()

Female    54234
Male      39094
Other      2084
Name: GENDER, dtype: int64

### Lab 2

In [20]:
# Keep only the numeric columns

dfnum = data.select_dtypes(include=np.number)

In [21]:
# NaNs in numerical variables 

dfnum.isna().sum()

ODATEDW         0
TCODE           0
DOB             0
AGE         23665
NUMCHLD     83026
            ...  
TARGET_B        0
TARGET_D        0
HPHONE_D        0
RFA_2F          0
CLUSTER2      132
Length: 407, dtype: int64

In [22]:
# Share of missing values per numeric column, expressed as a percentage of all rows

nulls_num = dfnum.isna().sum().mul(100).div(len(dfnum)).to_frame(name='percentage')
nulls_num.sort_values(by='percentage', ascending=False).head(30)

Unnamed: 0,percentage
RDATE_5,99.990567
RAMNT_5,99.990567
RDATE_3,99.746363
RAMNT_3,99.746363
RDATE_4,99.705488
RAMNT_4,99.705488
RAMNT_6,99.186685
RDATE_6,99.186685
RDATE_15,92.388798
RAMNT_15,92.388798


In [23]:
# Numeric columns in which more than 50% of the values are NaN

cols_to_delete_num = nulls_num.index[nulls_num['percentage'].gt(50)].tolist()
cols_to_delete_num

['NUMCHLD',
 'MBCRAFT',
 'MBGARDEN',
 'MBBOOKS',
 'MBCOLECT',
 'MAGFAML',
 'MAGFEM',
 'MAGMALE',
 'PUBGARDN',
 'PUBCULIN',
 'PUBHLTH',
 'PUBDOITY',
 'PUBNEWFN',
 'PUBPHOTO',
 'PUBOPP',
 'ADATE_15',
 'ADATE_20',
 'ADATE_23',
 'RDATE_3',
 'RDATE_4',
 'RDATE_5',
 'RDATE_6',
 'RDATE_7',
 'RDATE_8',
 'RDATE_9',
 'RDATE_10',
 'RDATE_11',
 'RDATE_12',
 'RDATE_13',
 'RDATE_14',
 'RDATE_15',
 'RDATE_16',
 'RDATE_17',
 'RDATE_18',
 'RDATE_19',
 'RDATE_20',
 'RDATE_21',
 'RDATE_22',
 'RDATE_23',
 'RDATE_24',
 'RAMNT_3',
 'RAMNT_4',
 'RAMNT_5',
 'RAMNT_6',
 'RAMNT_7',
 'RAMNT_8',
 'RAMNT_9',
 'RAMNT_10',
 'RAMNT_11',
 'RAMNT_12',
 'RAMNT_13',
 'RAMNT_14',
 'RAMNT_15',
 'RAMNT_16',
 'RAMNT_17',
 'RAMNT_18',
 'RAMNT_19',
 'RAMNT_20',
 'RAMNT_21',
 'RAMNT_22',
 'RAMNT_23',
 'RAMNT_24']

In [24]:
# Remove the numeric columns that are more than half empty

dfnum = dfnum.drop(cols_to_delete_num, axis='columns')
dfnum

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,...,FISTDATE,NEXTDATE,TIMELAG,AVGGIFT,CONTROLN,TARGET_B,TARGET_D,HPHONE_D,RFA_2F,CLUSTER2
0,8901,0,3712,60.0,,,0,0,39,34,...,8911,9003.0,4.0,7.741935,95515,0,0.0,0,4,39.0
1,9401,1,5202,46.0,6.0,9.0,16,0,15,55,...,9310,9504.0,18.0,15.666667,148535,0,0.0,0,2,1.0
2,9001,1,0,,3.0,1.0,2,0,20,29,...,9001,9101.0,12.0,7.481481,15078,0,0.0,1,4,60.0
3,8701,0,2801,70.0,1.0,4.0,2,0,23,14,...,8702,8711.0,9.0,6.812500,172556,0,0.0,1,4,41.0
4,8601,0,2001,78.0,3.0,2.0,60,1,28,9,...,7903,8005.0,14.0,6.864865,7112,0,0.0,1,2,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,,,,0,14,36,47,...,9602,,,25.000000,184568,0,0.0,0,1,12.0
95408,9601,1,5001,48.0,7.0,9.0,1,0,31,43,...,9603,,,20.000000,122706,0,0.0,1,1,2.0
95409,9501,1,3801,60.0,,,0,0,18,46,...,9410,9501.0,3.0,8.285714,189641,0,0.0,1,3,34.0
95410,8601,0,4005,58.0,7.0,,0,0,28,35,...,8612,8704.0,4.0,12.146341,4693,1,18.0,1,4,11.0


In [25]:
# Taking care of GEOCODE2

dfcat_no_nulls['GEOCODE2'].value_counts()

A    34484
B    28505
D    16580
C    15524
       187
Name: GEOCODE2, dtype: int64

In [26]:
dfcat_no_nulls['GEOCODE2'].dtypes

dtype('O')

In [27]:
# A blank means no code has been assigned or there was no match, so blanks become 'No code'.
# The pattern matches the whole value (empty or whitespace only); a plain
# .str.replace(' ', ...) is a substring replace and would rewrite every space
# inside a value. NaNs are left untouched here.

dfcat_no_nulls['GEOCODE2'] = dfcat_no_nulls['GEOCODE2'].replace(r'^\s*$', 'No code', regex=True)

In [28]:
dfcat_no_nulls['GEOCODE2'].isna().sum()

132

In [29]:
# The remaining NaNs get the same 'No code' label as the blanks

dfcat_no_nulls['GEOCODE2'] = dfcat_no_nulls['GEOCODE2'].fillna(value='No code')

In [30]:
dfcat_no_nulls['GEOCODE2'].isna().sum()

0

In [31]:
# Taking care of WEALTH1

dfnum['WEALTH1'].value_counts()

9.0    7585
8.0    6793
7.0    6198
6.0    5825
5.0    5280
4.0    4810
3.0    4237
2.0    4085
1.0    3454
0.0    2413
Name: WEALTH1, dtype: int64

In [32]:
dfnum['WEALTH1'].dtypes

dtype('float64')

In [33]:
# It seems there are a lot of empty values

dfnum['WEALTH1'].isna().sum()

44732

In [34]:
# WEALTH1 is a wealth rating from 0 to 9, so it is handled as a label (object) rather than a quantity.
# Casting to str also turns every NaN into the literal string 'nan'.

dfnum['WEALTH1'] = dfnum['WEALTH1'].astype('str')
dfnum['WEALTH1'].dtype

dtype('O')

In [35]:
# The NaNs indicate no rating 

dfnum['WEALTH1'].value_counts()

nan    44732
9.0     7585
8.0     6793
7.0     6198
6.0     5825
5.0     5280
4.0     4810
3.0     4237
2.0     4085
1.0     3454
0.0     2413
Name: WEALTH1, dtype: int64

In [36]:
# Relabel the stringified NaNs ('nan') as 'No rating'

dfnum['WEALTH1'] = dfnum['WEALTH1'].str.replace(pat='nan', repl='No rating')

In [37]:
dfnum['WEALTH1'].value_counts()

No rating    44732
9.0           7585
8.0           6793
7.0           6198
6.0           5825
5.0           5280
4.0           4810
3.0           4237
2.0           4085
1.0           3454
0.0           2413
Name: WEALTH1, dtype: int64

In [38]:
# Taking care of ADI

dfnum['ADI'].value_counts()

13.0     7296
51.0     4622
65.0     3765
57.0     2836
105.0    2617
         ... 
651.0       1
103.0       1
601.0       1
161.0       1
147.0       1
Name: ADI, Length: 204, dtype: int64

In [39]:
dfnum['ADI'].isna().sum()

132

In [40]:
dfnum['ADI'].mean(), dfnum['ADI'].median()

(187.35640218303945, 175.0)

In [41]:
# Impute the missing ADI values with the column median.
# NOTE(review): ADI has ~200 distinct values and looks like an area code rather
# than a measurement -- confirm that a median is a meaningful fill value.

dfnum['ADI'] = dfnum['ADI'].where(dfnum['ADI'].notna(), dfnum['ADI'].median())

In [42]:
dfnum['ADI'].isna().sum()

0

In [43]:
# Taking care of DMA

dfnum['DMA'].value_counts()

803.0    7296
602.0    4632
807.0    3765
505.0    2839
819.0    2588
         ... 
569.0       1
554.0       1
584.0       1
552.0       1
516.0       1
Name: DMA, Length: 206, dtype: int64

In [44]:
dfnum['DMA'].isna().sum()

132

In [45]:
dfnum['DMA'].mean(), dfnum['DMA'].median()

(664.0040722082284, 635.0)

In [46]:
# Impute the missing DMA values with the column median.
# NOTE(review): DMA has ~200 distinct values and looks like an area code rather
# than a measurement -- confirm that a median is a meaningful fill value.

dfnum['DMA'] = dfnum['DMA'].where(dfnum['DMA'].notna(), dfnum['DMA'].median())

In [47]:
dfnum['DMA'].isna().sum()

0

In [48]:
# Taking care of MSA

dfnum['MSA'].value_counts()

0.0       21333
4480.0     4606
1600.0     4059
2160.0     2586
520.0      1685
          ...  
9140.0        1
3200.0        1
9280.0        1
743.0         1
8480.0        1
Name: MSA, Length: 298, dtype: int64

In [49]:
dfnum['MSA'].isna().sum()

132

In [50]:
dfnum['MSA'].mean(), dfnum['MSA'].median()

(3527.744101595298, 3350.0)

In [51]:
# Impute the missing MSA values with the column median.
# NOTE(review): MSA has ~300 distinct values and looks like an area code rather
# than a measurement -- confirm that a median is a meaningful fill value.

dfnum['MSA'] = dfnum['MSA'].where(dfnum['MSA'].notna(), dfnum['MSA'].median())

In [52]:
dfnum['MSA'].isna().sum()

0

In [53]:
#Taking care of AGE

dfnum['AGE'].value_counts()

50.0    1930
76.0    1885
72.0    1813
68.0    1809
74.0    1801
        ... 
8.0        1
9.0        1
10.0       1
6.0        1
15.0       1
Name: AGE, Length: 96, dtype: int64

In [54]:
# Impute the missing ages with the median age

dfnum['AGE'] = dfnum['AGE'].where(dfnum['AGE'].notna(), dfnum['AGE'].median())

In [55]:
dfnum['AGE'].isna().sum()

0

In [56]:
# Taking care of INCOME

dfnum['INCOME'].value_counts()

5.0    15451
2.0    13114
4.0    12732
1.0     9022
3.0     8558
6.0     7778
7.0     7471
Name: INCOME, dtype: int64

In [57]:
# INCOME takes a handful of discrete codes, so it is handled as a label (object);
# casting to str also turns every NaN into the literal string 'nan'
dfnum['INCOME'] = dfnum['INCOME'].astype('str')
dfnum['INCOME'].dtype

dtype('O')

In [58]:
# Relabel the stringified NaNs ('nan') as 'No data'
dfnum['INCOME'] = dfnum['INCOME'].str.replace(pat='nan', repl='No data')
dfnum['INCOME'].value_counts()

No data    21286
5.0        15451
2.0        13114
4.0        12732
1.0         9022
3.0         8558
6.0         7778
7.0         7471
Name: INCOME, dtype: int64

In [59]:
# Taking care of WEALTH2

dfnum['WEALTH2'].value_counts()

9.0    6523
8.0    5975
7.0    5684
6.0    5497
5.0    5351
4.0    5074
3.0    5046
2.0    4971
1.0    4191
0.0    3277
Name: WEALTH2, dtype: int64

In [60]:
# WEALTH2 is also a wealth rating from 0 to 9, so it is handled as a label (object) too.
# Casting to str also turns every NaN into the literal string 'nan'.

dfnum['WEALTH2'] = dfnum['WEALTH2'].astype('str')
dfnum['WEALTH2'].dtype

dtype('O')

In [61]:
# Relabel the stringified NaNs ('nan') as 'No rating'

dfnum['WEALTH2'] = dfnum['WEALTH2'].str.replace(pat='nan', repl='No rating')

In [62]:
dfnum['WEALTH2'].value_counts()

No rating    43823
9.0           6523
8.0           5975
7.0           5684
6.0           5497
5.0           5351
4.0           5074
3.0           5046
2.0           4971
1.0           4191
0.0           3277
Name: WEALTH2, dtype: int64

In [63]:
# All per-promotion ADATE_* columns will be dropped; MAXADATE is kept as the single promotion-date feature.
# NOTE(review): MAXADATE is presumably the date of the most recent promotion
# *received* (not responded to) -- confirm against the data dictionary.

cols_to_delete = dfnum.filter(like='ADATE_').columns.tolist()
cols_to_delete

['ADATE_2',
 'ADATE_3',
 'ADATE_4',
 'ADATE_5',
 'ADATE_6',
 'ADATE_7',
 'ADATE_8',
 'ADATE_9',
 'ADATE_10',
 'ADATE_11',
 'ADATE_12',
 'ADATE_13',
 'ADATE_14',
 'ADATE_16',
 'ADATE_17',
 'ADATE_18',
 'ADATE_19',
 'ADATE_21',
 'ADATE_22',
 'ADATE_24']

In [64]:
dfnum = dfnum.drop(cols_to_delete, axis='columns')

In [65]:
dfnum.isna().sum().sort_values(ascending = False).head(30)

TIMELAG     9973
NEXTDATE    9973
CLUSTER2     132
EIC8           0
EIC16          0
EIC15          0
EIC14          0
EIC13          0
EIC12          0
EIC11          0
EIC10          0
EIC9           0
EIC7           0
OEDC2          0
EIC6           0
EIC5           0
EIC4           0
EIC3           0
EIC2           0
EIC1           0
OCC13          0
OEDC1          0
OEDC3          0
OCC11          0
EC6            0
SEC5           0
SEC4           0
SEC3           0
SEC2           0
SEC1           0
dtype: int64

In [66]:
# TIMELAG and NEXTDATE each have 9,973 missing values; both columns are removed

dfnum = dfnum.drop(['TIMELAG', 'NEXTDATE'], axis='columns')

In [67]:
# TARGET_D is the target; TARGET_B is removed because it is highly correlated with it

dfnum = dfnum.drop('TARGET_B', axis='columns')

In [68]:
# Put the cleaned numeric and categorical columns side by side (left join on the row index)

data_final = dfnum.join(other=dfcat_no_nulls, how='left')
data_final

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,...,PEPSTRFL,RFA_2,RFA_5,RFA_15,RFA_2R,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,GEOCODE2
0,8901,0,3712,60.0,No data,No rating,0,0,39,34,...,X,L4E,S4E,S4E,L,E,X,X,X,C
1,9401,1,5202,46.0,6.0,9.0,16,0,15,55,...,,L2G,A2G,,L,G,X,X,X,A
2,9001,1,0,62.0,3.0,1.0,2,0,20,29,...,X,L4E,S4E,S4F,L,E,X,X,X,C
3,8701,0,2801,70.0,1.0,4.0,2,0,23,14,...,X,L4E,S4E,S4E,L,E,X,X,X,C
4,8601,0,2001,78.0,3.0,2.0,60,1,28,9,...,,L2F,A2F,,L,F,X,X,X,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,62.0,No data,No rating,0,14,36,47,...,,L1G,,,L,G,X,X,X,C
95408,9601,1,5001,48.0,7.0,9.0,1,0,31,43,...,,L1F,,,L,F,X,X,X,A
95409,9501,1,3801,60.0,No data,No rating,0,0,18,46,...,X,L3E,S3E,N2E,L,E,X,X,X,B
95410,8601,0,4005,58.0,7.0,No rating,0,0,28,35,...,X,L4F,S4F,S4F,L,F,X,X,X,A


In [69]:
data_final.isna().sum().sort_values(ascending = False).head(30)

CLUSTER2    132
ODATEDW       0
AFC4          0
ANC3          0
ANC2          0
ANC1          0
VC4           0
VC3           0
VC2           0
VC1           0
AFC6          0
AFC5          0
AFC3          0
EC6           0
AFC2          0
AFC1          0
SEC5          0
SEC4          0
SEC3          0
SEC2          0
SEC1          0
EC8           0
ANC4          0
ANC5          0
ANC6          0
ANC7          0
HC2           0
HC1           0
VOC3          0
VOC2          0
dtype: int64

In [70]:
# CLUSTER2 has only a few NaNs, so the affected rows are dropped.
# Assigning Series.dropna() back to the column does not remove any rows (the
# assignment realigns on the index and the NaNs come straight back); the rows
# have to be dropped on the frame itself.

data_final = data_final.dropna(subset=['CLUSTER2'])

In [71]:
data_final

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,...,PEPSTRFL,RFA_2,RFA_5,RFA_15,RFA_2R,RFA_2A,MDMAUD_R,MDMAUD_F,MDMAUD_A,GEOCODE2
0,8901,0,3712,60.0,No data,No rating,0,0,39,34,...,X,L4E,S4E,S4E,L,E,X,X,X,C
1,9401,1,5202,46.0,6.0,9.0,16,0,15,55,...,,L2G,A2G,,L,G,X,X,X,A
2,9001,1,0,62.0,3.0,1.0,2,0,20,29,...,X,L4E,S4E,S4F,L,E,X,X,X,C
3,8701,0,2801,70.0,1.0,4.0,2,0,23,14,...,X,L4E,S4E,S4E,L,E,X,X,X,C
4,8601,0,2001,78.0,3.0,2.0,60,1,28,9,...,,L2F,A2F,,L,F,X,X,X,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,62.0,No data,No rating,0,14,36,47,...,,L1G,,,L,G,X,X,X,C
95408,9601,1,5001,48.0,7.0,9.0,1,0,31,43,...,,L1F,,,L,F,X,X,X,A
95409,9501,1,3801,60.0,No data,No rating,0,0,18,46,...,X,L3E,S3E,N2E,L,E,X,X,X,B
95410,8601,0,4005,58.0,7.0,No rating,0,0,28,35,...,X,L4F,S4F,S4F,L,F,X,X,X,A
