# Exploratory Data Analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
%matplotlib inline

In [2]:
# Load the opportunities export, indexed by opportunity number.
# `Create.Day` is parsed as a datetime and the file is decoded as Latin-1.
data = pd.read_csv(
    '../data/opportunities.csv',
    index_col='Opportunity.Number',
    parse_dates=['Create.Day'],
    encoding='latin1',
)

Drop any columns we're not going to be interested in:

-  Opportunity.Number (used as the index, so it is not in the drop list below)
-  Acquisition.Day 
-  Customer.Number 
-  End.Day 
-  Order.Number 
-  Order.End.Day 
-  Order.Start.Day 
-  Order.Type 
-  Start.Day 
-  Probability.x
-  Probability.y
-  Order.Entry.Weight.CHF
-  Order.Entry.Weight.EUR

All order-related columns are a very clear source of potential leakage. Once there's an order, then the opportunity is most likely already won, or at least very close to being won.

As for the date fields, our sales team was only able to explain the create day, i.e. the day the opportunity is inserted into the CRM, so we'll leave the others out for now.

Probabilities are just the last value recorded at the moment the opportunity was closed, and the weighted amounts are just the product of that probability and the raw amount.

In [3]:
# Columns excluded from the analysis (see the list and rationale above).
drop_columns = [
    'Acquisition.Day', 'Customer.Number', 'End.Day', 'Order.Number',
    'Order.End.Day', 'Order.Start.Day', 'Order.Type', 'Start.Day',
    'Probability.x', 'Probability.y', 'Order.Entry.Weight.CHF',
    'Order.Entry.Weight.EUR',
]
data = data.drop(drop_columns, axis=1)

# The amount columns are read as strings containing commas (presumably
# thousands separators). Strip them with the vectorised string accessor:
# unlike a row-wise re.sub, it propagates missing values as NaN instead of
# raising TypeError on a non-string cell.
for amount_col in ('Order.Entry.CHF', 'Order.Entry.EUR'):
    data[amount_col] = data[amount_col].str.replace(',', '').astype(float)

# Calendar year in which the opportunity was created.
data['Year'] = data['Create.Day'].dt.year

In [4]:
# Remove the Quantity column from the working frame.
data = data.drop('Quantity', axis=1)

Drop all samples where Customer is NULL

In [5]:
# Keep only the rows that have a Customer value.
data = data.dropna(subset=['Customer'])

In [6]:
# (rows, columns) of the working frame at this point.
data.shape

(29938, 21)

## Response Variable

In [7]:
# Row count per Status value (missing values included), ordered by label.
data['Status'].value_counts(dropna=False).sort_index()

10 Died              6486
11 On hold            116
3 Qualification       596
4 Offer in work       121
5 Offered             385
6 In revision          47
7 In negotiation       66
8 Won               18511
9 Lost               3610
Name: Status, dtype: int64

In [8]:
# Row count per Status.Category value (missing values included), ordered by label.
data['Status.Category'].value_counts(dropna=False).sort_index()

Closed     10096
On Hold      116
Open        1215
Won        18511
Name: Status.Category, dtype: int64

In [9]:
# Binary response: 1 when Status.Category is 'Won', 0 for every other value
# (Closed, On Hold, Open or missing).
data['Target'] = (data['Status.Category'] == 'Won').astype('int64')
# Share of rows with Target == 1.
data['Target'].mean()

0.6183111764312913

Almost 62% for majority class.

The classes are fairly unbalanced. It is also realistic to expect that more opportunities are worked on than are won, so even a 50% share for the majority (won) class would have been suspect at the very least.

For the moment I have no idea if I want to handle this and how.

## Customer features engineering

### First contact

Support column to facilitate aggregations and counts.

In [10]:
# Constant helper column; summing it (cumulatively or over a window) counts rows.
data['one'] = 1

Before applying any logic that depends on the ordering of records within a customer, we should make sure the records are already sorted.

In [11]:
# Move Opportunity.Number from the index into a regular column so it can be
# used as a sort key.
data = data.reset_index()

In [12]:
# Order rows by customer, then creation date; Opportunity.Number is the last
# sort key, so rows sharing a customer and date get a deterministic order.
data = data.sort_values(by=['Customer', 'Create.Day', 'Opportunity.Number'])

In [13]:
# Put Opportunity.Number back as the index; the row order from the sort is kept.
data = data.set_index('Opportunity.Number')

In [14]:
# Running count of each customer's opportunities, up to and including the
# current row (rows follow the Customer / Create.Day / Opportunity.Number sort).
# The helper column is selected *before* cumsum: a frame-wide
# groupby().cumsum() would accumulate every column and fails on columns that
# do not support it (e.g. datetimes) in recent pandas versions.
data['Customer.Contacts.So.Far'] = data.groupby('Customer')['one'].cumsum()

In [15]:
# 1 on a customer's first opportunity (running count equals 1), otherwise 0.
data['Customer.First.Contact'] = (data['Customer.Contacts.So.Far'] == 1).astype('int64')

In [16]:
# Spot-check the running count and first-contact flag on the first 20 rows,
# ordered by customer and creation date.
(
    data[['Customer', 'Create.Day', 'Customer.Contacts.So.Far', 'Customer.First.Contact']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Customer.Contacts.So.Far,Customer.First.Contact
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,1
20596,1&1 De-Mail GmbH [127995],2014-03-17,2,0
23294,1&1 De-Mail GmbH [127995],2014-12-09,3,0
24232,1&1 De-Mail GmbH [127995],2015-03-10,4,0
28501,1&1 De-Mail GmbH [127995],2016-04-01,5,0
783,1&1 Internet SE [4497],2009-03-01,1,1
784,1&1 Internet SE [4497],2009-03-01,2,0
2892,1&1 Internet SE [4497],2009-03-06,3,0
2893,1&1 Internet SE [4497],2009-03-06,4,0
3675,1&1 Internet SE [4497],2009-05-28,5,0


### How many so far

Customer.Contacts.So.Far

### Conversion rate so far

In [17]:
# Running count of each customer's won opportunities.
# The column is selected before cumsum so only Target is accumulated; a
# frame-wide groupby().cumsum() fails on columns that do not support it in
# recent pandas versions.
# NOTE(review): the cumulative sum includes the current row's own Target, so
# this column (and ratios built from it) contains the outcome being predicted.
# Confirm whether a lagged version should be used as a model feature.
data['Customer.Won.So.Far'] = data.groupby('Customer')['Target'].cumsum()

In [18]:
# Won opportunities divided by all opportunities so far, per customer.
won_so_far = data['Customer.Won.So.Far'].astype('float')
data['Customer.ConvRatio.So.Far'] = won_so_far.div(data['Customer.Contacts.So.Far'])

In [19]:
# Spot-check the running won count and conversion ratio on the first 20 rows,
# ordered by customer and creation date.
(
    data[['Customer', 'Create.Day', 'Target', 'Customer.Contacts.So.Far',
          'Customer.Won.So.Far', 'Customer.ConvRatio.So.Far']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Target,Customer.Contacts.So.Far,Customer.Won.So.Far,Customer.ConvRatio.So.Far
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,1,1,1.0
20596,1&1 De-Mail GmbH [127995],2014-03-17,1,2,2,1.0
23294,1&1 De-Mail GmbH [127995],2014-12-09,1,3,3,1.0
24232,1&1 De-Mail GmbH [127995],2015-03-10,1,4,4,1.0
28501,1&1 De-Mail GmbH [127995],2016-04-01,1,5,5,1.0
783,1&1 Internet SE [4497],2009-03-01,1,1,1,1.0
784,1&1 Internet SE [4497],2009-03-01,1,2,2,1.0
2892,1&1 Internet SE [4497],2009-03-06,1,3,3,1.0
2893,1&1 Internet SE [4497],2009-03-06,1,4,4,1.0
3675,1&1 Internet SE [4497],2009-05-28,1,5,5,1.0


### Conversion rate last 5

In [20]:
# Per-customer rolling sum of Target over a window of 5 rows (fewer at the
# start of a customer's history, because min_periods=1).
# Only the columns the computation needs are passed in: rolling over the whole
# frame sums every column, which is wasteful and raises on non-numeric columns
# in recent pandas versions.
# NOTE(review): the window includes the current row's own Target; confirm
# whether that is acceptable for a predictive feature.
cvrl5 = (
    data[['Customer', 'Create.Day', 'Target']]
    .groupby('Customer')
    .rolling(window=5, on='Create.Day', min_periods=1)
    .sum()['Target']
)

In [21]:
# Turn the index levels produced by the grouped rolling sum into ordinary
# columns so the result can be merged back on the opportunity number.
cvrl5 = cvrl5.reset_index()

In [22]:
# The customer key is not needed for the merge on the opportunity number.
cvrl5 = cvrl5.drop('Customer', axis=1)

In [23]:
# Give the rolling sum its feature name. The 'level_1' entry covers the case
# where reset_index produced an unnamed level; it is ignored if no such column exists.
cvrl5 = cvrl5.rename(columns={'Target': 'Customer.Won.Last5', 'level_1': 'Opportunity.Number'})

In [24]:
# Expose Opportunity.Number as a column so it can serve as the merge key.
data = data.reset_index()

In [25]:
# Attach Customer.Won.Last5 to each opportunity; the left join keeps every row of `data`.
data = data.merge(cvrl5, on='Opportunity.Number', how='left')

In [26]:
# Spot-check the rolling won count on the first 20 rows, ordered by customer,
# creation date and opportunity number.
(
    data[['Customer', 'Create.Day', 'Opportunity.Number', 'Target', 'Customer.Won.Last5']]
    .sort_values(by=['Customer', 'Create.Day', 'Opportunity.Number'])
    .head(20)
)

Unnamed: 0,Customer,Create.Day,Opportunity.Number,Target,Customer.Won.Last5
0,1&1 De-Mail GmbH [127995],2013-11-15,19281,1,1.0
1,1&1 De-Mail GmbH [127995],2014-03-17,20596,1,2.0
2,1&1 De-Mail GmbH [127995],2014-12-09,23294,1,3.0
3,1&1 De-Mail GmbH [127995],2015-03-10,24232,1,4.0
4,1&1 De-Mail GmbH [127995],2016-04-01,28501,1,5.0
5,1&1 Internet SE [4497],2009-03-01,783,1,1.0
6,1&1 Internet SE [4497],2009-03-01,784,1,2.0
7,1&1 Internet SE [4497],2009-03-06,2892,1,3.0
8,1&1 Internet SE [4497],2009-03-06,2893,1,4.0
9,1&1 Internet SE [4497],2009-05-28,3675,1,5.0


In [27]:
# Divide by the number of rows actually in the window: the running contact
# count, capped at the window size of 5.
window_size = data['Customer.Contacts.So.Far'].clip(upper=5)
data['Customer.ConvRatio.Last5'] = data['Customer.Won.Last5'] / window_size

In [28]:
# Spot-check the last-5 conversion ratio on the first 20 rows, ordered by
# customer and creation date.
(
    data[['Customer', 'Opportunity.Number', 'Create.Day', 'Target',
          'Customer.Won.Last5', 'Customer.ConvRatio.Last5']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0,Customer,Opportunity.Number,Create.Day,Target,Customer.Won.Last5,Customer.ConvRatio.Last5
0,1&1 De-Mail GmbH [127995],19281,2013-11-15,1,1.0,1.0
1,1&1 De-Mail GmbH [127995],20596,2014-03-17,1,2.0,1.0
2,1&1 De-Mail GmbH [127995],23294,2014-12-09,1,3.0,1.0
3,1&1 De-Mail GmbH [127995],24232,2015-03-10,1,4.0,1.0
4,1&1 De-Mail GmbH [127995],28501,2016-04-01,1,5.0,1.0
5,1&1 Internet SE [4497],783,2009-03-01,1,1.0,1.0
6,1&1 Internet SE [4497],784,2009-03-01,1,2.0,1.0
7,1&1 Internet SE [4497],2892,2009-03-06,1,3.0,1.0
8,1&1 Internet SE [4497],2893,2009-03-06,1,4.0,1.0
9,1&1 Internet SE [4497],3675,2009-05-28,1,5.0,1.0


### How many in last year, conversion ratio last year?

In [29]:
import datetime

In [30]:
# Per-customer rolling sums over a 365-day window on Create.Day:
#   Target -> opportunities won in the window
#   one    -> opportunities created in the window
# Only the needed columns are passed to groupby/rolling: rolling over the whole
# frame sums every column, which is wasteful and raises on non-numeric columns
# in recent pandas versions.
# NOTE(review): the window includes the current row's own Target.
# The variable name `cvrl5` is reused from the last-5 computation above even
# though it now holds the last-year aggregates; the next cell reads it by that name.
data = data.set_index('Opportunity.Number')
cvrl5 = (
    data[['Customer', 'Create.Day', 'Target', 'one']]
    .groupby('Customer')
    .rolling(window=datetime.timedelta(days=365), on='Create.Day', min_periods=1)
    .sum()[['Target', 'one']]
)
cvrl5 = cvrl5.reset_index().drop('Customer', axis=1)
cvrl5 = cvrl5.rename(columns={'Target': 'Customer.Won.LastYear',
                              'one': 'Customer.Contacts.LastYear',
                              'level_1': 'Opportunity.Number'})

In [31]:
# Join the last-year aggregates onto every opportunity, then restore
# Opportunity.Number as the index.
data = (
    data.reset_index()
    .merge(cvrl5, on='Opportunity.Number', how='left')
    .set_index('Opportunity.Number')
)

In [32]:
# Spot-check the last-year won count on the first 20 rows, ordered by customer
# and creation date.
(
    data[['Customer', 'Create.Day', 'Target', 'Customer.Won.LastYear']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Target,Customer.Won.LastYear
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,1.0
20596,1&1 De-Mail GmbH [127995],2014-03-17,1,2.0
23294,1&1 De-Mail GmbH [127995],2014-12-09,1,2.0
24232,1&1 De-Mail GmbH [127995],2015-03-10,1,3.0
28501,1&1 De-Mail GmbH [127995],2016-04-01,1,1.0
783,1&1 Internet SE [4497],2009-03-01,1,1.0
784,1&1 Internet SE [4497],2009-03-01,1,2.0
2892,1&1 Internet SE [4497],2009-03-06,1,3.0
2893,1&1 Internet SE [4497],2009-03-06,1,4.0
3675,1&1 Internet SE [4497],2009-05-28,1,5.0


In [33]:
# Won opportunities divided by all opportunities within the 365-day window.
data['Customer.ConvRatio.LastYear'] = data['Customer.Won.LastYear'].div(
    data['Customer.Contacts.LastYear']
)

In [34]:
# Spot-check the last-year counts and ratio on the first 30 rows, ordered by
# customer and creation date.
(
    data[['Customer', 'Create.Day', 'Target', 'Customer.Won.LastYear',
          'Customer.Contacts.LastYear', 'Customer.ConvRatio.LastYear']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(30)
)

Unnamed: 0_level_0,Customer,Create.Day,Target,Customer.Won.LastYear,Customer.Contacts.LastYear,Customer.ConvRatio.LastYear
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,1.0,1.0,1.0
20596,1&1 De-Mail GmbH [127995],2014-03-17,1,2.0,2.0,1.0
23294,1&1 De-Mail GmbH [127995],2014-12-09,1,2.0,2.0,1.0
24232,1&1 De-Mail GmbH [127995],2015-03-10,1,3.0,3.0,1.0
28501,1&1 De-Mail GmbH [127995],2016-04-01,1,1.0,1.0,1.0
783,1&1 Internet SE [4497],2009-03-01,1,1.0,1.0,1.0
784,1&1 Internet SE [4497],2009-03-01,1,2.0,2.0,1.0
2892,1&1 Internet SE [4497],2009-03-06,1,3.0,3.0,1.0
2893,1&1 Internet SE [4497],2009-03-06,1,4.0,4.0,1.0
3675,1&1 Internet SE [4497],2009-05-28,1,5.0,5.0,1.0


### Relative deviation of the current Order Entry CHF from the customer's average value so far

In [35]:
# Amount counted only for won opportunities: multiplying by the 0/1 Target
# zeroes the amount of every row that was not won.
data['Order.Entry.CHF.Won'] = data['Order.Entry.CHF'].mul(data['Target'])

In [36]:
# Running average won amount per customer: cumulative won CHF divided by the
# cumulative number of wins. Rows before a customer's first win divide 0 by 0
# and come out as NaN.
# The column is selected before cumsum so only the won amount is accumulated;
# a frame-wide groupby().cumsum() fails on columns that do not support it in
# recent pandas versions.
data['Customer.Avg.Order.Entry.CHF.So.Far'] = (
    data.groupby('Customer')['Order.Entry.CHF.Won'].cumsum()
    / data['Customer.Won.So.Far']
)

In [37]:
# Spot-check the running average won amount on the first 20 rows, ordered by
# customer and creation date.
(
    data[['Customer', 'Create.Day', 'Target', 'Order.Entry.CHF', 'Order.Entry.CHF.Won',
          'Customer.Contacts.So.Far', 'Customer.Avg.Order.Entry.CHF.So.Far']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Target,Order.Entry.CHF,Order.Entry.CHF.Won,Customer.Contacts.So.Far,Customer.Avg.Order.Entry.CHF.So.Far
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,149809.0,149809.0,1,149809.0
20596,1&1 De-Mail GmbH [127995],2014-03-17,1,146873.0,146873.0,2,148341.0
23294,1&1 De-Mail GmbH [127995],2014-12-09,1,31142.0,31142.0,3,109274.666667
24232,1&1 De-Mail GmbH [127995],2015-03-10,1,133415.0,133415.0,4,115309.75
28501,1&1 De-Mail GmbH [127995],2016-04-01,1,166762.0,166762.0,5,125600.2
783,1&1 Internet SE [4497],2009-03-01,1,30595.0,30595.0,1,30595.0
784,1&1 Internet SE [4497],2009-03-01,1,4565.0,4565.0,2,17580.0
2892,1&1 Internet SE [4497],2009-03-06,1,10350.0,10350.0,3,15170.0
2893,1&1 Internet SE [4497],2009-03-06,1,16037.0,16037.0,4,15386.75
3675,1&1 Internet SE [4497],2009-05-28,1,11654.0,11654.0,5,14640.2


In [38]:
# Previous row's running average within the same customer, i.e. the average
# before the current opportunity. A customer's first row has no predecessor
# and gets NaN.
data['Customer.Avg.Order.Entry.CHF.So.Far.Lag1'] = data.groupby('Customer')['Customer.Avg.Order.Entry.CHF.So.Far'].shift(1)

In [39]:
# Compare the running average with its one-row lag on the first 20 rows,
# ordered by customer and creation date.
(
    data[['Customer', 'Create.Day', 'Customer.Avg.Order.Entry.CHF.So.Far',
          'Customer.Avg.Order.Entry.CHF.So.Far.Lag1']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Customer.Avg.Order.Entry.CHF.So.Far,Customer.Avg.Order.Entry.CHF.So.Far.Lag1
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,149809.0,
20596,1&1 De-Mail GmbH [127995],2014-03-17,148341.0,149809.0
23294,1&1 De-Mail GmbH [127995],2014-12-09,109274.666667,148341.0
24232,1&1 De-Mail GmbH [127995],2015-03-10,115309.75,109274.666667
28501,1&1 De-Mail GmbH [127995],2016-04-01,125600.2,115309.75
783,1&1 Internet SE [4497],2009-03-01,30595.0,
784,1&1 Internet SE [4497],2009-03-01,17580.0,30595.0
2892,1&1 Internet SE [4497],2009-03-06,15170.0,17580.0
2893,1&1 Internet SE [4497],2009-03-06,15386.75,15170.0
3675,1&1 Internet SE [4497],2009-05-28,14640.2,15386.75


In [40]:
# Relative deviation of the current amount from the customer's previous
# running average: (current - previous_avg) / previous_avg.
# A previous average of exactly 0 is turned into NaN in the denominator so the
# result is NaN rather than +/-inf.
previous_avg = data['Customer.Avg.Order.Entry.CHF.So.Far.Lag1']
deviation = data['Order.Entry.CHF'] - previous_avg
data['Customer.Order.Entry.CHF.std2avg'] = deviation / previous_avg.replace(0, np.nan)

In [41]:
# Spot-check the relative deviation on the first 20 rows, ordered by customer
# and creation date.
(
    data[['Customer', 'Create.Day', 'Target', 'Order.Entry.CHF',
          'Customer.Avg.Order.Entry.CHF.So.Far.Lag1', 'Customer.Order.Entry.CHF.std2avg']]
    .sort_values(by=['Customer', 'Create.Day'])
    .head(20)
)

Unnamed: 0_level_0,Customer,Create.Day,Target,Order.Entry.CHF,Customer.Avg.Order.Entry.CHF.So.Far.Lag1,Customer.Order.Entry.CHF.std2avg
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19281,1&1 De-Mail GmbH [127995],2013-11-15,1,149809.0,,
20596,1&1 De-Mail GmbH [127995],2014-03-17,1,146873.0,149809.0,-0.019598
23294,1&1 De-Mail GmbH [127995],2014-12-09,1,31142.0,148341.0,-0.790065
24232,1&1 De-Mail GmbH [127995],2015-03-10,1,133415.0,109274.666667,0.220914
28501,1&1 De-Mail GmbH [127995],2016-04-01,1,166762.0,115309.75,0.446209
783,1&1 Internet SE [4497],2009-03-01,1,30595.0,,
784,1&1 Internet SE [4497],2009-03-01,1,4565.0,30595.0,-0.850793
2892,1&1 Internet SE [4497],2009-03-06,1,10350.0,17580.0,-0.411263
2893,1&1 Internet SE [4497],2009-03-06,1,16037.0,15170.0,0.057152
3675,1&1 Internet SE [4497],2009-05-28,1,11654.0,15386.75,-0.242595


In [42]:
# Rows with no usable previous average (NaN) are set to 0, i.e. treated as
# "no deviation".
data['Customer.Order.Entry.CHF.std2avg'] = data['Customer.Order.Entry.CHF.std2avg'].fillna(0)

In [43]:
# Mean relative deviation per outcome class.
# The column is selected before aggregating: averaging the whole frame is
# wasteful and raises on the text columns in recent pandas versions.
data.groupby('Target')['Customer.Order.Entry.CHF.std2avg'].mean()

Target
0    15.427169
1     9.534723
Name: Customer.Order.Entry.CHF.std2avg, dtype: float64

### Last outcome

In [44]:
# Target of the customer's previous row; NaN on a customer's first row.
data['Customer.Last.Target'] = data.groupby('Customer')['Target'].shift(1)

### Time (days) since last contact

In [45]:
# Whole days between this opportunity's Create.Day and the Create.Day of the
# customer's previous row; NaN on a customer's first row.
data['Customer.Days.Since.LastContact'] = (data['Create.Day'] - data.groupby('Customer')['Create.Day'].shift(1)).dt.days

### Time (days) since last win

In [46]:
# Days since the customer's previous won opportunity.
idxWon = (data.Target == 1)
# Rebuild the helper column in full on every run: won rows carry their own
# Create.Day, all other rows are NaT. (Assigning only the won rows via .loc
# would leave stale values in the other rows if this cell were re-run.)
data['Customer.Last.Day.Won'] = data['Create.Day'].where(idxWon)
# Carry the most recent won date forward within each customer. GroupBy.ffill
# replaces the deprecated fillna(method='ffill').
data['Customer.Last.Day.Won'] = data.groupby('Customer')['Customer.Last.Day.Won'].ffill()
# Shift by one row so each row only sees wins from strictly earlier rows.
data['Customer.Last.Day.Won'] = data.groupby('Customer')['Customer.Last.Day.Won'].shift(1)
data['Customer.Days.Since.LastWin'] = (data['Create.Day'] - data['Customer.Last.Day.Won']).dt.days

### Time (days) since last loss

In [47]:
# Days since the customer's previous opportunity that was not won.
# NOTE(review): Target == 0 covers every Status.Category other than 'Won'
# (Closed, Open, On Hold), so "loss" here also includes opportunities that are
# still open. Confirm this is intended.
idxLost = (data.Target == 0)
# Rebuild the helper column in full on every run: non-won rows carry their own
# Create.Day, won rows are NaT. (Assigning only a subset via .loc would leave
# stale values in the other rows if this cell were re-run.)
data['Customer.Last.Day.Loss'] = data['Create.Day'].where(idxLost)
# Carry the most recent date forward within each customer. GroupBy.ffill
# replaces the deprecated fillna(method='ffill').
data['Customer.Last.Day.Loss'] = data.groupby('Customer')['Customer.Last.Day.Loss'].ffill()
# Shift by one row so each row only sees earlier rows.
data['Customer.Last.Day.Loss'] = data.groupby('Customer')['Customer.Last.Day.Loss'].shift(1)
data['Customer.Days.Since.LastLoss'] = (data['Create.Day'] - data['Customer.Last.Day.Loss']).dt.days

### Amount CHF last win

In [48]:
# Amount (CHF) of the customer's previous won opportunity.
idxWon = (data.Target == 1)
# Rebuild the helper column in full on every run: won rows carry their own
# amount, all other rows are NaN. (Assigning only the won rows via .loc would
# leave stale values in the other rows if this cell were re-run.)
data['Customer.CHF.Last.Won'] = data['Order.Entry.CHF'].where(idxWon)
# Carry the latest won amount forward within each customer. GroupBy.ffill
# replaces the deprecated fillna(method='ffill').
data['Customer.CHF.Last.Won'] = data.groupby('Customer')['Customer.CHF.Last.Won'].ffill()
# Shift by one row so each row only sees wins from strictly earlier rows.
data['Customer.CHF.Last.Won'] = data.groupby('Customer')['Customer.CHF.Last.Won'].shift(1)

### Amount  CHF last loss

In [49]:
# Amount (CHF) of the customer's previous opportunity that was not won.
# NOTE(review): Target == 0 covers every Status.Category other than 'Won', so
# this also includes opportunities that are still open. Confirm this is intended.
idxLost = (data.Target == 0)
# Rebuild the helper column in full on every run: non-won rows carry their own
# amount, won rows are NaN. (Assigning only a subset via .loc would leave stale
# values in the other rows if this cell were re-run.)
data['Customer.CHF.Last.Loss'] = data['Order.Entry.CHF'].where(idxLost)
# Carry the latest amount forward within each customer. GroupBy.ffill replaces
# the deprecated fillna(method='ffill').
data['Customer.CHF.Last.Loss'] = data.groupby('Customer')['Customer.CHF.Last.Loss'].ffill()
# Shift by one row so each row only sees earlier rows.
data['Customer.CHF.Last.Loss'] = data.groupby('Customer')['Customer.CHF.Last.Loss'].shift(1)

In [50]:
# Column names, non-null counts and dtypes of the frame with the engineered features.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29938 entries, 19281 to 6492
Data columns (total 44 columns):
Create.Day                                  29938 non-null datetime64[ns]
Country                                     29385 non-null object
Location                                    29385 non-null object
Solution                                    29385 non-null object
Unit                                        29385 non-null object
Customer                                    29938 non-null object
Customer.Class                              29656 non-null object
Customer.Contact                            29576 non-null object
Customer.Industry                           29907 non-null object
Customer.Company                            29938 non-null object
Customer.Division                           29938 non-null object
Customer.Location                           29938 non-null object
Customer.Unit                               29938 non-null object
Key.Account.Manager   

In [51]:
# Write the engineered frame to disk as Latin-1 encoded CSV.
# NOTE(review): index=False leaves the index out of the file, and the index
# was last set to Opportunity.Number, so the opportunity key is not written.
# Confirm this is intended.
data.to_csv('../data/test.csv', index=False, encoding='latin1')

## Exploratory Plotting with the new Features

In [54]:
import os

In [60]:
# Switch the working directory to the sibling src/ folder (presumably so the
# project modules imported in the next cell can be found).
# os.path.join builds the path with the platform's separator; the hard-coded
# backslash only worked on Windows.
os.chdir(os.path.join('..', 'src'))

'C:\\github\\lead_convertion_predictive_model\\src'

In [61]:
from data_cleaning import DataCleaner
from features_engineering import FeatureExtractor

In [63]:
# Move back up to the parent directory. os.pardir is the portable spelling of
# '..'; the trailing backslash in '..\\' is only a path separator on Windows.
os.chdir(os.pardir)

In [64]:
# read and clean the data
dc = DataCleaner()
data = dc.clean()

In [65]:
# separate target variable
target = data.pop('Target')

In [66]:
# The module name was misspelled ('sklearnarnarn'), which made this cell fail
# with an import error on a fresh kernel.
from sklearn.model_selection import train_test_split

# train test split
# Called with default arguments only. No random_state is passed, so the rows
# assigned to each side differ from run to run.
data_train, data_test, target_train, target_test = train_test_split(data, target)

In [67]:
# Column names, non-null counts and dtypes of the training split.
data_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21809 entries, 456 to 21509
Data columns (total 56 columns):
index                                       21809 non-null int64
Acquisition.Day                             21809 non-null object
Create.Day                                  21809 non-null datetime64[ns]
Country                                     21419 non-null object
Location                                    21419 non-null object
Solution                                    21419 non-null object
Unit                                        21419 non-null object
Customer                                    21620 non-null object
Customer.Class                              21424 non-null object
Customer.Contact                            21577 non-null object
Customer.Industry                           21604 non-null object
Customer.Number                             21809 non-null int64
Customer.Company                            21620 non-null object
Customer.Division        

In [68]:
# featurize data
featurizer = FeatureExtractor()

In [75]:
# Inspect rows whose last-5 conversion ratio is +inf. In the output below these
# rows have a contact count of 0, i.e. a division by zero.
# np.inf is used because the np.Inf alias was removed in NumPy 2.0; .loc
# selects the rows and columns in one step instead of chained indexing.
mask = (data['Customer.ConvRatio.Last5'] == np.inf)
data.loc[mask, ['Customer', 'Create.Day', 'Customer.Won.Last5',
                'Customer.Contacts.So.Far', 'Customer.ConvRatio.Last5']]

Unnamed: 0_level_0,Customer,Create.Day,Customer.Won.Last5,Customer.Contacts.So.Far,Customer.ConvRatio.Last5
Opportunity.Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
297,,2009-03-01,1.0,0,inf
422,,2009-03-01,5.0,0,inf
689,,2009-03-01,1.0,0,inf
840,,2009-03-01,3.0,0,inf
1010,,2009-03-01,3.0,0,inf
1082,,2009-03-01,3.0,0,inf
1083,,2009-03-01,4.0,0,inf
1160,,2009-03-01,1.0,0,inf
1252,,2009-03-01,3.0,0,inf
1421,,2009-03-01,1.0,0,inf
