In [448]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn.utils import resample

In [449]:
account = pd.read_csv('data/account.csv')
contact = pd.read_csv('data/contact.csv')
opportunity = pd.read_csv('data/opportunity.csv')
task = pd.read_csv('data/task.csv')

In [450]:
dataset = account.merge(opportunity, left_on='ID', right_on='ACCOUNTID', how='left', suffixes=('_ACC', '_OPP'))

In [451]:
columns_to_drop = ['YEARSTARTED', 'NAME_ACC', 'FORECASTCATEGORYNAME', 'FORECASTCATEGORY', 'HASOVERDUETASK', 'ANNUALREVENUE',
                   'ISWON', 'ISCLOSED', 'ACCOUNTID', 'PROBABILITY', 'LEADSOURCE', 'EXPECTEDREVENUE', 'ACCOUNTSOURCE', 'FISCALYEAR']
numerical_columns = ['GROWTH_RATE__C', 'TOTAL_FUNDING_TO_DATE__C', 'NUMBEROFEMPLOYEES', 'AMOUNT'] # PROBABILITY, 'EXPECTEDREVENUE', LEADSOURCE, INDUSTRY, ACCOUNTSOURCE
categorical_columns = ['RATING', 'OWNERSHIP', 'TYPE_ACC', 'INDUSTRY', 'HQ_LOCATION__C', 'RANGE_ANNUALREVENUE']
binary_columns = ['OWNER_INTENT_TO_SELL__C', 'TYPE_OPP']
identificators = ['ID_ACC', 'ID_OPP']
targets = ['STAGENAME', 'NAME_OPP']

In [452]:
def encode_categorical_columns(data, categorical_columns):
    label_encoders = {}
    
    for column in categorical_columns:
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column])
        label_encoders[column] = le
    
    return data, label_encoders

def drop_columns(data, columns_to_drop):
    return data.drop(columns=columns_to_drop, axis=1)

def onehot_encode_categorical_columns(data, categorical_columns, label_encoders):
    onehot_encoded_df_list = []
    
    for column in categorical_columns:
        le = label_encoders[column]

        onehot_encoder = OneHotEncoder(sparse_output=False)
        onehot_encoded_column = onehot_encoder.fit_transform(data[[column]])

        categories = le.inverse_transform(np.arange(len(le.classes_)))
        onehot_encoded_df = pd.DataFrame(onehot_encoded_column, columns=[f"{column}_{category}" for category in categories])
        onehot_encoded_df_list.append(onehot_encoded_df)

    data = data.drop(columns=categorical_columns)
    data = pd.concat([data.reset_index(drop=True)] + onehot_encoded_df_list, axis=1)
    
    return data, onehot_encoder

In [453]:
industry_mapping = {
    'Technology & Communications': [
        'Media', 'Telecommunications', 'Technology', 'Electronics', 'Communications'
    ],
    'Finance & Insurance': [
        'Finance', 'Banking', 'Insurance'
    ],
    'Consumer & Services': [
        'Not For Profit', 'Transportation', 'Food & Beverage', 'Environmental', 'Consulting', 
        'Shipping', 'Recreation', 'Education', 'Retail', 'Hospitality', 'Entertainment', 
        'Healthcare', 'Government', 'Apparel'
    ],
    'Industrial & Other': [
        'Utilities', 'Biotechnology', 'Engineering', 'Manufacturing', 'Machinery', 'Construction', 
        'Agriculture', 'Energy', 'Chemicals', 'Other'
    ]
}

In [454]:
region_mapping = {
    'Northeast': [
        'Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island', 'Vermont',
        'New Jersey', 'New York', 'Pennsylvania'
    ],
    'Midwest': [
        'Illinois', 'Indiana', 'Michigan', 'Ohio', 'Wisconsin',
        'Iowa', 'Kansas', 'Minnesota', 'Missouri', 'Nebraska', 'North Dakota', 'South Dakota'
    ],
    'Southeast': [
        'Delaware', 'Florida', 'Georgia', 'Maryland', 'North Carolina', 'South Carolina', 'Virginia',
        'West Virginia', 'Alabama', 'Kentucky', 'Mississippi', 'Tennessee', 'Arkansas', 'Louisiana'
    ],
    'Southwest': [
        'Arizona', 'New Mexico', 'Oklahoma', 'Texas'
    ],
    'West': [
        'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana', 'Nevada',
        'Oregon', 'Utah', 'Washington', 'Wyoming'
    ]
}

In [455]:
def map_industry(industry):
    for category, industries in industry_mapping.items():
        if industry in industries:
            return category
    return 'Unknown'

def categorize_revenue(revenue):
    if revenue <= 8.042523e6:
        return 'Low'
    elif revenue <= 6.428692e7:
        return 'Lower-Middle'
    elif revenue <= 3.813766e8:
        return 'Upper-Middle'
    else:
        return 'High'

def map_region(state):
    for region, states in region_mapping.items():
        if state in states:
            return region
    return 'Unknown'

In [456]:
dataset = dataset[dataset.STAGENAME.isin(['Closed Won', 'Closed Lost'])]
dataset['INDUSTRY'] = dataset['INDUSTRY'].apply(map_industry)
dataset['RANGE_ANNUALREVENUE'] = dataset['ANNUALREVENUE'].apply(categorize_revenue)
dataset['HQ_LOCATION__C'] = dataset['HQ_LOCATION__C'].apply(map_region)
dataset, label_encoders = encode_categorical_columns(dataset, categorical_columns + targets + binary_columns)
dataset = drop_columns(dataset, columns_to_drop)
dataset, onehot_encoder = onehot_encode_categorical_columns(dataset, categorical_columns, label_encoders)

In [457]:
features = dataset.drop(columns=identificators+['STAGENAME'])
target = dataset['STAGENAME']
ids = dataset[identificators]

In [458]:
scaler = StandardScaler()
features[numerical_columns] = scaler.fit_transform(features[numerical_columns])

In [462]:
X_train, X_test, y_train, y_test, ids_train, ids_test = train_test_split(features, target, ids, test_size=0.4, random_state=42)

In [463]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [464]:
y_pred = model.predict(X_test)

In [465]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.6944444444444444
Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.52      0.57        71
           1       0.72      0.81      0.76       109

    accuracy                           0.69       180
   macro avg       0.68      0.66      0.67       180
weighted avg       0.69      0.69      0.69       180

Confusion Matrix:
 [[37 34]
 [21 88]]


In [466]:
coefficients = model.coef_[0]
feature_names = features.columns

In [467]:
feature_importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': coefficients
}).sort_values(by='Coefficient', ascending=False)

In [470]:
feature_importance_df

Unnamed: 0,Feature,Coefficient
8,RATING_Hot,1.070183
23,HQ_LOCATION__C_Southwest,0.625352
6,NAME_OPP,0.373245
18,INDUSTRY_Industrial & Other,0.36208
1,TOTAL_FUNDING_TO_DATE__C,0.28327
4,TYPE_OPP,0.278956
15,TYPE_ACC_Startup,0.278956
27,RANGE_ANNUALREVENUE_Lower-Middle,0.188976
16,INDUSTRY_Consumer & Services,0.138215
5,AMOUNT,0.130731


In [None]:
feature_importance_df[feature_importance_df.Feature == 'NAME_OPP']

In [None]:
label_encoders['NAME_OPP'].classes_

In [None]:
# Predict probabilities
y_prob = model.predict_proba(X_test)[:, 1]

# Threshold
threshold = 0.80
y_pred_custom = (y_prob >= threshold).astype(int)

# Evaluate the model with the custom threshold
accuracy = accuracy_score(y_test, y_pred_custom)
report = classification_report(y_test, y_pred_custom)
conf_matrix = confusion_matrix(y_test, y_pred_custom)

print("Accuracy with custom threshold:", accuracy)
print("Classification Report with custom threshold:\n", report)
print("Confusion Matrix with custom threshold:\n", conf_matrix)

In [None]:
data_sample = X_test.iloc[[98]]
data_sample.reset_index(drop=True, inplace=True)
print(label_encoders['NAME_OPP'].classes_[data_sample.NAME_OPP][0])
print(model.predict_proba(data_sample)[:, 1])
print('-----------------')
data_sample.NAME_OPP = 1 - data_sample.NAME_OPP
print(label_encoders['NAME_OPP'].classes_[data_sample.NAME_OPP][0])
print(model.predict_proba(data_sample)[:, 1])

In [None]:
# Calculate the scale_pos_weight value
neg_count = sum(y_train == 0)
pos_count = sum(y_train == 1)
scale_pos_weight = neg_count / pos_count

train_data = lgb.Dataset(X_train, label=y_train)
params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'metric': 'binary_logloss',
    'learning_rate': 0.05,
    'num_leaves': 15,
    'verbose': -1,
    'scale_pos_weight': scale_pos_weight
}

model = lgb.train(params, train_data, num_boost_round=100)

y_pred = model.predict(X_test)
y_pred_binary = [1 if pred > 0.77 else 0 for pred in y_pred]

accuracy = accuracy_score(y_test, y_pred_binary)
report = classification_report(y_test, y_pred_binary)
conf_matrix = confusion_matrix(y_test, y_pred_binary)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

In [None]:
# Using Downsampling
train_data = pd.concat([X_train, y_train], axis=1)
majority_class = train_data[train_data.STAGENAME == 0]
minority_class = train_data[train_data.STAGENAME == 1]

majority_class_downsampled = resample(
    majority_class, 
    replace=False,
    n_samples=len(minority_class),
    random_state=42
)

downsampled_train_data = pd.concat([majority_class_downsampled, minority_class])

X_train_downsampled = downsampled_train_data.drop('STAGENAME', axis=1)
y_train_downsampled = downsampled_train_data['STAGENAME']

train_data = lgb.Dataset(X_train_downsampled, label=y_train_downsampled)

params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'metric': 'binary_logloss',
    'learning_rate': 0.05,
    'num_leaves': 15,
    'verbose': -1,
    'scale_pos_weight': scale_pos_weight
}

model = lgb.train(params, train_data, num_boost_round=100)

y_pred = model.predict(X_test)
y_pred_binary = [1 if pred > 0.8 else 0 for pred in y_pred]

accuracy = accuracy_score(y_test, y_pred_binary)
report = classification_report(y_test, y_pred_binary)
conf_matrix = confusion_matrix(y_test, y_pred_binary)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

In [None]:
# Using Downsampling and Linear regression
train_data = pd.concat([X_train, y_train], axis=1)

majority_class = train_data[train_data.STAGENAME == 0]
minority_class = train_data[train_data.STAGENAME == 1]

majority_class_downsampled = resample(
    majority_class, 
    replace=False,
    n_samples=len(minority_class),
    random_state=42
)

downsampled_train_data = pd.concat([majority_class_downsampled, minority_class])

X_train_downsampled = downsampled_train_data.drop('STAGENAME', axis=1)
y_train_downsampled = downsampled_train_data['STAGENAME']


model = LogisticRegression(max_iter=1000)
model.fit(X_train_downsampled, y_train_downsampled)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

In [None]:
# Predict probabilities
y_prob = model.predict_proba(X_test)[:, 1]

# Threshold
threshold = 0.5
y_pred_custom = (y_prob >= threshold).astype(int)

# Evaluate the model with the custom threshold
accuracy = accuracy_score(y_test, y_pred_custom)
report = classification_report(y_test, y_pred_custom)
conf_matrix = confusion_matrix(y_test, y_pred_custom)

print("Accuracy with custom threshold:", accuracy)
print("Classification Report with custom threshold:\n", report)
print("Confusion Matrix with custom threshold:\n", conf_matrix)

In [None]:
predictions_df = pd.DataFrame({
    'ID': ids_test['ID_ACC'],
    'Probability': y_prob
})

In [None]:
stagename = dataset[['ID_ACC', 'STAGENAME']].copy()
stagename = stagename.merge(predictions_df, left_on='ID_ACC', right_on='ID', how='right')
stagename.drop(columns=['ID'], inplace=True)
stagename

In [None]:
dataset.head()

In [None]:
copy_df = X_test.copy()
copy_df.NAME_OPP = 1
copy_pred_investment = model.predict_proba(copy_df)[:, 1]
copy_df.NAME_OPP = 0
copy_pred_acquisition = model.predict_proba(copy_df)[:, 1]

In [None]:
investment_acquisition = pd.concat([pd.DataFrame(copy_pred_investment, columns=['investment_prob']),
                                    pd.DataFrame(copy_pred_acquisition, columns=['acquisition_prob']),
                                    y_test.reset_index(drop=True),
                                    X_test.OWNER_INTENT_TO_SELL__C.reset_index(drop=True),
                                    X_test.NAME_OPP.reset_index(drop=True)], axis=1)
investment_acquisition.head()

In [None]:
investment_acquisition[investment_acquisition.investment_prob <= investment_acquisition.acquisition_prob]

In [None]:
feature_importance_df

In [472]:
recommender_dictionary = {
    "RATING_Hot": {
        "Pos": "Companies with a Hot rating are more likely to close as Won",
        "Neg": "Companies with a Hot rating are less likely to close as Won"
    },
    "RATING_Warm": {
        "Pos": "Companies with a Warm rating are more likely to close as Won",
        "Neg": "Companies with a Warm rating are less likely to close as Won"
    },
    "RATING_Cold": {
        "Pos": "Companies with a Cold rating are more likely to close as Won",
        "Neg": "Companies with a Cold rating are less likely to close as Won"
    },
    "HQ_LOCATION__C_Northeast": {
        "Pos": "Companies from the Northeast are more likely to close as Won",
        "Neg": "Companies from the Northeast are less likely to close as Won"
    },
    "HQ_LOCATION__C_West": {
        "Pos": "Companies from the West are more likely to close as Won",
        "Neg": "Companies from the West are less likely to close as Won"
    },
    "HQ_LOCATION__C_Southwest": {
        "Pos": "Companies from the Southwest are more likely to close as Won",
        "Neg": "Companies from the Southwest are less likely to close as Won"
    },
    "HQ_LOCATION__C_Southeast": {
        "Pos": "Companies from the Southeast are more likely to close as Won",
        "Neg": "Companies from the Southeast are less likely to close as Won"
    },
    "HQ_LOCATION__C_Midwest": {
        "Pos": "Companies from the Midwest are more likely to close as Won",
        "Neg": "Companies from the Midwest are less likely to close as Won"
    },
    "NAME_OPP": {
        "Pos": "Investments are more likely to result in a Closed Won outcome compared to acquisitions",
        "Neg": "Acquisitions are more likely to result in a Closed Won outcome compared to investments"
    },
    "INDUSTRY_Technology & Communications": {
        "Pos": "Companies from Technology & Communications are more likely to close as Won",
        "Neg": "Companies from Technology & Communications are less likely to close as Won"
    },
    "INDUSTRY_Finance & Insurance": {
        "Pos": "Companies from Finance & Insurance are more likely to close as Won",
        "Neg": "Companies from Finance & Insurance are less likely to close as Won"
    },
    "INDUSTRY_Consumer & Services": {
        "Pos": "Companies from Consumer & Services are more likely to close as Won",
        "Neg": "Companies from Consumer & Services are less likely to close as Won"
    },
    "INDUSTRY_Industrial & Other": {
        "Pos": "Companies from Industrial & Other are more likely to close as Won",
        "Neg": "Companies from Industrial & Other are less likely to close as Won"
    },
    "TOTAL_FUNDING_TO_DATE__C": {
        "Pos": "Higher total funding tend to be associated with a Closed Won outcome",
        "Neg": "Lower total funding tend to be associated with a Closed Won outcome"
    },
    "TYPE_OPP": {
        "Pos": "New Business are more likely to result in a Closed Won outcome compared to Existing Business",
        "Neg": "Existing Business are more likely to result in a Closed Won outcome compared to New Business"
    },
    "TYPE_ACC_Established": {
        "Pos": "Established companies are more likely to close as Won",
        "Neg": "Established companies are less likely to close as Won"
    },
    "TYPE_ACC_Growth Stage": {
        "Pos": "Growth Stage companies are more likely to close as Won",
        "Neg": "Growth Stage companies are less likely to close as Won"
    },
    "TYPE_ACC_Startup": {
        "Pos": "Startup companies are more likely to close as Won",
        "Neg": "Startup companies are less likely to close as Won"
    },
    "RANGE_ANNUALREVENUE_High": {
        "Pos": "Companies with a High annual revenue are more likely to close as Won",
        "Neg": "Companies with a High annual revenue are less likely to close as Won"
    },
    "RANGE_ANNUALREVENUE_Low": {
        "Pos": "Companies with a Low annual revenue are more likely to close as Won",
        "Neg": "Companies with a Low annual revenue are less likely to close as Won"
    },
    "RANGE_ANNUALREVENUE_Lower-Middle": {
        "Pos": "Companies with a Lower-Middle annual revenue are more likely to close as Won",
        "Neg": "Companies with a Lower-Middle annual revenue are less likely to close as Won"
    },
    "RANGE_ANNUALREVENUE_Upper-Middle": {
        "Pos": "Companies with a Upper-Middle annual revenue are more likely to close as Won",
        "Neg": "Companies with a Upper-Middle annual revenue are less likely to close as Won"
    },
    "AMOUNT": {
        "Pos": "Higher amounts tend to be associated with a Closed Won outcome",
        "Neg": "Lower amounts tend to be associated with a Closed Won outcome"
    },
    "OWNERSHIP_Private": {
        "Pos": "Private companies are more likely to close as Won",
        "Neg": "Private companies are less likely to close as Won"
    },
    "OWNERSHIP_Public": {
        "Pos": "Public companies are more likely to close as Won",
        "Neg": "Public companies are less likely to close as Won"
    },
    "OWNERSHIP_Subsidiary": {
        "Pos": "Subsidiary companies are more likely to close as Won",
        "Neg": "Subsidiary companies are less likely to close as Won"
    },
    "NUMBEROFEMPLOYEES": {
        "Pos": "Higher number of employees tend to be associated with a Closed Won outcome",
        "Neg": "Lower number of employees tend to be associated with a Closed Won outcome"
    },
    "OWNER_INTENT_TO_SELL__C": {
        "Pos": "Companies with owners intent to sell are more likely to result in a Closed Won outcome",
        "Neg": "Companies with owners intent to sell are less likely to result in a Closed Won outcome"
    },
    "GROWTH_RATE__C": {
        "Pos": "Higher growth rate tend to be associated with a Closed Won outcome",
        "Neg": "Lower growth rate tend to be associated with a Closed Won outcome"
    },
}

In [None]:
def print_feature_meaning(row):
    if row['Coefficient'] > 0:
        print(recommender_dictionary[row['Feature']]['Pos'])
    else:
        print(recommender_dictionary[row['Feature']]['Neg'])

In [None]:
feature_importance_df.apply(print_feature_meaning, axis=1)
None

In [None]:
pd.concat([feature_importance_df.head(), feature_importance_df.tail()], axis=0).apply(print_feature_meaning, axis=1)
None

In [461]:
dataset.to_csv("feature/features_account.csv", index=False)

In [471]:
feature_importance_df

Unnamed: 0,Feature,Coefficient
8,RATING_Hot,1.070183
23,HQ_LOCATION__C_Southwest,0.625352
6,NAME_OPP,0.373245
18,INDUSTRY_Industrial & Other,0.36208
1,TOTAL_FUNDING_TO_DATE__C,0.28327
4,TYPE_OPP,0.278956
15,TYPE_ACC_Startup,0.278956
27,RANGE_ANNUALREVENUE_Lower-Middle,0.188976
16,INDUSTRY_Consumer & Services,0.138215
5,AMOUNT,0.130731


In [474]:
filter_list = [['RATING_Hot', 'RATING_Warm', 'RATING_Cold'], ['HQ_LOCATION__C_Northeast', 'HQ_LOCATION__C_West', 'HQ_LOCATION__C_Southwest',
                'HQ_LOCATION__C_Southeast', 'HQ_LOCATION__C_Midwest'], ['NAME_OPP'],
                ['INDUSTRY_Technology & Communications', 'INDUSTRY_Finance & Insurance', 'INDUSTRY_Consumer & Services', 'INDUSTRY_Industrial & Other'],
                ['TOTAL_FUNDING_TO_DATE__C'], ['TYPE_OPP'], ['TYPE_ACC_Established', 'TYPE_ACC_Growth Stage', 'TYPE_ACC_Startup'],
                ['RANGE_ANNUALREVENUE_High', 'RANGE_ANNUALREVENUE_Low', 'RANGE_ANNUALREVENUE_Lower-Middle', 'RANGE_ANNUALREVENUE_Upper-Middle'],
                ['OWNERSHIP_Private', 'OWNERSHIP_Public', 'OWNERSHIP_Subsidiary'], ['NUMBEROFEMPLOYEES'], ['OWNER_INTENT_TO_SELL__C'],
                ['GROWTH_RATE__C']]
keys = []
values = []
for filters in filter_list:
    feature_name = None
    feature_coeff = 0
    for filter in filters:
        filter_df = feature_importance_df[feature_importance_df.Feature == filter]
        if filter_df.Coefficient.values[0] >= feature_coeff:
            feature_name = filter_df.Feature.values[0]
            feature_coeff = filter_df.Coefficient.values[0]
    keys.append(feature_name)
    values.append(feature_coeff)

In [1]:
pd.DataFrame(data={"features": keys, "values": values}).sort_values(by="values", ascending=False).head(5)

NameError: name 'pd' is not defined

In [494]:
dataset

Unnamed: 0,GROWTH_RATE__C,TOTAL_FUNDING_TO_DATE__C,NUMBEROFEMPLOYEES,OWNER_INTENT_TO_SELL__C,ID_ACC,ID_OPP,TYPE_OPP,AMOUNT,STAGENAME,NAME_OPP,...,INDUSTRY_Technology & Communications,HQ_LOCATION__C_Midwest,HQ_LOCATION__C_Northeast,HQ_LOCATION__C_Southeast,HQ_LOCATION__C_Southwest,HQ_LOCATION__C_West,RANGE_ANNUALREVENUE_High,RANGE_ANNUALREVENUE_Low,RANGE_ANNUALREVENUE_Lower-Middle,RANGE_ANNUALREVENUE_Upper-Middle
0,151.0,1.025766e+07,92,0,001ak00000JD8JNAA1,006ak000002wGT4AAM,0,2.381242e+06,0,1,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,6.0,3.092345e+07,254,0,001ak00000JD8JOAA1,006ak000002wGT6AAM,0,8.111069e+06,1,1,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,77.0,9.002072e+08,4593,0,001ak00000JD8JPAA1,006ak000002wGT7AAM,0,1.377868e+08,0,1,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,121.0,1.611355e+08,650,0,001ak00000JD8JQAA1,006ak000002wGT8AAM,0,2.910834e+08,1,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,241.0,3.336486e+07,298,0,001ak00000JD8JRAA1,006ak000002wGT9AAM,0,9.532817e+06,1,1,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,117.0,7.306602e+07,416,0,001ak00000JD8TcAAL,006ak000002wI9TAAU,0,6.642366e+06,1,1,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
446,50.0,3.694055e+06,35,1,001ak00000JD8TdAAL,006ak000002wI9UAAU,1,1.937637e+07,1,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
447,71.0,9.877718e+07,441,0,001ak00000JD8TeAAL,006ak000002wI9VAAU,0,1.896169e+08,0,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
448,53.0,1.841106e+06,9,1,001ak00000JD8TfAAL,006ak000002wI9WAAU,1,1.938006e+05,0,1,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
