# Import Dataset

In [135]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from sklearn.svm import SVC
from Preprocessing_functions import *
from sklearn.model_selection import cross_val_score, train_test_split


In [136]:
# Load the raw train/test tables, indexed by the claim identifier column.
ID_COLUMN = 'Claim Identifier'
train_data, test_data = (
    pd.read_csv(csv_path, index_col=ID_COLUMN)
    for csv_path in ('train_data.csv', 'test_data.csv')
)

  train_data = pd.read_csv('train_data.csv', index_col='Claim Identifier')


# Preprocessing

In [137]:
# Discard rows in which 'Assembly Date' is the only populated field,
# then separate features from the target and make a stratified 80/20 split.
only_assembly_date = (
    train_data.drop(columns=['Assembly Date']).isna().all(axis=1)
    & train_data['Assembly Date'].notna()
)
train_data = train_data[~only_assembly_date]

# 'WCB Decision' and 'Agreement Reached' are removed from the features together with the target.
X = train_data.drop(columns=['Claim Injury Type', 'WCB Decision', 'Agreement Reached'])
y = train_data['Claim Injury Type']

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [138]:
# Coded (categorical identifier) columns.
CODE_COLUMNS = [
    'Industry Code',
    'WCIO Cause of Injury Code',
    'WCIO Nature of Injury Code',
    'WCIO Part Of Body Code',
]

# Free-text description columns that accompany the coded columns.
DESCRIPTION_COLUMNS = [
    'WCIO Cause of Injury Description',
    'WCIO Nature of Injury Description',
    'WCIO Part Of Body Description',
    'Industry Code Description',
]

# Columns passed to the boolean conversion helpers.
BOOLEAN_COLUMNS = [
    'Alternative Dispute Resolution',
    'Attorney/Representative',
    'COVID-19 Indicator',
]

# Date columns passed to the timestamp conversion helpers.
date_order = [
    'Accident Date',
    'C-2 Date',
    'C-3 Date',
    'Assembly Date',
    'First Hearing Date',
]


In [139]:
# Numeric (and date) columns handed to the mean-imputation and robust-scaling helpers.
numerical_columns = [
    'Accident Date',
    'Age at Injury',
    'Assembly Date',
    'Average Weekly Wage',
    'Birth Year',
    'C-2 Date',
    'C-3 Date',
    'First Hearing Date',
    'IME-4 Count',
]

# Columns treated as categorical; they are cast to str and encoded later.
categorical_features = [
    'Alternative Dispute Resolution',
    'Attorney/Representative',
    'Carrier Name',
    'Carrier Type',
    'County of Injury',
    'COVID-19 Indicator',
    'District Name',
    'Gender',
    'Industry Code',
    'Medical Fee Region',
    'WCIO Cause of Injury Code',
    'WCIO Nature of Injury Code',
    'WCIO Part Of Body Code',
    'Zip Code',
]

# Column groups for the (currently commented-out) min-max / standard scalers.
# NOTE(review): 'Number of Dependents' is not listed in numerical_columns above — confirm
# whether it is meant to be imputed/scaled like the other numeric columns.
col_minmax = [
    'Age at Injury',
    'Birth Year',
    'Number of Dependents',
]

col_standart = [
    'Accident Date',
    'Assembly Date',
    'Average Weekly Wage',
]

# Split categorical features by number of distinct training values.
# The boundary is inclusive on the high side so that a feature with exactly
# CARDINALITY_THRESHOLD distinct values is not silently left out of both lists
# (the previous `< 10` / `> 10` pair excluded it from every encoder).
CARDINALITY_THRESHOLD = 10
_n_unique = X_train[categorical_features].nunique()
low_cardinality_cols = [col for col in categorical_features if _n_unique[col] < CARDINALITY_THRESHOLD]
high_cardinality_cols = [col for col in categorical_features if _n_unique[col] >= CARDINALITY_THRESHOLD]




In [140]:
# Display the categorical features that were classified as high-cardinality.
high_cardinality_cols

['Carrier Name',
 'County of Injury',
 'Industry Code',
 'WCIO Cause of Injury Code',
 'WCIO Nature of Injury Code',
 'WCIO Part Of Body Code',
 'Zip Code']

In [141]:
# Cast every categorical feature to str in both the training and validation frames.
# NOTE(review): astype(str) turns missing values into the literal string 'nan' —
# confirm the later mode-imputation helper still handles them as intended.
for _frame in (X_train, X_val):
    _frame[categorical_features] = _frame[categorical_features].astype(str)

In [142]:
def drop_description_columns(X_train, X_val):
    """
    Remove every column whose name contains 'description' (case-insensitive).

    The columns to remove are determined from X_train only and the same labels
    are then dropped from X_val. New frames are returned; the inputs are not
    modified.
    """
    is_description = X_train.columns.str.contains('description', case=False, na=False)
    to_drop = X_train.columns[is_description]

    return X_train.drop(columns=to_drop), X_val.drop(columns=to_drop)

# Rebind both frames to the versions without description columns.
X_train, X_val = drop_description_columns(X_train, X_val)

In [143]:
def drop_description_columns_Test(X_test):
    """
    Remove every column of X_test whose name contains 'description' (case-insensitive).

    Returns a new frame; X_test itself is not modified.
    """
    is_description = X_test.columns.str.contains('description', case=False, na=False)
    to_drop = X_test.columns[is_description]

    return X_test.drop(columns=to_drop)


In [144]:
def drop_rows_with_missing_data(df, threshold=0.95):
    """
    Drop rows whose fraction of non-null values is below `threshold`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is not modified.
    threshold : float, default 0.95
        Minimum fraction of non-null values (relative to the number of
        columns) a row needs in order to be kept.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the rows that meet the threshold.
    """
    import math

    n_columns = df.shape[1]

    # Round UP so a row must really reach the requested fraction: with 10 columns
    # and threshold 0.95 a row needs 10 non-null values, not 9 (int() truncated
    # 9.5 down to 9 and kept rows that were only 90% complete).
    # round(..., 9) absorbs float noise such as 0.7 * 10 == 7.000000000000001,
    # which would otherwise be pushed up to 8 by ceil().
    min_non_null = math.ceil(round(threshold * n_columns, 9))

    return df.dropna(thresh=min_non_null)

In [145]:
def preprocessing_dum(X_train, X_val):
    """
    Run the cleaning / imputation / feature-creation steps on the train and validation frames.

    Steps, in order: drop description columns, convert the `date_order` columns,
    convert `BOOLEAN_COLUMNS`, impute `numerical_columns`, fill remaining missing
    values, and create the C-date indicator features.

    The helpers after the first step come from `Preprocessing_functions` and are
    called for their side effects (their return values are ignored here) —
    presumably they modify the frames in place; verify against that module.

    Returns
    -------
    tuple
        (X_train, X_val) after preprocessing.
    """
    # drop_rows_with_missing_data(X_train, threshold=0.90)
    # drop_rows_with_missing_data(X_val, threshold=0.90)

    # drop_description_columns returns NEW frames, so its result must be rebound;
    # calling it without assignment (as before) left the description columns in place.
    X_train, X_val = drop_description_columns(X_train, X_val)
    convert_to_timestamp(X_train, X_val, date_order)
    convert_to_bool(X_train, X_val, col_names=BOOLEAN_COLUMNS)
    impute_mean_numerical(X_train, X_val, numerical_columns)
    fill_missing_with_mode(X_train, X_val)
    feature_creation_has_Cdate(X_train, X_val)
    # columns_to_drop = ['C-2 Date', 'C-3 Date', 'First Hearing Date']
    # X_train = X_train.drop(columns=columns_to_drop)
    # X_val = X_val.drop(columns=columns_to_drop)

    return X_train, X_val

def preprocessing_dum_test(X_test):
    """
    Run the test-set counterparts of the preprocessing steps on X_test.

    Each helper is called for its side effect only (return values are ignored)
    and the same X_test object that was passed in is returned.
    NOTE(review): this presumes the `*_test` helpers modify X_test in place — confirm
    in Preprocessing_functions.
    """
    # Date columns listed in `date_order`.
    convert_to_timestamp_test(X_test, date_order)
    # Columns listed in BOOLEAN_COLUMNS.
    convert_to_bool_test(X_test, col_names=BOOLEAN_COLUMNS)
    # NOTE(review): confirm whether imputation uses statistics learned on the training set.
    impute_mean_numerical_test(X_test, numerical_columns)
    fill_missing_with_mode_test(X_test)
    feature_creation_has_Cdate_test(X_test)
    return X_test

def scaling_encoding(X_train, X_val):
    """
    Scale the numerical columns and encode the categorical columns of both frames.

    `robust_scaling` is called for its side effect (its return value is ignored —
    presumably it scales in place; verify). The one-hot and frequency encoders
    return new frames, which are rebound and returned as (X_train, X_val).
    """
    #scaling_minmax(X_train, X_val, col_minmax)
    #scaling_standard(X_train, X_val, col_standart)
    robust_scaling(X_train, X_val, numerical_columns)
    # One-hot for low-cardinality categoricals, frequency encoding for high-cardinality ones.
    X_train, X_val = encoding_onehot(X_train, X_val, low_cardinality_cols)
    X_train, X_val = encoding_frequency1(X_train, X_val, high_cardinality_cols)


    return X_train, X_val

def scaling_encoding_test(X_test):
    """
    Encode the categorical columns of X_test and return the encoded frame.

    NOTE(review): unlike `scaling_encoding`, no robust-scaling step is applied here,
    so the test features may be on a different scale than the training features
    the model is fitted on — confirm whether a test-set scaling helper should be called.
    """
    #scaling_minmax_test(X_test, col_minmax)
    #scaling_standard_test(X_test, col_standart)
    # One-hot for low-cardinality categoricals, frequency encoding for high-cardinality ones.
    X_test= encoding_onehot_test(X_test, low_cardinality_cols)
    X_test = encoding_frequency1_test(X_test, high_cardinality_cols)


    return X_test




# Model Training

In [146]:
# from sklearn.svm import SVC
# from sklearn.metrics import f1_score
# from sklearn.model_selection import KFold
# from sklearn.feature_selection import RFE
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.linear_model import LogisticRegression

# # Garantir que os índices de X e y estejam alinhados
# X = X.reset_index(drop=True)
# y = y.reset_index(drop=True)

# # Faixa de valores para o parâmetro C
# c_range = np.logspace(-3, 2, 10)  # Exemplo de valores de 0.001 a 100
# kf = KFold(n_splits=5, shuffle=True, random_state=42)

# mean_f1_scores = []

# for c in c_range:
#     f1_scores = []
#     for train_index, test_index in kf.split(X):
#         # Dividir o dataset em treino e validação
#         X_train, X_val = X.iloc[train_index], X.iloc[test_index]
#         y_train, y_val = y.iloc[train_index], y.iloc[test_index]
        
#         # Pré-processamento
#         X_train, X_val = drop_description_columns(X_train, X_val)
#         X_train, X_val = preprocessing_dum(X_train, X_val)
#         X_train, X_val = scaling_encoding(X_train, X_val)
#         y_train,y_val=encoding_label(y_train,y_val)
        
#         # Seleção de features com RFE
#         X_train_selected, selected_features, feature_ranking = feature_selection_rfe(
#             X_train, y_train, 10, LogisticRegression()
#         )
        
#         # Treinar o SVM com o valor atual de C
#         svm = SVC(C=c, kernel='linear', random_state=42)
#         svm.fit(X_train_selected, y_train)
        
#         # Fazer previsões e calcular o F1 score
#         y_pred = svm.predict(X_val[selected_features])
#         f1 = f1_score(y_val, y_pred, average='macro')
#         f1_scores.append(f1)

#     # Armazenar a média dos F1 scores para o valor de C atual
#     mean_f1_scores.append(np.mean(f1_scores))

# # Determinar o valor ótimo de C
# optimal_c = c_range[np.argmax(mean_f1_scores)]
# print(f"The optimal value of C is {optimal_c}.")


# # Treinar o modelo final usando todo o conjunto de dados
# X_preprocessed, _ = preprocessing_dum(X, X)
# X_scaled, _ = scaling_encoding(X_preprocessed, X_preprocessed)
# selector = RFE(estimator=LogisticRegression(), n_features_to_select=10)
# X_final = selector.fit_transform(X_scaled, y)
# final_svm = SVC(C=optimal_c, kernel='linear', random_state=42)
# final_svm.fit(X_final, y)

# print(f"Model trained with optimal C={optimal_c}.")


In [147]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from xgboost import XGBClassifier
# from sklearn.metrics import accuracy_score, f1_score
# import matplotlib.pyplot as plt
# from sklearn.linear_model import LogisticRegression

# # Garantir que os índices de X e y estejam alinhados
# X = X.reset_index(drop=True)
# y = y.reset_index(drop=True)

# # Definir o learning rate único
# learning_rate = 0.5  # Você pode alterar este valor conforme necessário

# # Pré-processamento
# print("Realizando o pré-processamento...")
# X_train, X_val = drop_description_columns(X_train, X_val)
# X_train, X_val = preprocessing_dum(X_train, X_val)
# X_train, X_val = scaling_encoding(X_train, X_val)
# y_train, y_val = encoding_label(y_train, y_val)

# # Seleção de features com RFE
# print("Selecionando features com RFE...")
# X_train_selected, selected_features, feature_ranking = feature_selection_rfe(
#     X_train, y_train, 35, LogisticRegression()
# )

# # Treinamento do modelo
# print("Treinando o modelo...")
# model = XGBClassifier(learning_rate=learning_rate, use_label_encoder=False, eval_metric='mlogloss')
# model.fit(X_train_selected, y_train)

# # Avaliação no conjunto de validação
# print("Avaliando no conjunto de validação...")
# X_val_selected = X_val[selected_features]
# y_pred_val = model.predict(X_val_selected)
# val_accuracy = accuracy_score(y_val, y_pred_val)
# val_f1 = f1_score(y_val, y_pred_val, average='weighted')  # Use "weighted" para classes desbalanceadas

# # Resultados
# print(f"Validation Accuracy: {val_accuracy:.4f}")
# print(f"Validation F1 Score: {val_f1:.4f}")




In [94]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder  # explicit: previously only available if leaked by a star import
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import joblib


# Not used further in this cell; kept so X / y keep the same (reset) index as before.
X = X.reset_index(drop=True)
y = y.reset_index(drop=True)

learning_rate = 0.7

# Preprocessing.
# Work on copies and bind the result to new names so the raw X_train / X_val split
# is left untouched: this makes the cell re-runnable and lets the later cells
# preprocess the same raw split again instead of already-encoded frames.
print("preprocessing...")
X_train_prep, X_val_prep = preprocessing_dum(X_train.copy(), X_val.copy())
X_train_prep, X_val_prep = scaling_encoding(X_train_prep, X_val_prep)
X_train_prep, X_val_prep = outliers_iqr(X_train_prep, X_val_prep, X_train_prep.columns)

# The labels are taken from the untouched split, so the feature rows must still line up.
assert len(X_train_prep) == len(y_train), "preprocessing changed the number of training rows"
assert len(X_val_prep) == len(y_val), "preprocessing changed the number of validation rows"

# Preprocessing of the test data.
# Rows where 'Assembly Date' is the only populated field are removed, as for the training data.
# NOTE(review): those rows get no prediction in the submission — confirm this is acceptable.
# NOTE(review): the test path applies no robust scaling / IQR outlier step, unlike train/val.
only_assembly_date_test = (
    test_data.drop(columns=['Assembly Date']).isna().all(axis=1)
    & test_data['Assembly Date'].notna()
)
# .copy() so the helpers below operate on an independent frame, not a slice of test_data.
X_test = test_data[~only_assembly_date_test].copy()
X_test = drop_description_columns_Test(X_test)
X_test = preprocessing_dum_test(X_test)
X_test = scaling_encoding_test(X_test)

# Encode the target labels as integers (fitted on the training labels only).
enc2 = LabelEncoder()
enc2.fit(y_train)

y_train_encoded = enc2.transform(y_train)
y_val_encoded = enc2.transform(y_val)

# Feature selection.
# The RFECV search below is disabled; a fixed feature list is used instead.
print("Selecting features RFECV...")
# model_for_rfe = LogisticRegression(max_iter=1000)  # base model for RFECV
# cv_strategy = StratifiedKFold(n_splits=5)  # cross-validation strategy

# rfecv = RFECV(estimator=model_for_rfe, step=1, cv=cv_strategy, scoring='accuracy', n_jobs=-1)
# rfecv.fit(X_train, y_train)

#selected_features = X_train.columns[rfecv.support_]
selected_features = ['Average Weekly Wage', 'C-2 Date', 'C-3 Date', 'First Hearing Date',
                     'IME-4 Count', 'Attorney/Representative_False', 'Attorney/Representative_True',
                     'Carrier Type_1A. PRIVATE', 'Carrier Type_2A. SIF']

X_train_selected = X_train_prep[selected_features]
X_val_selected = X_val_prep[selected_features]
X_test_selected = X_test[selected_features]

#print(f"Número de features selecionadas: {len(selected_features)}")
#print("Features selecionadas:", selected_features.tolist())

# Train the model on the selected features.
# `use_label_encoder` is no longer passed: XGBoost reports it as an unused parameter.
print("Treinando o modelo...")
model = XGBClassifier(learning_rate=learning_rate, eval_metric='mlogloss')
model.fit(X_train_selected, y_train_encoded)

# Evaluate on the validation set.
print("Avaliando no conjunto de validação...")
y_pred_val = model.predict(X_val_selected)
val_accuracy = accuracy_score(y_val_encoded, y_pred_val)
val_f1 = f1_score(y_val_encoded, y_pred_val, average='macro')

print(f"Validation Accuracy: {val_accuracy:.4f}")
print(f"Validation F1 Score: {val_f1:.4f}")

# Predict on the test set and map the integer classes back to the original labels.
y_pred_test = model.predict(X_test_selected)
y_pred_test_decoded = enc2.inverse_transform(y_pred_test)

# Build the submission in its own frame (indexed like X_test) instead of
# adding a prediction column to the feature frame.
sample_submission = pd.DataFrame(
    {'Claim Injury Type': y_pred_test_decoded},
    index=X_test.index,
)
sample_submission.to_csv('submission_xgboost2.csv')

joblib.dump(model, 'xgboost_model.pkl')


preprocessing...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_val[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

Selecting features RFECV...
Treinando o modelo...


Parameters: { "use_label_encoder" } are not used.



Avaliando no conjunto de validação...
Validation Accuracy: 0.7518
Validation F1 Score: 0.3011


['xgboost_model.pkl']


  warnings.warn(smsg, UserWarning)
Número de features selecionadas: 9
Features selecionadas: ['Average Weekly Wage', 'C-2 Date', 'C-3 Date', 'First Hearing Date', 'IME-4 Count', 'Attorney/Representative_False', 'Attorney/Representative_True', 'Carrier Type_1A. PRIVATE', 'Carrier Type_2A. SIF']
Treinando o modelo...
Avaliando no conjunto de validação...
Validation Accuracy: 0.7765
Validation F1 Score: 0.7295


## XGBoost with SMOTE

In [148]:
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder  # explicit: previously only available if leaked by a star import
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score
import joblib
import pandas as pd

# Not used further in this cell; kept so X / y keep the same (reset) index as before.
X = X.reset_index(drop=True)
y = y.reset_index(drop=True)

learning_rate = 0.7

# Preprocessing.
# Work on copies and bind the result to new names so the raw X_train / X_val split
# is left untouched: this makes the cell re-runnable and independent of whether
# another modelling cell has already been executed.
print("Preprocessing...")
X_train_prep, X_val_prep = preprocessing_dum(X_train.copy(), X_val.copy())
X_train_prep, X_val_prep = scaling_encoding(X_train_prep, X_val_prep)
X_train_prep, X_val_prep = outliers_iqr(X_train_prep, X_val_prep, X_train_prep.columns)

# The labels are taken from the untouched split, so the feature rows must still line up.
assert len(X_train_prep) == len(y_train), "preprocessing changed the number of training rows"
assert len(X_val_prep) == len(y_val), "preprocessing changed the number of validation rows"

# Preprocess test data.
# Rows where 'Assembly Date' is the only populated field are removed, as for the training data.
# NOTE(review): those rows get no prediction in the submission — confirm this is acceptable.
# NOTE(review): the test path applies no robust scaling / IQR outlier step, unlike train/val.
only_assembly_date_test = (
    test_data.drop(columns=['Assembly Date']).isna().all(axis=1)
    & test_data['Assembly Date'].notna()
)
# .copy() so the helpers below operate on an independent frame, not a slice of test_data.
X_test = test_data[~only_assembly_date_test].copy()
X_test = drop_description_columns_Test(X_test)
X_test = preprocessing_dum_test(X_test)
X_test = scaling_encoding_test(X_test)

# Encode labels (fitted on the training labels only).
enc2 = LabelEncoder()
enc2.fit(y_train)

y_train_encoded = enc2.transform(y_train)
y_val_encoded = enc2.transform(y_val)

# Oversample the training data only; the validation set keeps its original class balance.
print("Applying SMOTE...")
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_prep, y_train_encoded)

# Feature selection using predefined features.
selected_features = ['Average Weekly Wage', 'C-2 Date', 'C-3 Date', 'First Hearing Date',
                     'IME-4 Count', 'Attorney/Representative_False', 'Attorney/Representative_True',
                     'Carrier Type_1A. PRIVATE', 'Carrier Type_2A. SIF']

X_train_selected = X_train_smote[selected_features]
X_val_selected = X_val_prep[selected_features]
X_test_selected = X_test[selected_features]

# Train the XGBoost model.
# `use_label_encoder` is no longer passed: XGBoost reports it as an unused parameter.
print("Training the XGBoost model...")
model = XGBClassifier(learning_rate=learning_rate, eval_metric='mlogloss')
model.fit(X_train_selected, y_train_smote)

# Evaluate on validation data.
print("Evaluating on the validation set...")
y_pred_val = model.predict(X_val_selected)
val_accuracy = accuracy_score(y_val_encoded, y_pred_val)
val_f1 = f1_score(y_val_encoded, y_pred_val, average='macro')

print(f"Validation Accuracy: {val_accuracy:.4f}")
print(f"Validation F1 Score: {val_f1:.4f}")

# Predict on test data and map the integer classes back to the original labels.
y_pred_test = model.predict(X_test_selected)
y_pred_test_decoded = enc2.inverse_transform(y_pred_test)

# Build the submission in its own frame (indexed like X_test) instead of
# adding a prediction column to the feature frame.
sample_submission = pd.DataFrame(
    {'Claim Injury Type': y_pred_test_decoded},
    index=X_test.index,
)
sample_submission.to_csv('submission_xgboost_smote.csv')

# Save the model.
joblib.dump(model, 'xgboost_model_smote.pkl')

print("XGBoost model with SMOTE training and prediction completed.")


Preprocessing...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_val[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

Applying SMOTE...
Training the XGBoost model...


Parameters: { "use_label_encoder" } are not used.



Evaluating on the validation set...
Validation Accuracy: 0.6272
Validation F1 Score: 0.2901
XGBoost model with SMOTE training and prediction completed.


## Model to Evaluate XGBoost for different Learning Rates

In [55]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder  # explicit: previously only available if leaked by a star import
from xgboost import XGBClassifier
import joblib
import pandas as pd

# Define the learning rates to evaluate.
learning_rates = [0.2, 0.4, 0.5, 0.7, 0.8]

# List of per-learning-rate result records (one dict per model).
results = []

# Preprocessing.
# Work on copies and bind the result to new names so the raw X_train / X_val split
# is left untouched: this makes the cell re-runnable and independent of whether
# another modelling cell has already been executed.
print("preprocessing...")
X_train_prep, X_val_prep = preprocessing_dum(X_train.copy(), X_val.copy())
X_train_prep, X_val_prep = scaling_encoding(X_train_prep, X_val_prep)
X_train_prep, X_val_prep = outliers_iqr(X_train_prep, X_val_prep, X_train_prep.columns)

# The labels are taken from the untouched split, so the feature rows must still line up.
assert len(X_train_prep) == len(y_train), "preprocessing changed the number of training rows"
assert len(X_val_prep) == len(y_val), "preprocessing changed the number of validation rows"

# Preprocessing of the test data.
# Rows where 'Assembly Date' is the only populated field are removed, as for the training data.
# NOTE(review): those rows get no prediction in the submissions — confirm this is acceptable.
# NOTE(review): the test path applies no robust scaling / IQR outlier step, unlike train/val.
only_assembly_date_test = (
    test_data.drop(columns=['Assembly Date']).isna().all(axis=1)
    & test_data['Assembly Date'].notna()
)
# .copy() so the helpers below operate on an independent frame, not a slice of test_data.
X_test = test_data[~only_assembly_date_test].copy()
X_test = drop_description_columns_Test(X_test)
X_test = preprocessing_dum_test(X_test)
X_test = scaling_encoding_test(X_test)

# Encode the target labels as integers (fitted on the training labels only).
enc2 = LabelEncoder()
enc2.fit(y_train)

y_train_encoded = enc2.transform(y_train)
y_val_encoded = enc2.transform(y_val)

# Feature selection: a fixed, predefined feature list is used (no RFECV is run here).
print("Selecting features RFECV...")
selected_features = ['Average Weekly Wage', 'C-2 Date', 'C-3 Date', 'First Hearing Date',
                    'IME-4 Count', 'Attorney/Representative_False', 'Attorney/Representative_True',
                    'Carrier Type_1A. PRIVATE', 'Carrier Type_2A. SIF']

X_train_selected = X_train_prep[selected_features]
X_val_selected = X_val_prep[selected_features]
X_test_selected = X_test[selected_features]

# Train, evaluate and predict once per learning rate.
for lr in learning_rates:
    print(f"Training model with learning rate: {lr}")

    # `use_label_encoder` is no longer passed: XGBoost reports it as an unused parameter.
    model = XGBClassifier(learning_rate=lr, eval_metric='mlogloss')
    model.fit(X_train_selected, y_train_encoded)

    # Evaluate on validation data.
    print("Evaluating on validation set...")
    y_pred_val = model.predict(X_val_selected)
    val_accuracy = accuracy_score(y_val_encoded, y_pred_val)
    val_f1 = f1_score(y_val_encoded, y_pred_val, average='macro')

    print(f"Validation Accuracy for lr={lr}: {val_accuracy:.4f}")
    print(f"Validation F1 Score for lr={lr}: {val_f1:.4f}")

    # Store the results.
    results.append({'Learning Rate': lr, 'Validation Accuracy': val_accuracy, 'Validation F1 Score': val_f1})

    # Predict on test data for the current model.
    y_pred_test = model.predict(X_test_selected)
    y_pred_test_decoded = enc2.inverse_transform(y_pred_test)

    # Build the submission in its own frame (indexed like X_test) so the feature
    # frame is not modified on every iteration.
    sample_submission = pd.DataFrame(
        {'Claim Injury Type': y_pred_test_decoded},
        index=X_test.index,
    )
    submission_filename = f'submission_xgboost_lr_{lr}.csv'
    sample_submission.to_csv(submission_filename)
    print(f"Saved predictions to {submission_filename}")

# Convert results to DataFrame for better visualization.
results_df = pd.DataFrame(results)

# Display results.
print("Learning Rate Evaluation Results:")
print(results_df)

# Save the results to a CSV file.
results_df.to_csv('learning_rate_evaluation_results.csv', index=False)


preprocessing...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_val[col].fillna(mean_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

Selecting features RFECV...
Training model with learning rate: 0.2


Parameters: { "use_label_encoder" } are not used.



Evaluating on validation set...
Validation Accuracy for lr=0.2: 0.7528
Validation F1 Score for lr=0.2: 0.2992
Saved predictions to submission_xgboost_lr_0.2.csv
Training model with learning rate: 0.4


Parameters: { "use_label_encoder" } are not used.



Evaluating on validation set...
Validation Accuracy for lr=0.4: 0.7522
Validation F1 Score for lr=0.4: 0.3000
Saved predictions to submission_xgboost_lr_0.4.csv
Training model with learning rate: 0.5


Parameters: { "use_label_encoder" } are not used.



Evaluating on validation set...
Validation Accuracy for lr=0.5: 0.7522
Validation F1 Score for lr=0.5: 0.3002
Saved predictions to submission_xgboost_lr_0.5.csv
Training model with learning rate: 0.7


Parameters: { "use_label_encoder" } are not used.



Evaluating on validation set...
Validation Accuracy for lr=0.7: 0.7518
Validation F1 Score for lr=0.7: 0.3011
Saved predictions to submission_xgboost_lr_0.7.csv
Training model with learning rate: 0.8


Parameters: { "use_label_encoder" } are not used.



Evaluating on validation set...
Validation Accuracy for lr=0.8: 0.7510
Validation F1 Score for lr=0.8: 0.3008
Saved predictions to submission_xgboost_lr_0.8.csv
Learning Rate Evaluation Results:
   Learning Rate  Validation Accuracy  Validation F1 Score
0            0.2             0.752757             0.299214
1            0.4             0.752217             0.300004
2            0.5             0.752208             0.300182
3            0.7             0.751764             0.301075
4            0.8             0.750997             0.300825
