In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OrdinalEncoder,LabelEncoder, OneHotEncoder
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split 

## Преобразование колонок различных типов

In [9]:
import pandas as pd
X = pd.DataFrame(
    {'city': ['London', 'London', 'Paris', 'Sallisaw'],
     'title': ["His Last Bow", "How Watson Learned the Trick",
               "A Moveable Feast", "The Grapes of Wrath"],
     'expert_rating': [5, 3, 4, 5],
     'user_rating': [4, 5, 4, 3]})
X

Unnamed: 0,city,title,expert_rating,user_rating
0,London,His Last Bow,5,4
1,London,How Watson Learned the Trick,3,5
2,Paris,A Moveable Feast,4,4
3,Sallisaw,The Grapes of Wrath,5,3


In [10]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_selector


ct = ColumnTransformer([
      ('scale', StandardScaler(),
      make_column_selector(dtype_include=np.number)),
      ('onehot',
      OneHotEncoder(),
      make_column_selector(pattern='city', dtype_include=object))
      ])

ct.fit_transform(X)

array([[ 0.90453403,  0.        ,  1.        ,  0.        ,  0.        ],
       [-1.50755672,  1.41421356,  1.        ,  0.        ,  0.        ],
       [-0.30151134,  0.        ,  0.        ,  1.        ,  0.        ],
       [ 0.90453403, -1.41421356,  0.        ,  0.        ,  1.        ]])

In [11]:
ct.get_feature_names_out()

array(['scale__expert_rating', 'scale__user_rating',
       'onehot__city_London', 'onehot__city_Paris',
       'onehot__city_Sallisaw'], dtype=object)

In [12]:
pd.DataFrame(data=ct.fit_transform(X), columns = ct.get_feature_names_out())

Unnamed: 0,scale__expert_rating,scale__user_rating,onehot__city_London,onehot__city_Paris,onehot__city_Sallisaw
0,0.904534,0.0,1.0,0.0,0.0
1,-1.507557,1.414214,1.0,0.0,0.0
2,-0.301511,0.0,0.0,1.0,0.0
3,0.904534,-1.414214,0.0,0.0,1.0


In [13]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_selector

display(X)

ct_r = ColumnTransformer([
      ('scale', StandardScaler(),
      make_column_selector(pattern='expert_rating')),
      ('onehot',
      OneHotEncoder(),
      make_column_selector(pattern='city', dtype_include=object)),
      ('drop_colunm',
       'drop',
      make_column_selector(pattern='title', dtype_include=object))
      ],
      remainder='passthrough')

X_cl = ct_r.fit_transform(X)

pd.DataFrame(X_cl, columns = ct_r.get_feature_names_out()) 

Unnamed: 0,city,title,expert_rating,user_rating
0,London,His Last Bow,5,4
1,London,How Watson Learned the Trick,3,5
2,Paris,A Moveable Feast,4,4
3,Sallisaw,The Grapes of Wrath,5,3


Unnamed: 0,scale__expert_rating,onehot__city_London,onehot__city_Paris,onehot__city_Sallisaw,remainder__user_rating
0,0.904534,1.0,0.0,0.0,4.0
1,-1.507557,1.0,0.0,0.0,5.0
2,-0.301511,0.0,1.0,0.0,4.0
3,0.904534,0.0,0.0,1.0,3.0


In [14]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler

display(X)

column_trans = make_column_transformer(
    (OneHotEncoder(), ['city']),
    ('drop', 'title'),
    remainder=MinMaxScaler())

display(column_trans)

z =column_trans.fit_transform(X)

pd.DataFrame(z, columns = column_trans.get_feature_names_out()) 

Unnamed: 0,city,title,expert_rating,user_rating
0,London,His Last Bow,5,4
1,London,How Watson Learned the Trick,3,5
2,Paris,A Moveable Feast,4,4
3,Sallisaw,The Grapes of Wrath,5,3


Unnamed: 0,onehotencoder__city_London,onehotencoder__city_Paris,onehotencoder__city_Sallisaw,remainder__expert_rating,remainder__user_rating
0,1.0,0.0,0.0,1.0,0.5
1,1.0,0.0,0.0,0.0,1.0
2,0.0,1.0,0.0,0.5,0.5
3,0.0,0.0,1.0,1.0,0.0


In [15]:
X_col_tr = column_trans.fit_transform(X)
pd.DataFrame(X_col_tr , columns = column_trans.get_feature_names_out()) 

Unnamed: 0,onehotencoder__city_London,onehotencoder__city_Paris,onehotencoder__city_Sallisaw,remainder__expert_rating,remainder__user_rating
0,1.0,0.0,0.0,1.0,0.5
1,1.0,0.0,0.0,0.0,1.0
2,0.0,1.0,0.0,0.5,0.5
3,0.0,0.0,1.0,1.0,0.0


## Сложные преобразования и заполнения пропущенных данных

In [16]:
df = pd.read_csv('Diamants/dim_train.csv')
df.tail()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
40450,2.09,Premium,H,SI1,61.1,56.0,18559,8.3,8.23,5.05
40451,1.33,Ideal,J,SI1,62.4,54.0,5857,7.04,7.07,4.4
40452,0.33,Very Good,F,VVS2,61.1,56.0,739,4.46,4.48,2.73
40453,0.23,Very Good,D,VVS2,62.5,58.0,530,3.92,3.95,2.46
40454,1.07,Premium,F,SI1,61.7,58.0,5042,6.54,6.61,4.06


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40455 entries, 0 to 40454
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   carat    40455 non-null  float64
 1   cut      40455 non-null  object 
 2   color    40455 non-null  object 
 3   clarity  40455 non-null  object 
 4   depth    40455 non-null  float64
 5   table    40455 non-null  float64
 6   price    40455 non-null  int64  
 7   x        40455 non-null  float64
 8   y        40455 non-null  float64
 9   z        40455 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 3.1+ MB


In [18]:
df.describe()

Unnamed: 0,carat,depth,table,price,x,y,z
count,40455.0,40455.0,40455.0,40455.0,40455.0,40455.0,40455.0
mean,0.798351,61.752775,57.462279,3932.047831,5.732041,5.735832,3.539579
std,0.473726,1.435271,2.222987,3983.801692,1.120689,1.15085,0.709554
min,0.2,43.0,43.0,326.0,0.0,0.0,0.0
25%,0.4,61.1,56.0,953.0,4.72,4.73,2.91
50%,0.7,61.8,57.0,2415.0,5.7,5.71,3.53
75%,1.04,62.5,59.0,5310.0,6.54,6.54,4.03
max,5.01,79.0,73.0,18823.0,10.74,58.9,31.8


In [20]:
# отбор всех сторок с нулевым. значением в любом месте колонки
df[df.eq(0).any(axis=1)]

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
545,1.01,Premium,H,I1,58.1,59.0,3167,6.66,6.6,0.0
1528,1.0,Very Good,H,VS2,63.3,53.0,5139,0.0,0.0,0.0
1818,2.8,Good,G,SI2,63.8,58.0,18788,8.9,8.85,0.0
3606,1.5,Good,G,I1,64.0,61.0,4731,7.15,7.04,0.0
5820,1.2,Premium,D,VVS1,62.1,59.0,15686,0.0,0.0,0.0
6487,0.71,Good,F,SI2,64.1,60.0,2130,0.0,0.0,0.0
6592,1.56,Ideal,G,VS2,62.2,54.0,12800,0.0,0.0,0.0
10297,0.71,Good,F,SI2,64.1,60.0,2130,0.0,0.0,0.0
10767,1.15,Ideal,G,VS2,59.2,56.0,5564,6.88,6.83,0.0
14879,2.2,Premium,H,SI1,61.2,59.0,17265,8.42,8.37,0.0


In [21]:
df.describe()

Unnamed: 0,carat,depth,table,price,x,y,z
count,40455.0,40455.0,40455.0,40455.0,40455.0,40455.0,40455.0
mean,0.798351,61.752775,57.462279,3932.047831,5.732041,5.735832,3.539579
std,0.473726,1.435271,2.222987,3983.801692,1.120689,1.15085,0.709554
min,0.2,43.0,43.0,326.0,0.0,0.0,0.0
25%,0.4,61.1,56.0,953.0,4.72,4.73,2.91
50%,0.7,61.8,57.0,2415.0,5.7,5.71,3.53
75%,1.04,62.5,59.0,5310.0,6.54,6.54,4.03
max,5.01,79.0,73.0,18823.0,10.74,58.9,31.8


In [22]:
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.32,Ideal,H,SI1,61.8,55.0,479,4.39,4.41,2.72
1,0.3,Ideal,E,IF,62.2,55.0,957,4.32,4.36,2.7
2,0.7,Fair,H,I1,66.5,57.0,1058,5.51,5.4,3.64
3,0.4,Very Good,E,VVS2,62.8,58.0,1066,4.68,4.71,2.95
4,1.53,Good,J,SI2,64.1,58.0,6368,7.3,7.18,4.64


Выделим отдельно категориальные фичи и те, к которым мы хотим сделать полиномиальный фичинг. Также отдельно остивим фичи, где будем применять StandartScaler. Также выделим целевую переменную

In [23]:
cat_features = ['cut','color']
axis_features = ['x','y','z']
num_features = ['carat','depth','table']
y = np.array(df.price)
X = df.drop(columns=['price'])

Разделим на train/test

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Расширенные преобразования в энкодоре

In [30]:
#pip install category_encoders

In [31]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
import category_encoders as ce

Следующие три блока делают одно и тоже, просто признаки ставновятся в разный порядок, если их позапускать по отдельности, можно получить совершено разные результаты

In [32]:
import category_encoders as ce

axis_transformer = Pipeline(steps=[
    ('polynom', PolynomialFeatures(2,include_bias=False)),
    ('scaler', StandardScaler())])

clarity_map = [{
    'col':'clarity',##### Обратить внимание
    'mapping':{'FL':10, 'IF':9, 'VVS1':8, 'VVS2':7, 'VS1':6, 'VS2':5, 
               'SI1':4, 'SI2':3, 'I1':2, 'I2':1, 'I3':0}
    }]

clarity_transformer = Pipeline(steps=[
    ('ce',ce.OrdinalEncoder(mapping=clarity_map)),
    ('scaler', MinMaxScaler())
    ])
    

CT = ColumnTransformer([
        ("pol_std", axis_transformer, axis_features),
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(), cat_features),
        ("ordinal_map", clarity_transformer, ['clarity'])##### Обратить внимание
        ])

display(CT)

res_ct = CT.fit_transform(X_train)
pd.DataFrame(res_ct).head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,-1.209709,-1.203507,-1.164127,-1.118585,-1.116213,-1.104806,-0.63334,-1.06802,-1.08697,-1.035266,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.571429
1,-1.084814,-1.083196,-1.149778,-1.025506,-1.02493,-1.053051,-0.582792,-1.018728,-1.076302,-0.993009,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.714286
2,-0.843945,-0.799607,-0.862797,-0.837728,-0.819585,-0.847459,-0.457294,-0.803298,-0.854755,-0.802854,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.285714
3,-0.415734,-0.378521,-0.432324,-0.477007,-0.459639,-0.485123,-0.254502,-0.453451,-0.49318,-0.528185,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.857143
4,0.28903,0.240219,0.069894,0.191628,0.169589,0.081666,0.079143,0.059071,-0.026975,2.3e-05,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714


In [33]:
X_train.describe()

Unnamed: 0,carat,depth,table,x,y,z
count,30341.0,30341.0,30341.0,30341.0,30341.0,30341.0
mean,0.799989,61.755058,57.470195,5.736014,5.740468,3.54129
std,0.473306,1.443642,2.222486,1.120961,1.163675,0.69692
min,0.2,43.0,43.0,0.0,0.0,0.0
25%,0.4,61.0,56.0,4.72,4.73,2.91
50%,0.7,61.8,57.0,5.7,5.71,3.53
75%,1.04,62.5,59.0,6.54,6.54,4.04
max,4.01,79.0,73.0,10.14,58.9,8.06


## Стратегии заполнения пропусков в данных

Особено полезно, кода на вход в моделе могут быть пропущенные или ошибочные данные, как мы видим в этом наборе.

In [34]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

In [35]:
ind_nan = X_train[X_train.eq(0).any(axis=1)].index
X_train[X_train.eq(0).any(axis=1)]

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z
29664,1.12,Premium,G,I1,60.4,59.0,6.71,6.67,0.0
1818,2.8,Good,G,SI2,63.8,58.0,8.9,8.85,0.0
33091,1.14,Fair,G,VS1,57.5,67.0,0.0,0.0,0.0
18338,2.25,Premium,I,SI1,61.3,58.0,8.52,8.42,0.0
10297,0.71,Good,F,SI2,64.1,60.0,0.0,0.0,0.0
14879,2.2,Premium,H,SI1,61.2,59.0,8.42,8.37,0.0
29447,1.01,Premium,F,SI2,59.2,58.0,6.5,6.47,0.0
37495,2.18,Premium,H,SI2,59.4,61.0,8.49,8.45,0.0
1528,1.0,Very Good,H,VS2,63.3,53.0,0.0,0.0,0.0
6487,0.71,Good,F,SI2,64.1,60.0,0.0,0.0,0.0


In [15]:
ind_nan = X_train[X_train.eq(0).any(axis=1)].index
X_train[X_train.eq(0).any(axis=1)]

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z
29664,1.12,Premium,G,I1,60.4,59.0,6.71,6.67,0.0
1818,2.8,Good,G,SI2,63.8,58.0,8.9,8.85,0.0
33091,1.14,Fair,G,VS1,57.5,67.0,0.0,0.0,0.0
18338,2.25,Premium,I,SI1,61.3,58.0,8.52,8.42,0.0
10297,0.71,Good,F,SI2,64.1,60.0,0.0,0.0,0.0
14879,2.2,Premium,H,SI1,61.2,59.0,8.42,8.37,0.0
29447,1.01,Premium,F,SI2,59.2,58.0,6.5,6.47,0.0
37495,2.18,Premium,H,SI2,59.4,61.0,8.49,8.45,0.0
1528,1.0,Very Good,H,VS2,63.3,53.0,0.0,0.0,0.0
6487,0.71,Good,F,SI2,64.1,60.0,0.0,0.0,0.0


### Заполнение нужным значением среднем или наиболее частым `SimpleImputer`

Используем, чтобы не исказить статистику по данным, но может дать в итоге ошибки в модели. Заполняется единым значением по всем "дыркам".

In [36]:
column_trans = make_column_transformer(
    (SimpleImputer(missing_values=0, strategy='mean'), ['x','y','z'])
     )

X_ct = column_trans.fit_transform(X_train)

df_ct = pd.DataFrame(X_ct, columns = column_trans.get_feature_names_out(), index = X_train.index)
display(df_ct.head())
df_ct.describe()

Unnamed: 0,simpleimputer__x,simpleimputer__y,simpleimputer__z
36492,4.38,4.34,2.73
12932,4.52,4.48,2.74
2764,4.79,4.81,2.94
29533,5.27,5.3,3.24
9026,6.06,6.02,3.59


Unnamed: 0,simpleimputer__x,simpleimputer__y,simpleimputer__z
count,30341.0,30341.0,30341.0
mean,5.737148,5.741603,3.543159
std,1.118054,1.160871,0.692157
min,3.73,3.68,1.41
25%,4.72,4.73,2.92
50%,5.7,5.71,3.53
75%,6.54,6.54,4.04
max,10.14,58.9,8.06


In [37]:
pd.concat([X_train[X_train.eq(0).any(axis=1)][['x','y','z']], df_ct.loc[ind_nan] ], axis=1)

Unnamed: 0,x,y,z,simpleimputer__x,simpleimputer__y,simpleimputer__z
29664,6.71,6.67,0.0,6.71,6.67,3.543159
1818,8.9,8.85,0.0,8.9,8.85,3.543159
33091,0.0,0.0,0.0,5.737148,5.741603,3.543159
18338,8.52,8.42,0.0,8.52,8.42,3.543159
10297,0.0,0.0,0.0,5.737148,5.741603,3.543159
14879,8.42,8.37,0.0,8.42,8.37,3.543159
29447,6.5,6.47,0.0,6.5,6.47,3.543159
37495,8.49,8.45,0.0,8.49,8.45,3.543159
1528,0.0,0.0,0.0,5.737148,5.741603,3.543159
6487,0.0,0.0,0.0,5.737148,5.741603,3.543159


### Заполнение итеративным методом

Заполняем пропуски моделируя функцию с отсутствующими значениями как функцию других функций и использует эту оценку для заполнения. Алгоритм делает это в итерированном круговом режиме: на каждом шаге столбец признаков обозначается как вывод y, а другие столбцы признаков рассматриваются как входы X. Регрессор помещается на (X, y) для известных y. Затем регрессор используется для прогнозирования недостающих значений y. Это делается для каждой функции итеративно, а затем повторяется для раундов max_iter.

Могут быть разные способы заполнения. По умолчанию байесовская регрессия. [Возможен любой алгоритм.](https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html#sklearn.impute.IterativeImputer)

In [38]:
column_trans = make_column_transformer(
    (IterativeImputer(missing_values=0, max_iter=20, random_state=0), ['carat','x','y','z']) 
     )

X_ct = column_trans.fit_transform(X_train)

df_it = pd.DataFrame(X_ct, columns = column_trans.get_feature_names_out(), index = X_train.index)
display(df_it.head())
df_it.describe()

Unnamed: 0,iterativeimputer__carat,iterativeimputer__x,iterativeimputer__y,iterativeimputer__z
36492,0.31,4.38,4.34,2.73
12932,0.33,4.52,4.48,2.74
2764,0.42,4.79,4.81,2.94
29533,0.55,5.27,5.3,3.24
9026,0.8,6.06,6.02,3.59


Unnamed: 0,iterativeimputer__carat,iterativeimputer__x,iterativeimputer__y,iterativeimputer__z
count,30341.0,30341.0,30341.0,30341.0
mean,0.799989,5.737315,5.741768,3.543622
std,0.473306,1.118241,1.161049,0.692651
min,0.2,3.73,3.68,1.41
25%,0.4,4.72,4.73,2.92
50%,0.7,5.7,5.71,3.53
75%,1.04,6.54,6.54,4.04
max,4.01,10.14,58.9,8.06


In [39]:
pd.concat([X_train[X_train.eq(0).any(axis=1)][['x','y','z']], df_ct.loc[ind_nan], df_it.loc[ind_nan] ], axis=1)

Unnamed: 0,x,y,z,simpleimputer__x,simpleimputer__y,simpleimputer__z,iterativeimputer__carat,iterativeimputer__x,iterativeimputer__y,iterativeimputer__z
29664,6.71,6.67,0.0,6.71,6.67,3.543159,1.12,6.71,6.67,4.116679
1818,8.9,8.85,0.0,8.9,8.85,3.543159,2.8,8.9,8.85,5.658253
33091,0.0,0.0,0.0,5.737148,5.741603,3.543159,1.14,6.514296,6.513396,4.02497
18338,8.52,8.42,0.0,8.52,8.42,3.543159,2.25,8.52,8.42,5.32122
10297,0.0,0.0,0.0,5.737148,5.741603,3.543159,0.71,5.531675,5.537545,3.416225
14879,8.42,8.37,0.0,8.42,8.37,3.543159,2.2,8.42,8.37,5.260967
29447,6.5,6.47,0.0,6.5,6.47,3.543159,1.01,6.5,6.47,3.982525
37495,8.49,8.45,0.0,8.49,8.45,3.543159,2.18,8.49,8.45,5.292087
1528,0.0,0.0,0.0,5.737148,5.741603,3.543159,1.0,6.194373,6.195677,3.826774
6487,0.0,0.0,0.0,5.737148,5.741603,3.543159,0.71,5.531675,5.537545,3.416225


### Заполнение методом ближайших соседей "дырок" в данных

Специализированное решение ближайшими соседями.

In [20]:
column_trans = make_column_transformer(
    (KNNImputer(missing_values=0, n_neighbors=3, weights="uniform"), ['carat','x','y','z']) 
     )

X_ct = column_trans.fit_transform(X_train)

df_knn = pd.DataFrame(X_ct, columns = column_trans.get_feature_names_out(), index = X_train.index)
display(df_it.head())
df_knn.describe()

Unnamed: 0,iterativeimputer__carat,iterativeimputer__x,iterativeimputer__y,iterativeimputer__z
36492,0.31,4.38,4.34,2.73
12932,0.33,4.52,4.48,2.74
2764,0.42,4.79,4.81,2.94
29533,0.55,5.27,5.3,3.24
9026,0.8,6.06,6.02,3.59


Unnamed: 0,knnimputer__carat,knnimputer__x,knnimputer__y,knnimputer__z
count,30341.0,30341.0,30341.0,30341.0
mean,0.799989,5.737328,5.741781,3.543607
std,0.473306,1.118201,1.161006,0.69258
min,0.2,3.73,3.68,1.41
25%,0.4,4.72,4.73,2.92
50%,0.7,5.7,5.71,3.53
75%,1.04,6.54,6.54,4.04
max,4.01,10.14,58.9,8.06


In [21]:
pd.concat([X_train[X_train.eq(0).any(axis=1)][['x','y','z']], df_it.loc[ind_nan], df_knn.loc[ind_nan] ], axis=1)

Unnamed: 0,x,y,z,iterativeimputer__carat,iterativeimputer__x,iterativeimputer__y,iterativeimputer__z,knnimputer__carat,knnimputer__x,knnimputer__y,knnimputer__z
29664,6.71,6.67,0.0,1.12,6.71,6.67,4.116679,1.12,6.71,6.67,4.106667
1818,8.9,8.85,0.0,2.8,8.9,8.85,5.658253,2.8,8.9,8.85,5.576667
33091,0.0,0.0,0.0,1.14,6.514296,6.513396,4.02497,1.14,6.733333,6.743333,4.123333
18338,8.52,8.42,0.0,2.25,8.52,8.42,5.32122,2.25,8.52,8.42,5.203333
10297,0.0,0.0,0.0,0.71,5.531675,5.537545,3.416225,0.71,5.733333,5.756667,3.536667
14879,8.42,8.37,0.0,2.2,8.42,8.37,5.260967,2.2,8.42,8.37,5.146667
29447,6.5,6.47,0.0,1.01,6.5,6.47,3.982525,1.01,6.5,6.47,3.953333
37495,8.49,8.45,0.0,2.18,8.49,8.45,5.292087,2.18,8.49,8.45,5.043333
1528,0.0,0.0,0.0,1.0,6.194373,6.195677,3.826774,1.0,6.443333,6.436667,3.916667
6487,0.0,0.0,0.0,0.71,5.531675,5.537545,3.416225,0.71,5.733333,5.756667,3.536667
