In [2]:
import pandas as pd
import numpy as np
%pip install openpyxl -q

Note: you may need to restart the kernel to use updated packages.


In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)

In [4]:
df = pd.read_csv('data/Train_rev1.csv')

In [5]:
df.shape

(244768, 12)

In [6]:
df.head(3)

Unnamed: 0,Id,Title,FullDescription,LocationRaw,LocationNormalized,ContractType,ContractTime,Company,Category,SalaryRaw,SalaryNormalized,SourceName
0,12612628,Engineering Systems Analyst,Engineering Systems Analyst Dorking Surrey Sal...,"Dorking, Surrey, Surrey",Dorking,,permanent,Gregory Martin International,Engineering Jobs,20000 - 30000/annum 20-30K,25000,cv-library.co.uk
1,12612830,Stress Engineer Glasgow,Stress Engineer Glasgow Salary **** to **** We...,"Glasgow, Scotland, Scotland",Glasgow,,permanent,Gregory Martin International,Engineering Jobs,25000 - 35000/annum 25-35K,30000,cv-library.co.uk
2,12612844,Modelling and simulation analyst,Mathematical Modeller / Simulation Analyst / O...,"Hampshire, South East, South East",Hampshire,,permanent,Gregory Martin International,Engineering Jobs,20000 - 40000/annum 20-40K,30000,cv-library.co.uk


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244768 entries, 0 to 244767
Data columns (total 12 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   Id                  244768 non-null  int64 
 1   Title               244767 non-null  object
 2   FullDescription     244768 non-null  object
 3   LocationRaw         244768 non-null  object
 4   LocationNormalized  244768 non-null  object
 5   ContractType        65442 non-null   object
 6   ContractTime        180863 non-null  object
 7   Company             212338 non-null  object
 8   Category            244768 non-null  object
 9   SalaryRaw           244768 non-null  object
 10  SalaryNormalized    244768 non-null  int64 
 11  SourceName          244767 non-null  object
dtypes: int64(2), object(10)
memory usage: 22.4+ MB


In [8]:
df.describe(include='all')

Unnamed: 0,Id,Title,FullDescription,LocationRaw,LocationNormalized,ContractType,ContractTime,Company,Category,SalaryRaw,SalaryNormalized,SourceName
count,244768.0,244767,244768,244768,244768,65442,180863,212338,244768,244768,244768.0,244767
unique,,135435,242138,20986,2732,2,2,20812,29,97286,,167
top,,Business Development Manager,What is expected of you as a Registered Nurse ...,London,UK,full_time,permanent,UKStaffsearch,IT Jobs,"50,000-74,999 yearly",,totaljobs.com
freq,,921,18,15605,41093,57538,151521,4997,38483,1923,,48149
mean,69701420.0,,,,,,,,,,34122.577576,
std,3129813.0,,,,,,,,,,17640.543124,
min,12612630.0,,,,,,,,,,5000.0,
25%,68695500.0,,,,,,,,,,21500.0,
50%,69937000.0,,,,,,,,,,30000.0,
75%,71626060.0,,,,,,,,,,42500.0,


In [9]:
df.shape

(244768, 12)

# 1. Drop columns

In [10]:
df.drop(columns=['Id', 'SalaryRaw', 'LocationRaw'], inplace=True)

# 2. Fill missing values

In [11]:
print('Missing values:')
df.isna().sum()/df.shape[0]*100

Missing values:


Title                  0.000409
FullDescription        0.000000
LocationNormalized     0.000000
ContractType          73.263662
ContractTime          26.108397
Company               13.249281
Category               0.000000
SalaryNormalized       0.000000
SourceName             0.000409
dtype: float64

In [12]:
def fill_missing(df):
    for col in df.columns:
        if df[col].dtype == 'O':  # object/string
            df = df.fillna(df[col].mode()[0])
        else:  # numbers
            df = df.fillna(df[col].mean())
    return df

In [13]:
df = fill_missing(df)

In [14]:
df.isna().sum()/df.shape[0]*100

Title                 0.0
FullDescription       0.0
LocationNormalized    0.0
ContractType          0.0
ContractTime          0.0
Company               0.0
Category              0.0
SalaryNormalized      0.0
SourceName            0.0
dtype: float64

# 3. Duplicates

In [15]:
df.duplicated().sum()

1

# 4. Geostandarization - web scrapping

In [15]:
%pip install requests -q
%pip install beautifulsoup4 -q
import requests
from bs4 import BeautifulSoup as bs
import re
import time
import random
import json

In [None]:
def extract_number_from_text(text):
    match = re.search(r'(\d{1,3}(?:,\d{3})*)', text)
    return int(match.group(1).replace(',', '')) if match else None

In [None]:
def select_population_from_table(table):
    for header_row in table.select('tr:has(th)'):
        th = header_row.select_one('th')
        if 'Population' not in th.get_text():
            continue

        # next <tr> sibling (population data may be here)
        next_row = header_row.find_next_sibling('tr')


        # 1. population in the same row
        td = header_row.select_one('td')
        if td:
            val = extract_number_from_text(td.get_text())
            if val:
                return val
            
        # 2. population in the next row <td>
        if next_row and next_row.select_one('td'):
            val = extract_number_from_text(next_row.select_one('td').get_text())
            if val:
                return val

        # 3. multiple population years (bulleted list)
        if next_row and re.match(r'\s*â€¢\s*\d{4}', next_row.get_text()):
            # select all following <tr> until a break
            for tr in header_row.find_all_next('tr'):
                val = extract_number_from_text(tr.get_text())
                if val:
                    last_val = val
            return last_val
    return None

In [None]:
def get_page(url, headers, retries=3, delay_range=(1, 3)):
    for _ in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                return response
            if response.status_code == 404:
                return None
        except requests.exceptions.RequestException:
            pass
        time.sleep(random.uniform(*delay_range))
    return None

In [None]:
def fetch_infobox_table(url, headers, class_name='infobox'):
    page = get_page(url, headers)
    if not page:
        return None
    soup = bs(page.content, 'html.parser')
    return soup.find('table', class_=class_name)

In [None]:
def get_population_for_city(city, headers):
    urls = [
        f'https://en.wikipedia.org/wiki/{city}',
        f'https://en.wikipedia.org/wiki/{city}_(county)'
    ]
    for url in urls:
        table = fetch_infobox_table(url, headers)
        if table:
            pop = select_population_from_table(table)
            if pop:
                return pop
    return None

In [None]:
def get_population_for_location():
    headers = {"User-Agent": "LocationWebScrapper"}

    for city in cities:
        population = population_cache.get(city)
        if population is None:
            population = get_population_for_city(city, headers)
            if population:
                population_cache[city] = population
            else:
                not_working.add(city)
        print(f"{city}: {population}")

In [None]:
# not_working = set()
# population_cache = {}

In [None]:
cities = df['LocationNormalized'].unique().tolist()

In [None]:
with open('data/population_cache.json', 'r', encoding='utf-8') as f:
    population_cache = json.load(f)

In [None]:
not_working = [city for city in cities if city not in population_cache]

In [None]:
print('not working: ', len(not_working))
print('cache: ', len(population_cache))
print("percent of coveraged population of cities: ", round((len(population_cache) / len(cities) * 100), 2), '%')

In [None]:
not_working[:10]

In [None]:
get_population_for_location()

In [408]:
with open('data/population_cache.json', 'w', encoding='utf-8') as f:
    json.dump(population_cache, f, ensure_ascii=False, indent=4)

# 4. Geostandarization

## 4.1. Get population data from geonames dataset

In [15]:
# selecting data only for GB - turn on once (long)
# cols = [
#     'geonameid','name','asciiname','alternatenames','lat','lon',
#     'feature_class','feature_code','country_code','cc2','admin1',
#     'admin2','admin3','admin4','population','elevation','dem','tz','moddate'
# ]

# geonames = pd.read_csv(
#     "allCountries.txt",
#     sep="\t",
#     names=cols,
#     usecols=['asciiname', 'alternatenames', 'country_code', 'feature_code', 'feature_class', 'admin1', 'admin2', 'admin3', 'lon', 'lat', 'population'],
#     dtype=str,
#     header=None
# )

# geonames_gb = geonames[geonames['country_code'] == 'GB'].copy().reset_index(drop=True)
# geonames_gb = geonames_gb[geonames_gb['feature_class'].isin(['P', 'A'])].reset_index()
# geonames_gb.loc[geonames_gb['feature_code'] == 'PCLI', 'asciiname'] = 'UK'
# geonames_gb.to_csv('geonames_gb.csv')

In [34]:
geonames_gb = pd.read_csv('geo_datasets/geonames_gb.csv')
geonames_gb.rename(columns={'asciiname': 'name'}, inplace=True)

## 4.2. Get population for all locations where it is directly possible

In [35]:
# get population for locations
pop_dict = geonames_gb['population'].copy()
pop_dict = geonames_gb.set_index(geonames_gb['name'].str.lower().str.strip())['population'].to_dict()

df['LocationPopulation'] = df['LocationNormalized'].str.lower().str.strip().map(lambda x: pop_dict.get(x))

In [36]:
def print_missing_info():
    print(f"Missing data in population of location: {round(df[df['LocationPopulation'].isna()]['LocationNormalized'].count() / len(df) * 100, 2)}%, {df[df['LocationPopulation'].isna()]['LocationNormalized'].count()} cases")
    print()
    print(df[df['LocationPopulation'].isna()]['LocationNormalized'].value_counts()[:5])

In [37]:
print_missing_info()

Missing data in population of location: 12.44%, 30460 cases

LocationNormalized
South East London    11713
Central London        2607
West Midlands         2540
Berkshire             1502
West Yorkshire        1072
Name: count, dtype: int64


## 4.3. Remove directions and assign population to other fitting names

In [38]:
directions = ['North', 'South', 'East', 'West', 'Central']
df['LocationNormalized'] = df['LocationNormalized'].replace(
    directions, '', regex=True
).str.strip()

missing_mask = df['LocationPopulation'].isna()
missing_locations = df.loc[missing_mask, 'LocationNormalized'].str.lower().str.strip()

pop_dict_missing = {loc: pop_dict.get(loc, np.nan) for loc in missing_locations}

df.loc[missing_mask, 'LocationPopulation'] = missing_locations.map(pop_dict_missing)

In [39]:
print_missing_info()

Missing data in population of location: 4.58%, 11216 cases

LocationNormalized
Midlands                    3456
Berkshire                   1502
Cheshire                     871
Yorkshire and Humberside     683
Bedfordshire                 544
Name: count, dtype: int64


## 4.4. Find population for Midlands in NUT regions

In [40]:
# remove locations out of GB
uk_lat_mask = (geonames_gb['lat'] >= 49) & (geonames_gb['lat'] <= 61)
uk_lon_mask = (geonames_gb['lon'] >= -10) & (geonames_gb['lon'] <= 2)
geonames_gb = geonames_gb[(geonames_gb['country_code'] == 'GB') & (uk_lat_mask) & (uk_lon_mask)]

In [41]:
nuts = pd.read_excel("geo_datasets/NUTS.xlsx")
nuts['NUTS118NM'] = nuts['NUTS118NM'].str.replace('(England)', '', regex=False).str.strip()
nuts = nuts.rename(columns={'NUTS118NM': 'name', 'LONG': 'lon', 'LAT': 'lat'})

In [42]:
# find the closest point in geonames in nuts
from scipy.spatial import cKDTree
tree = cKDTree(geonames_gb[['lat', 'lon']].values)
nuts_coords = nuts[['lat', 'lon']].values

distances, indices = tree.query(nuts_coords, k=1)  # k=1 -> 1 neighbour

nuts['population'] = geonames_gb.iloc[indices]['population'].values
nuts_population = dict(zip(nuts['name'], nuts['population']))

In [43]:
from typing import Counter

# combine West and East Midlands
nuts_population = {**{k: v for k, v in nuts_population.items() if 'Midlands' not in k},
                 **{'Midlands': sum(v for k, v in nuts_population.items() if 'Midlands' in k)}}

In [44]:
nuts_population

{'North East': 1126,
 'North West': 0,
 'Yorkshire and The Humber': 0,
 'East of England': 686,
 'London': 10750,
 'South East': 0,
 'South West': 0,
 'Wales': 0,
 'Scotland': 8830,
 'Northern Ireland': 0,
 'Midlands': 50878}

In [45]:
# impute nuts locations
population_from_dict = df['LocationNormalized'].map(nuts_population)

mask = ((df['LocationPopulation'].isnull()) | (df['LocationPopulation'] == 0)) & population_from_dict.notnull()

df.loc[mask, 'LocationPopulation'] = population_from_dict[mask]

In [46]:
print_missing_info()

Missing data in population of location: 3.17%, 7760 cases

LocationNormalized
Berkshire                   1502
Cheshire                     871
Yorkshire and Humberside     683
Bedfordshire                 544
Edinburgh Technopole         408
Name: count, dtype: int64


## 4.5. Cast rest of cases as 'UK'

In [47]:
mask = df['LocationPopulation'].isna() | (df['LocationPopulation'] == 0)
uk_pop = df.loc[df['LocationNormalized'].str.lower().eq('uk'), 'LocationPopulation'].dropna().iloc[0] if any(df['LocationNormalized'].str.lower().eq('uk')) else np.nan
df.loc[mask, ['LocationNormalized', 'LocationPopulation']] = ['UK', uk_pop]

In [48]:
print_missing_info()

Missing data in population of location: 0.0%, 0 cases

Series([], Name: count, dtype: int64)


In [49]:
df['LocationPopulation'].value_counts().head()

LocationPopulation
66488991.0    109313
8961989.0      45511
541263.0        3516
50878.0         3456
1157603.0       3061
Name: count, dtype: int64

In [50]:
df.drop(columns=['LocationNormalized'], inplace=True)

# Word2Vec

In [52]:
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize
%pip install gensim -q
from gensim.models import Word2Vec
import multiprocessing

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/ec2-user/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Note: you may need to restart the kernel to use updated packages.


In [55]:
# tokenization
def tokenize_text(text):
    return word_tokenize(text.lower())

In [None]:
titles = df['Title'].apply(tokenize_text).tolist()
descriptions = df['FullDescription'].apply(tokenize_text).tolist()

In [None]:
all_sentences = titles + descriptions

In [None]:
vector_size = 50  # output dim
window = 5        # context window
min_count = 5     # filter rare words
workers = multiprocessing.cpu_count() - 1

w2v_model = Word2Vec(
    sentences=all_sentences,
    vector_size=vector_size,
    window=window,
    min_count=min_count,
    workers=workers,
    sg=0
)

In [None]:
# average vectors
def document_vector(word_list, model, vector_size):
    # Initialize a zero vector
    vector = np.zeros(vector_size)
    count = 0
    
    # Sum the vectors of all words in the text
    for word in word_list:
        if word in model.wv:
            vector += model.wv[word]
            count += 1
            
    # Return the average vector
    if count != 0:
        return vector / count
    else:
        # Return the zero vector if no words were found in the vocabulary
        return vector

### Prepare ngrams

In [101]:
titles = df['Title'].apply(lambda x: document_vector(tokenize_text(x), w2v_model, vector_size))
descriptions = df['FullDescription'].apply(lambda x: document_vector(tokenize_text(x), w2v_model, vector_size))

In [102]:
title_df = pd.DataFrame(titles.tolist(), index=df.index).add_prefix('Title_vec_')
desc_df = pd.DataFrame(descriptions.tolist(), index=df.index).add_prefix('FullDescription_vec_')

In [103]:
texts = pd.concat([title_df, desc_df], axis=1)

# Transformer

In [16]:
%pip install transformers -q
from transformers import RobertaTokenizer, RobertaModel
from torch.utils.data import Dataset, DataLoader
import torch
from tqdm import tqdm

Note: you may need to restart the kernel to use updated packages.


  import pynvml  # type: ignore[import]


In [18]:
class RobertaFeatureDataset(Dataset):
    def __init__(self, texts):
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=MAX_LEN,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

In [17]:
MAX_LEN = 128
model_name = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaModel.from_pretrained(model_name)
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
print(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


In [61]:
# Prepare data

In [35]:
texts_concat = df['Title'] + ' [SEP] ' + df['FullDescription']
texts_list = texts_concat.tolist()

In [37]:
dataset = RobertaFeatureDataset(texts_list)
data_loader = DataLoader(dataset, batch_size=16, shuffle=False)

In [29]:
def extract_cls_vectors(model, data_loader, device):
    model.eval()
    all_cls_vectors = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Extraction [CLS] RoBERTa"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            cls_vector = outputs.last_hidden_state[:, 0, :]
            
            all_cls_vectors.append(cls_vector.cpu().numpy())

    final_vector_array = np.concatenate(all_cls_vectors, axis=0)
    return final_vector_array

In [40]:
final_vector_array = extract_cls_vectors(model, data_loader, device)

texts_w2v_roberta = pd.DataFrame(
    final_vector_array,
    index=df.index
)

texts_w2v_roberta.columns = [f'cls_{i}' for i in range(final_vector_array.shape[1])]

In [41]:
texts_w2v_roberta.head()

Unnamed: 0,cls_0,cls_1,cls_2,cls_3,cls_4,cls_5,cls_6,cls_7,cls_8,cls_9,cls_10,cls_11,cls_12,cls_13,cls_14,cls_15,cls_16,cls_17,cls_18,cls_19,cls_20,cls_21,cls_22,cls_23,cls_24,cls_25,cls_26,cls_27,cls_28,cls_29,cls_30,cls_31,cls_32,cls_33,cls_34,cls_35,cls_36,cls_37,cls_38,cls_39,cls_40,cls_41,cls_42,cls_43,cls_44,cls_45,cls_46,cls_47,cls_48,cls_49,cls_50,cls_51,cls_52,cls_53,cls_54,cls_55,cls_56,cls_57,cls_58,cls_59,cls_60,cls_61,cls_62,cls_63,cls_64,cls_65,cls_66,cls_67,cls_68,cls_69,cls_70,cls_71,cls_72,cls_73,cls_74,cls_75,cls_76,cls_77,cls_78,cls_79,cls_80,cls_81,cls_82,cls_83,cls_84,cls_85,cls_86,cls_87,cls_88,cls_89,cls_90,cls_91,cls_92,cls_93,cls_94,cls_95,cls_96,cls_97,cls_98,cls_99,cls_100,cls_101,cls_102,cls_103,cls_104,cls_105,cls_106,cls_107,cls_108,cls_109,cls_110,cls_111,cls_112,cls_113,cls_114,cls_115,cls_116,cls_117,cls_118,cls_119,cls_120,cls_121,cls_122,cls_123,cls_124,cls_125,cls_126,cls_127,cls_128,cls_129,cls_130,cls_131,cls_132,cls_133,cls_134,cls_135,cls_136,cls_137,cls_138,cls_139,cls_140,cls_141,cls_142,cls_143,cls_144,cls_145,cls_146,cls_147,cls_148,cls_149,cls_150,cls_151,cls_152,cls_153,cls_154,cls_155,cls_156,cls_157,cls_158,cls_159,cls_160,cls_161,cls_162,cls_163,cls_164,cls_165,cls_166,cls_167,cls_168,cls_169,cls_170,cls_171,cls_172,cls_173,cls_174,cls_175,cls_176,cls_177,cls_178,cls_179,cls_180,cls_181,cls_182,cls_183,cls_184,cls_185,cls_186,cls_187,cls_188,cls_189,cls_190,cls_191,cls_192,cls_193,cls_194,cls_195,cls_196,cls_197,cls_198,cls_199,cls_200,cls_201,cls_202,cls_203,cls_204,cls_205,cls_206,cls_207,cls_208,cls_209,cls_210,cls_211,cls_212,cls_213,cls_214,cls_215,cls_216,cls_217,cls_218,cls_219,cls_220,cls_221,cls_222,cls_223,cls_224,cls_225,cls_226,cls_227,cls_228,cls_229,cls_230,cls_231,cls_232,cls_233,cls_234,cls_235,cls_236,cls_237,cls_238,cls_239,cls_240,cls_241,cls_242,cls_243,cls_244,cls_245,cls_246,cls_247,cls_248,cls_249,cls_250,cls_251,cls_252,cls_253,cls_254,cls_255,cls_256,cls_257,cls_258,cls_259,cls_260,cls_261,cls_262,cls_263,cls_264,cls_265,cls_266,cls_267,cls_268,cls_269,cls_270,cls_271,cls_272,cls_273,cls_274,cls_275,cls_276,cls_277,cls_278,cls_279,cls_280,cls_281,cls_282,cls_283,cls_284,cls_285,cls_286,cls_287,cls_288,cls_289,cls_290,cls_291,cls_292,cls_293,cls_294,cls_295,cls_296,cls_297,cls_298,cls_299,cls_300,cls_301,cls_302,cls_303,cls_304,cls_305,cls_306,cls_307,cls_308,cls_309,cls_310,cls_311,cls_312,cls_313,cls_314,cls_315,cls_316,cls_317,cls_318,cls_319,cls_320,cls_321,cls_322,cls_323,cls_324,cls_325,cls_326,cls_327,cls_328,cls_329,cls_330,cls_331,cls_332,cls_333,cls_334,cls_335,cls_336,cls_337,cls_338,cls_339,cls_340,cls_341,cls_342,cls_343,cls_344,cls_345,cls_346,cls_347,cls_348,cls_349,cls_350,cls_351,cls_352,cls_353,cls_354,cls_355,cls_356,cls_357,cls_358,cls_359,cls_360,cls_361,cls_362,cls_363,cls_364,cls_365,cls_366,cls_367,cls_368,cls_369,cls_370,cls_371,cls_372,cls_373,cls_374,cls_375,cls_376,cls_377,cls_378,cls_379,cls_380,cls_381,cls_382,cls_383,cls_384,cls_385,cls_386,cls_387,cls_388,cls_389,cls_390,cls_391,cls_392,cls_393,cls_394,cls_395,cls_396,cls_397,cls_398,cls_399,cls_400,cls_401,cls_402,cls_403,cls_404,cls_405,cls_406,cls_407,cls_408,cls_409,cls_410,cls_411,cls_412,cls_413,cls_414,cls_415,cls_416,cls_417,cls_418,cls_419,cls_420,cls_421,cls_422,cls_423,cls_424,cls_425,cls_426,cls_427,cls_428,cls_429,cls_430,cls_431,cls_432,cls_433,cls_434,cls_435,cls_436,cls_437,cls_438,cls_439,cls_440,cls_441,cls_442,cls_443,cls_444,cls_445,cls_446,cls_447,cls_448,cls_449,cls_450,cls_451,cls_452,cls_453,cls_454,cls_455,cls_456,cls_457,cls_458,cls_459,cls_460,cls_461,cls_462,cls_463,cls_464,cls_465,cls_466,cls_467,cls_468,cls_469,cls_470,cls_471,cls_472,cls_473,cls_474,cls_475,cls_476,cls_477,cls_478,cls_479,cls_480,cls_481,cls_482,cls_483,cls_484,cls_485,cls_486,cls_487,cls_488,cls_489,cls_490,cls_491,cls_492,cls_493,cls_494,cls_495,cls_496,cls_497,cls_498,cls_499,cls_500,cls_501,cls_502,cls_503,cls_504,cls_505,cls_506,cls_507,cls_508,cls_509,cls_510,cls_511,cls_512,cls_513,cls_514,cls_515,cls_516,cls_517,cls_518,cls_519,cls_520,cls_521,cls_522,cls_523,cls_524,cls_525,cls_526,cls_527,cls_528,cls_529,cls_530,cls_531,cls_532,cls_533,cls_534,cls_535,cls_536,cls_537,cls_538,cls_539,cls_540,cls_541,cls_542,cls_543,cls_544,cls_545,cls_546,cls_547,cls_548,cls_549,cls_550,cls_551,cls_552,cls_553,cls_554,cls_555,cls_556,cls_557,cls_558,cls_559,cls_560,cls_561,cls_562,cls_563,cls_564,cls_565,cls_566,cls_567,cls_568,cls_569,cls_570,cls_571,cls_572,cls_573,cls_574,cls_575,cls_576,cls_577,cls_578,cls_579,cls_580,cls_581,cls_582,cls_583,cls_584,cls_585,cls_586,cls_587,cls_588,cls_589,cls_590,cls_591,cls_592,cls_593,cls_594,cls_595,cls_596,cls_597,cls_598,cls_599,cls_600,cls_601,cls_602,cls_603,cls_604,cls_605,cls_606,cls_607,cls_608,cls_609,cls_610,cls_611,cls_612,cls_613,cls_614,cls_615,cls_616,cls_617,cls_618,cls_619,cls_620,cls_621,cls_622,cls_623,cls_624,cls_625,cls_626,cls_627,cls_628,cls_629,cls_630,cls_631,cls_632,cls_633,cls_634,cls_635,cls_636,cls_637,cls_638,cls_639,cls_640,cls_641,cls_642,cls_643,cls_644,cls_645,cls_646,cls_647,cls_648,cls_649,cls_650,cls_651,cls_652,cls_653,cls_654,cls_655,cls_656,cls_657,cls_658,cls_659,cls_660,cls_661,cls_662,cls_663,cls_664,cls_665,cls_666,cls_667,cls_668,cls_669,cls_670,cls_671,cls_672,cls_673,cls_674,cls_675,cls_676,cls_677,cls_678,cls_679,cls_680,cls_681,cls_682,cls_683,cls_684,cls_685,cls_686,cls_687,cls_688,cls_689,cls_690,cls_691,cls_692,cls_693,cls_694,cls_695,cls_696,cls_697,cls_698,cls_699,cls_700,cls_701,cls_702,cls_703,cls_704,cls_705,cls_706,cls_707,cls_708,cls_709,cls_710,cls_711,cls_712,cls_713,cls_714,cls_715,cls_716,cls_717,cls_718,cls_719,cls_720,cls_721,cls_722,cls_723,cls_724,cls_725,cls_726,cls_727,cls_728,cls_729,cls_730,cls_731,cls_732,cls_733,cls_734,cls_735,cls_736,cls_737,cls_738,cls_739,cls_740,cls_741,cls_742,cls_743,cls_744,cls_745,cls_746,cls_747,cls_748,cls_749,cls_750,cls_751,cls_752,cls_753,cls_754,cls_755,cls_756,cls_757,cls_758,cls_759,cls_760,cls_761,cls_762,cls_763,cls_764,cls_765,cls_766,cls_767
0,-0.025227,0.089374,-0.017654,-0.074229,0.035923,-0.110014,-0.008648,0.010993,0.108525,-0.068382,-0.034167,0.001859,0.025804,-0.002787,0.045874,-0.010902,-0.138545,-0.005408,-0.016467,-0.009722,-0.095642,0.121593,-0.051694,0.110705,0.005038,0.034742,0.085676,0.070858,-0.019885,0.03051,-0.027813,-0.048084,0.0089,0.023571,0.050863,0.073222,0.040722,-0.000679,-0.153464,-0.019375,0.043268,0.08819,-0.009965,-0.029454,0.075491,0.002996,0.045457,0.050452,0.013915,0.052201,0.018797,0.057792,-0.036378,-0.028594,-0.114506,0.000288,-0.008318,0.091715,0.091042,-0.052939,-0.013691,-0.076664,-0.108126,-0.038573,0.021306,-0.020364,-0.074063,0.028171,0.034138,0.097896,0.041039,-0.038515,0.056036,-0.072036,0.023863,0.00301,-0.041538,0.387489,-0.008138,0.008306,0.046395,-0.011421,0.266517,0.052176,0.005206,0.009897,0.102714,0.075689,0.06122,0.028401,0.010769,0.048406,-0.052249,-0.004582,0.016794,0.069303,0.001285,-0.154992,-0.049535,-0.040741,-0.077791,-0.049087,0.115729,0.06962,-0.007619,0.009798,0.037998,0.01182,-0.016181,-0.016864,0.015143,0.045185,0.035561,0.060059,-0.024479,-0.047473,-0.011615,0.019336,0.015815,0.031917,0.053663,0.065616,0.035626,-0.00173,-0.007094,0.032394,-0.025264,-0.010011,-0.006135,0.033582,-0.001674,-0.120072,-0.014519,0.065525,0.049748,0.023529,0.051231,-0.006007,0.105802,-0.001403,-0.072467,0.061935,0.05741,0.054569,0.104478,0.007013,0.029884,-0.038308,-0.017968,-0.046359,0.066908,-0.023318,-0.081126,-0.002564,0.02755,0.454982,0.081075,0.077127,0.011532,-0.038571,0.142152,-0.001654,0.049297,0.036343,-0.021295,-0.004996,0.021173,0.015314,0.044656,0.001654,0.010571,0.056715,0.007664,-0.06153,-0.06789,-0.038485,0.025016,0.004578,-0.128328,-0.027897,0.024117,0.047759,-0.121895,0.043606,-0.02946,0.000424,-0.03152,0.074384,0.017808,0.063811,0.027524,-0.04531,0.033802,-0.041912,-0.061039,0.104815,-0.059834,-0.014221,0.01004,-0.068848,0.083274,-0.12169,0.057742,-0.080722,0.028575,0.014036,0.073817,0.055896,0.029494,-0.008131,-0.052219,0.027136,0.029823,0.022737,-0.008882,0.014075,0.058361,0.14391,-0.024778,0.000354,0.048002,0.037578,0.063824,0.114777,0.005497,-0.030499,0.000825,0.006073,0.004842,-0.016398,-0.034422,0.034704,0.023143,-0.030926,0.053416,-0.100248,0.00373,-0.055668,-0.03412,0.073494,-0.066913,0.014724,0.050753,0.003186,0.032689,0.0416,0.023801,0.054773,-0.027178,0.004074,-0.025599,0.024525,0.000151,-0.030636,0.050233,-0.002213,-0.116578,0.025599,0.004885,-0.007851,-0.003777,-0.052663,0.051049,-0.000589,-0.080583,-0.020694,-0.017493,0.001218,-0.010087,-0.073003,0.029416,-0.035919,-0.040371,0.002134,0.054969,0.058971,-0.08377,0.018184,-0.031308,-0.015397,0.050211,-0.024292,-0.105376,-0.006155,0.035969,0.045496,-0.078739,-0.037317,0.064245,0.072594,0.031076,0.0248,0.000125,0.04497,-0.028574,-0.004331,0.061772,-0.013914,0.019132,0.016638,-0.040941,-0.096274,-0.124415,-0.057842,-0.012569,-0.028768,0.01899,-0.020802,0.03685,-0.049115,-0.037673,-0.078251,-0.098763,0.129155,0.050707,-0.008628,0.067625,-0.01272,-0.006217,0.053142,0.025298,0.001926,0.040033,-0.014458,-0.031976,0.033361,0.067558,-0.075817,0.061323,0.385527,-0.265688,-0.017857,0.016015,0.023237,0.082979,-0.048385,0.017628,0.081558,0.074999,0.038566,0.030921,0.039961,0.004212,0.046383,0.016887,0.046544,-0.095795,-0.041533,-0.023771,-0.008317,0.027105,-0.001385,0.026304,-0.016168,-0.021166,0.024768,0.032877,-0.023965,-0.007227,-0.135063,0.043066,-0.001109,0.058172,0.001705,0.055339,-0.102451,-0.026235,0.04681,-0.036197,0.00405,0.029707,-0.004642,-0.010207,-0.065268,0.057712,0.026534,0.045407,0.020787,-0.01127,0.143131,0.057412,-0.00627,-0.068536,-0.01514,0.088655,0.024941,0.013901,0.007057,0.085269,0.017301,0.002002,-0.107508,0.046322,-0.05446,0.097172,0.002054,-0.054195,-0.223502,-0.024228,-0.048292,0.003534,0.083741,0.027997,0.045087,0.044114,-0.019896,0.022481,-0.06727,0.00928,0.020967,-0.020543,-0.006161,0.015338,-0.065056,0.020765,-0.038144,0.061336,-0.013518,-0.010681,0.025754,-0.033055,0.054616,0.013598,0.026746,-0.062952,-0.012551,0.004873,-0.034407,0.001425,0.033217,-0.057938,0.097536,-0.022811,0.011013,0.002262,-0.038803,-0.047381,-0.042548,-0.091219,-0.049695,0.027117,-0.001039,0.051176,0.005167,-0.068622,0.015812,0.124068,-0.035702,-0.123479,0.037397,-0.049662,0.059609,-0.028632,-0.547399,0.077551,-0.007834,0.050619,0.004734,-0.056816,0.009549,0.045187,0.081942,0.039422,-0.02752,-0.025186,-0.045455,-0.05827,0.03793,-0.021474,-0.025627,0.018569,-0.033217,-0.040835,-0.037279,0.012021,-0.02769,-0.035306,0.067103,-0.020207,-0.133884,-0.017259,0.068955,0.023941,-0.003948,-0.077292,0.030352,0.010288,0.036225,0.058022,-0.024527,0.00675,-0.037665,0.087685,0.010694,0.2258,-0.035274,0.114156,-0.006046,0.039336,-0.017046,-0.000202,-0.043416,0.017391,0.055046,-0.001011,-0.003817,-0.047646,-0.002392,-0.00023,0.047268,-0.017178,-0.003207,0.169201,-0.031857,0.06401,0.021391,-0.004338,0.011547,-0.040118,0.007113,-0.054272,0.032656,-0.022245,-0.008156,-0.036529,-0.037108,0.039737,-0.020015,0.094111,-0.001798,0.051253,-0.047023,0.071571,0.009662,0.041123,0.008585,0.039092,0.01216,-0.060505,-0.070711,-0.056528,0.010004,0.076945,-0.016994,0.064642,0.026786,-0.012122,-0.028789,0.052709,0.086265,0.037344,-0.52114,-0.084283,0.003818,0.041634,-0.019646,0.049828,0.012647,-0.018753,0.081546,-0.088887,0.015897,0.0266,0.059318,-0.012582,-0.015133,0.069035,-0.068579,-0.036986,0.024591,-0.214671,0.041821,-0.050504,0.107901,0.038421,0.009999,0.064715,-0.081184,0.007188,0.08218,0.003415,0.064568,0.112503,-0.033439,0.052624,0.069738,0.089423,-0.03269,10.577237,-0.023681,0.049847,-0.004463,-0.03095,-0.082866,0.111711,-0.052038,-0.010502,0.051153,0.017549,0.009657,-0.108268,0.00696,0.023119,0.007978,-0.060916,-0.016663,-0.011124,-0.038459,-0.015018,-0.016743,0.034532,0.00516,-0.0388,-0.051675,-0.02422,-0.036185,-0.008399,0.095126,-0.00447,0.060481,0.061682,-0.026593,0.100776,-0.036915,0.054598,0.092126,0.0361,0.095251,0.023892,-0.021582,-0.010973,0.048801,0.057032,0.029263,-0.044157,0.088792,0.026805,0.108305,0.049159,-0.022378,0.073865,0.026273,-0.018815,-0.034814,0.00683,-0.045542,0.06041,0.065676,-0.052326,0.144449,-0.041601,0.023386,-0.030658,0.095941,0.084395,0.059177,-0.096736,0.036151,0.023175,-0.048008,-0.076172,0.053412,-0.013922,-0.058183,0.071537,0.00535,-0.000416,-0.020788,0.042767,0.023169,-0.054714,-0.025025,0.045944,0.022953,0.015102,0.116801,-0.037606,-0.034946,-0.073735,-0.021109,0.003942,-0.002287,0.037786,0.032015,-0.025209,-0.082991,0.073292,0.051378,-0.090196,-0.017474,-0.020255,0.042232,0.042557,0.001014,-0.0113,0.018848,0.008752,0.007427,-0.081619,0.017679,0.00779,-0.033339,0.002946,0.026965,-0.00087,0.02352,0.002616,0.081777,-0.050419,-0.003752,-0.012564,0.033048,-0.015276,-0.049403,-0.042214,-0.030263,-0.003009,-0.003657,-0.043214,0.050045,0.059197,-0.01495,0.034246,-0.022027,0.005207,0.121774,0.039346,0.063942,-0.038835,0.00324,0.006319,-0.058754,-0.02792,0.003737,0.085608,-0.010873,0.061509,0.021118,0.010152,0.013877,0.037699,-0.05195,0.028391,-0.03781,0.006047,-0.003208,0.032574,-5.7e-05,0.01818,-0.100841,0.032221,0.083549,-0.02585,-0.037997,-0.017839,0.012114,0.024665,-0.005297,0.03432,0.019185,-0.020766,-0.054815,-0.005392,0.101308,0.148061,-0.117978,-0.032573,0.008857
1,-0.063176,0.075453,-0.010697,-0.11787,0.066936,-0.102683,-0.006008,-0.016275,0.088553,-0.06403,-0.035159,0.007404,0.018063,0.008277,0.084138,-0.043287,-0.098213,0.054073,-0.022515,-0.04496,-0.149918,0.055622,-0.021834,0.12648,0.005014,0.067434,0.039494,0.051424,-0.026299,0.038844,-0.071274,-0.079107,0.048192,0.041213,0.001751,0.067706,0.033898,-0.022792,-0.118735,0.023736,0.11557,0.166871,0.029131,-0.036591,0.047476,-0.022486,0.073266,0.035235,0.012147,0.027684,0.006135,0.080862,-0.054403,0.025771,-0.122113,-0.020264,0.013779,0.081721,0.075541,-0.102141,-0.014168,-0.137133,-0.132978,-0.075779,0.029483,-0.089297,-0.058726,0.061282,0.049305,0.080184,0.062908,-0.028852,-0.001262,-0.011675,0.01065,-0.023869,-0.107243,0.381388,-0.024913,-0.001951,0.061377,-0.052388,0.380718,0.056542,-0.019324,-0.010765,0.087835,0.064489,0.046796,0.010174,0.007384,0.041026,0.031454,-0.054282,0.087644,0.012564,-0.023819,-0.099731,-0.06152,-0.056114,-0.076787,-0.075206,0.041322,0.044577,-0.016999,-0.005779,0.036263,8.1e-05,0.016424,-0.014974,0.039142,0.023706,0.08128,0.053182,-0.009367,-0.066007,-0.027307,0.066689,0.0329,0.044034,0.029869,0.111463,0.031087,-0.055121,-0.021811,0.013502,-0.070978,-0.044401,-0.040663,0.005029,-0.046891,-0.092786,0.008668,0.055159,0.02811,0.050854,0.054119,-0.054942,0.041671,0.005663,-0.056096,0.037189,0.004667,0.025924,0.096103,0.085352,-0.002375,0.010386,-0.026244,-0.01332,0.041177,-0.061258,-0.013051,-0.034689,-0.026737,0.464912,0.155539,0.160295,0.046702,-0.037147,0.156938,0.001471,0.059774,0.025724,-0.028278,0.002775,0.005782,-0.030104,0.024184,-0.016837,0.023826,0.041523,0.034383,-0.038432,-0.073127,-0.014453,0.089052,-0.004673,-0.101352,-0.025636,0.016206,0.048629,-0.116494,0.059472,-0.0253,0.035748,-0.010949,0.082534,0.03601,0.06915,0.062815,-0.023479,0.046476,0.020299,0.017488,0.085086,-0.037004,-0.040823,0.009741,-0.018459,0.032544,-0.052544,0.096717,-0.029701,-0.009968,-0.019664,0.064647,0.077478,0.031592,-0.036031,-0.04302,0.075999,0.018599,0.09161,0.034301,0.02277,0.060392,0.269963,0.04052,0.054293,0.034439,0.028949,-0.014348,0.162363,-0.018266,-0.016698,0.001123,0.020573,0.030711,-0.001255,-0.066725,0.069525,-0.003216,-0.013943,0.066981,-0.105632,-0.045703,-0.035672,-0.074593,0.037027,-0.041384,0.057962,0.054099,0.006428,0.045056,0.072266,0.031389,0.054604,-0.032642,-0.012665,-0.074672,-0.027452,0.012744,-0.021592,0.030061,-0.027851,-0.116427,0.016657,-0.031616,-0.049844,-0.01943,-0.031458,0.008873,-0.014457,-0.068754,0.003275,-0.058913,0.023292,-0.007228,-0.028195,-0.001098,-0.091152,-0.063922,-0.019687,0.053352,0.007113,-0.034444,-0.016286,-0.046007,0.05426,0.053169,-0.077867,-0.02476,-0.062813,0.064886,0.070403,-0.066755,-0.032423,0.044517,0.080984,0.092217,0.016439,-0.022725,0.027102,-0.0084,-0.025966,0.056633,-0.017192,0.002605,0.02015,-0.043376,-0.092536,-0.102143,-0.059847,0.003026,-0.058875,0.028981,0.015851,0.003189,-0.022289,-0.041362,-0.057102,-0.078288,0.143997,0.078025,0.001957,0.079055,-0.043194,0.001996,0.020038,0.010401,0.051026,0.078113,-0.048015,-0.011698,0.038058,0.071064,-0.067915,0.02937,0.353226,-0.259322,-0.01197,0.042285,0.00571,0.068157,0.001247,0.008672,0.09204,0.159792,0.108529,0.011715,-0.005043,-0.004159,0.059389,0.059117,0.031725,-0.053801,-0.051501,0.007108,0.026515,-0.022424,-0.00801,0.023229,-0.036029,-0.033612,0.07375,-0.017868,-0.01926,0.050054,-0.188858,0.049929,-0.02775,0.042952,-0.045601,-0.001514,-0.019558,-0.09456,0.096671,-0.053693,-0.017701,0.056241,-0.070191,-0.045809,-0.051796,0.045818,0.037075,0.031846,0.048356,-0.030836,0.139132,0.008762,0.010686,-0.099648,0.017379,0.065453,0.037102,-0.013428,0.03784,0.042033,0.046187,0.016816,-0.115494,0.023828,-0.086596,0.108367,-0.011255,-0.049483,-0.146976,-0.005159,-0.034591,0.023397,0.029725,-0.00601,0.069806,0.026974,-0.012852,0.104525,-0.02494,-0.014226,-0.013779,-0.005914,0.012676,0.052243,-0.04184,0.033212,-0.056155,0.011734,-0.018051,0.015649,0.008594,-0.053788,0.059505,0.009406,-0.019686,-0.048948,0.001795,-0.010505,-0.058373,-0.038737,0.000326,-0.073347,0.084746,0.024143,-0.016022,-0.017216,-0.039069,-0.030317,-0.013943,-0.094943,-0.118084,0.01099,0.037057,0.042178,0.015279,-0.021069,-0.006101,0.063634,-0.017686,-0.072312,0.05779,-0.08856,0.000665,-0.00754,-0.371514,0.052787,-0.066777,0.036737,0.011837,-0.081943,0.019329,0.01304,0.03241,0.01697,-0.017414,0.029609,-0.019558,-0.082062,0.00927,-0.096707,-0.03689,0.054403,-0.041683,-0.061164,-0.038915,0.069785,-0.022188,0.024773,0.030897,0.030797,-0.134769,-0.056889,0.049417,0.037675,-0.01588,-0.066918,0.029611,-0.022012,0.015287,0.080433,-0.029803,0.024432,0.031639,0.120521,0.024303,0.219206,-0.035911,0.025584,-0.021725,0.072128,0.031615,-0.008873,-0.005328,-0.001121,0.048985,-0.007928,0.066771,-0.063263,-0.022836,0.005731,0.005599,-0.017206,-0.050067,0.215993,0.007285,0.062619,-0.002028,-0.077137,0.000947,-0.031512,0.027566,-0.042611,0.050728,-0.007151,0.003719,-0.065257,-0.05778,0.049325,0.007944,0.087338,-0.029795,0.061014,-0.069031,0.056501,0.048684,0.019394,0.002167,0.076994,0.025557,-0.040978,-0.005128,-0.064996,0.040363,0.08545,-0.029416,0.051933,0.043508,-0.02211,-0.027243,0.041181,0.019272,0.068002,-0.456767,-0.076841,0.051389,0.034805,-0.036892,0.051413,0.013533,0.007713,0.052821,-0.0557,0.103501,0.036521,0.048217,0.026827,-0.074269,0.055898,-0.015296,-0.033676,0.044335,-0.175339,0.027514,-0.028612,0.103798,0.01145,0.045626,0.070098,-0.080457,0.018871,0.062277,-0.025882,0.08969,0.079337,-0.026955,0.036727,0.047528,0.057371,-0.031984,11.060996,-0.00837,0.019968,-0.058024,0.021854,-0.097595,0.090955,-0.132908,-0.096917,0.084798,0.007323,-0.042142,-0.037739,-0.05267,0.018574,0.054683,-0.069365,-0.004848,-0.033416,-0.041732,-0.04233,0.063674,0.03726,0.010511,-0.07336,0.013407,0.000789,-0.005971,0.00871,0.041825,-0.025012,0.056996,0.063508,-0.015664,0.094923,-0.007132,0.074854,0.092016,0.010028,0.074423,-0.017568,0.016345,0.045599,0.064029,0.049557,0.023269,0.017558,0.096813,0.01527,0.091932,0.07198,0.014611,0.096013,0.089493,-0.028921,0.019309,-0.004477,-0.036972,0.072323,0.071288,-0.089869,0.136604,-0.010973,0.031844,-0.00926,0.103849,0.109712,0.0285,-0.101688,0.006617,-0.028717,-0.081711,-0.064349,-0.009888,-0.00652,-0.064537,0.0442,0.006331,-0.031089,0.011504,0.021263,0.020071,-0.033362,-0.044491,0.089358,0.046662,0.039598,0.138026,-0.024336,-0.059226,-0.07688,-0.008507,-0.001861,-0.031247,0.000335,-0.02208,0.031956,-0.093514,0.056155,0.101172,-0.172922,-0.011748,-0.02155,0.063555,0.051407,-0.062928,-0.086376,0.030206,-0.00591,-0.037183,-0.060895,0.02128,0.038026,-0.054064,0.025326,0.044455,0.031455,-0.028433,0.027988,0.094261,-0.096463,0.027929,-0.025668,-0.016395,-0.002658,-0.039736,-0.072672,0.03326,-0.053226,0.004644,-0.04244,0.057841,0.073647,-0.057447,0.079257,-0.063758,-0.030641,0.096224,0.00402,0.022169,0.00275,-0.026422,0.019391,-0.127672,-0.029889,0.015739,0.068128,0.043011,0.094614,-0.016291,-0.052754,-0.004705,0.048839,-0.025778,0.033032,-0.071429,-0.049358,0.030268,0.035104,0.00305,0.037117,-0.065939,0.087241,0.040253,0.039676,-0.018533,0.001448,0.047437,-0.017152,-0.014838,0.00365,0.011132,-0.024333,-0.009834,-0.002337,0.133874,0.105572,-0.078639,-0.015985,-0.048812
2,-0.047614,0.057003,-0.017052,-0.067291,0.103634,-0.060095,-0.021878,0.049101,0.073716,-0.076217,-0.011751,0.034173,0.043086,-0.027212,0.054391,-0.043982,-0.116286,0.016585,-0.043656,-0.029465,-0.121517,0.059754,-0.013255,0.117205,-0.023655,0.035053,0.117312,0.056621,-0.000856,-0.018524,-0.028882,-0.038406,0.03269,0.022077,0.037162,0.070779,0.044492,0.005103,-0.083556,0.023027,0.07973,0.171297,-0.000583,-0.022561,0.007646,-0.004454,0.080037,0.051765,0.00721,0.009197,0.019674,0.077995,-0.029819,0.026712,-0.066813,-0.007347,0.015513,0.084886,0.086914,-0.075765,-0.005824,-0.146461,-0.103224,-0.076264,0.022754,-0.094232,-0.044824,0.056673,0.065125,0.043073,0.064267,0.009294,-0.004766,0.013434,-0.016325,0.008323,-0.032813,0.365322,-0.034955,0.002466,0.041476,-0.032444,0.382086,0.070199,-0.000992,-0.039593,0.118813,0.048676,0.029989,0.030742,0.003879,0.048189,-0.014153,-0.021052,0.0969,-0.00223,0.004377,-0.070914,-0.069038,-0.057627,-0.084342,-0.086578,0.107148,0.072006,-0.012006,0.016951,0.058996,0.018527,-0.001777,-0.023121,-0.008226,0.027658,0.061655,0.083512,-0.023493,-0.028971,-0.015836,0.070493,0.022917,0.054742,0.018212,0.058713,0.040743,-0.052727,-0.033257,0.001596,-0.031401,-0.027496,0.003045,0.032956,-0.021687,-0.113078,-0.026917,0.048673,0.058225,-0.006666,0.017638,-0.02799,0.028223,0.015326,-0.023731,0.059023,0.032092,0.04821,0.093247,0.037424,-0.035765,-0.019233,-0.006627,-0.023629,0.092829,-0.04975,-0.002973,-0.04662,-0.029139,0.491357,0.095391,0.122667,0.051467,-0.063379,0.159104,0.009053,0.066203,0.039161,-0.018834,-0.015887,-0.025553,-0.017886,0.052321,0.012516,0.04163,0.02888,0.013203,-0.040891,-0.073291,-0.03303,0.066693,0.011365,-0.084093,-0.003128,0.024037,0.048035,-0.09002,0.030272,-0.058136,0.02464,-0.015874,0.050537,0.004142,0.020327,0.037937,-0.052923,0.002296,-0.023819,0.02574,0.095936,-0.038907,-0.008074,0.041135,-0.025503,0.036577,-0.102394,0.103297,-0.057603,0.071504,0.015367,0.032173,0.03337,0.048241,-0.03518,0.004648,0.013842,0.038563,0.072102,0.004075,0.018256,0.036026,0.170767,0.003901,0.029491,0.019764,0.025682,-0.028965,0.149692,-0.003894,-0.041486,0.010394,0.034883,-0.005932,-0.025517,-0.079206,0.045447,0.06663,-0.027239,0.051353,-0.109734,-0.049891,-0.035684,0.019119,0.058601,-0.002945,0.030816,0.067728,0.037402,0.03358,0.041718,0.013621,-0.02065,-0.013845,-0.029069,-0.04712,-0.014499,-0.011994,-0.028265,0.056471,-0.000211,-0.110895,0.00214,-0.04252,-0.029566,-0.021827,0.003694,-0.004052,-0.035463,-0.038432,0.009525,-0.050355,0.023621,-0.029048,-0.024881,0.018453,-0.086888,-0.066126,-0.017345,0.046169,0.02312,-0.076881,0.028875,-0.032877,0.007796,0.038332,-0.048329,-0.108363,-0.007118,0.037274,0.024244,-0.072588,0.006993,0.040122,0.044881,0.032976,-0.008415,-0.032828,0.071131,0.039603,-0.015606,0.068865,-0.028534,0.028143,0.010572,-0.098852,-0.07354,-0.088057,-0.036772,-0.047958,-0.033485,-0.019342,0.01568,0.019001,-0.008937,-0.08591,-0.014987,-0.130329,0.111444,0.065891,0.024514,0.036446,-0.00037,0.000278,0.022601,0.023221,0.015347,0.05321,-0.028317,0.036606,0.077154,0.064881,-0.02111,0.049794,0.38598,-0.180067,-0.027368,0.066602,-0.006611,0.063825,-0.005417,0.020365,0.102306,0.108989,0.138914,0.005829,-0.004154,-0.041688,0.02758,0.009039,0.04351,-0.059481,-0.047604,-0.053492,0.039199,0.034526,0.002405,0.01196,0.001297,-0.046125,0.02369,-0.031999,-0.003506,0.013018,-0.143484,0.060524,-0.001663,-0.000361,-0.071489,0.033068,-0.004709,-0.060059,0.096072,-0.020947,-0.021164,0.044633,-0.04427,-0.009008,-0.040621,0.061068,0.027489,0.044089,0.012645,-0.030997,0.137759,0.008246,0.037232,-0.098056,0.002565,0.112814,-0.012767,0.013155,-0.010948,0.055654,0.018597,-0.004627,-0.085107,0.01358,-0.056706,0.114496,0.028209,0.01192,-0.139808,0.023179,-0.023581,0.006578,0.024664,0.013549,0.040511,0.03193,-0.05506,0.064374,-0.042142,-0.016898,-0.016768,0.009492,0.047202,0.042221,-0.021366,0.056671,-0.040007,0.035869,0.05938,-0.044102,0.015802,-0.036719,0.031361,0.027943,0.006137,-0.060797,-0.019129,0.003999,-0.079117,-0.061907,0.032766,-0.064046,0.080573,-0.023416,-0.013013,-0.036733,-0.037291,-0.014508,-0.025879,-0.07219,-0.104319,-0.018785,-0.004202,0.053556,0.028402,-0.017701,-0.022431,0.028158,0.000442,-0.096625,0.057764,-0.039166,0.037254,-0.000946,-0.4344,0.057283,0.050348,0.01734,0.001075,-0.076538,-0.014837,0.030825,0.044636,0.027805,-0.045182,0.000291,-0.003308,-0.112881,0.024847,-0.021821,-0.036355,0.084893,-0.071859,-0.071918,-0.106333,0.008338,-0.011186,-0.000264,0.04356,-0.062153,-0.082061,-0.045306,0.050038,0.022577,-0.016744,-0.085922,0.029376,-0.009823,0.005357,0.072378,-0.021608,-0.018292,-0.043222,0.102145,0.005584,0.219761,-0.057202,0.07875,-0.012194,0.043196,-0.019183,-0.001582,-0.023575,-0.016762,0.053794,-0.001076,0.065555,-0.041165,-0.020145,-0.009578,0.016781,-0.031035,-0.030949,0.170525,0.003223,0.028969,0.002029,-0.012137,-0.01328,-0.051637,0.00941,-0.031592,5.3e-05,0.006931,-0.030132,-0.063389,-0.04111,0.007757,0.006809,0.077451,-0.003585,0.06508,-0.070829,0.076184,0.040542,0.015723,0.062644,0.130784,0.004388,-0.028712,-0.04758,-0.05978,0.008144,0.114781,-0.010307,0.037013,0.048701,0.012791,-0.039066,0.03719,0.019628,0.043487,-0.442464,-0.056208,0.074339,0.003872,-0.037691,0.031728,0.01907,-0.030222,0.084992,-0.049441,0.067937,0.019349,0.085066,-0.033959,-0.051969,0.026005,-0.079708,-0.048019,0.02561,-0.186381,0.029611,-0.048323,0.077957,0.057878,0.059027,0.074595,-0.084297,0.013452,0.071298,-0.027631,0.101903,0.091362,-0.030401,0.06685,0.047155,0.082641,-0.046539,11.041768,-0.034346,0.023334,-0.05235,-0.021797,-0.073844,0.077888,-0.16254,-0.044021,0.120999,-0.011939,-0.032323,-0.08724,-0.087491,0.041878,-0.025845,-0.041427,0.01176,-0.05978,-0.036927,-0.00268,0.009881,0.040934,-0.030193,-0.103926,0.025544,-0.010939,-0.032228,0.016615,0.113096,-0.028662,0.068889,0.043659,-0.018759,0.103105,-0.026619,0.095167,0.047536,0.039859,0.095142,0.005202,0.012412,0.028374,0.068666,0.062784,-0.002998,0.054506,0.075564,0.001066,0.105891,0.086107,-0.030428,0.092708,-0.034972,0.000986,0.01024,0.051244,-0.033065,0.095523,0.064845,-0.076817,0.160518,-0.025161,0.015136,0.016978,0.117884,0.093589,0.047732,-0.108145,0.00714,-0.025552,-0.051648,-0.000324,0.038314,-0.032274,-0.083405,0.053875,0.004819,0.030754,-0.03691,0.02725,0.022423,-0.039648,-0.018248,0.061711,0.017267,0.008839,0.045577,-0.015447,-0.075867,-0.005096,-0.023085,0.018544,-0.040087,0.023668,-0.034264,0.006186,-0.06769,0.070783,0.066909,-0.115516,0.025667,-0.06,0.091182,0.035717,0.02192,-0.024331,-0.023677,-0.005565,0.000413,-0.012941,0.003056,0.025849,-0.026505,0.051657,0.032574,0.01051,0.002665,0.050271,0.081028,-0.090779,-0.003479,-0.030248,0.001027,-0.004237,0.00169,-0.036482,0.001273,-0.054932,0.001698,-0.027778,0.058268,-0.014891,-0.064384,0.059275,-0.074287,-0.030636,0.072317,0.033165,0.084609,-0.047768,-0.007475,0.027497,-0.060141,-0.045843,0.072712,0.061591,0.053319,0.121826,-0.038755,-0.02411,0.005711,0.045046,-0.053851,0.037302,-0.060371,-0.021883,0.040525,0.04637,0.010167,0.017496,-0.026912,0.05864,0.047384,0.006753,0.010921,-0.023689,0.00747,-0.003883,-0.00197,-0.023998,0.015012,-0.049011,-0.063189,-0.051354,0.121658,0.039883,-0.10499,-0.013955,-0.049584
3,-0.055787,0.083468,-0.010403,-0.023431,0.07643,-0.105103,0.000467,-0.032292,0.111917,-0.062297,-0.048576,0.010734,0.062767,0.059059,0.025644,-0.014685,-0.096645,0.037814,-0.044286,-0.006353,-0.067835,0.131456,-0.007657,0.130094,0.02171,0.065861,0.154109,0.091043,-0.040255,0.014171,-0.035942,-0.043163,0.056081,0.034511,0.038901,0.083606,0.037087,-0.015373,-0.132345,-0.015228,0.061958,0.15271,0.02345,-0.043979,0.070867,0.000266,0.051731,0.028989,0.013152,0.031738,0.018418,0.030142,-0.048416,0.011626,-0.123716,0.024694,0.023101,0.104888,0.075139,-0.063729,-0.02678,-0.096866,-0.086571,-0.06965,0.035921,-0.090767,-0.07263,0.075589,0.057548,0.058724,0.037374,-0.027951,0.039804,-0.033022,0.002115,-0.014544,-0.05125,0.405433,-0.093932,0.013718,0.053869,-0.025122,0.34108,0.042267,0.015976,0.021417,0.093516,0.007928,0.029516,0.048319,0.033279,0.069798,-0.010077,0.03378,0.039685,0.034864,-0.045776,-0.110793,-0.047729,-0.021275,-0.108095,-0.042476,0.107382,0.082665,-0.029899,-0.003617,0.075807,-0.009558,0.003272,0.004802,-0.026285,0.050601,0.037538,0.04585,-0.017803,-0.066828,-0.012028,0.064518,0.029373,0.058262,-0.003893,0.084172,-0.025278,-0.003742,-0.026204,0.043314,-0.055766,-0.025153,0.01057,-0.011428,-0.005425,-0.160533,0.000217,0.037434,0.074511,0.026912,0.008674,-0.012575,0.078559,-0.041819,-0.038736,0.071089,0.02861,0.066078,0.124067,0.03127,-0.02564,-0.029271,-0.036174,-0.027012,0.064555,-0.046386,-0.021286,0.016679,-0.05226,0.446493,0.134561,0.055376,0.026327,-0.022556,0.148162,-0.022507,0.050003,0.037884,0.006898,-0.036381,0.002813,0.006173,0.062345,0.031594,0.035458,0.056835,0.021599,-0.029503,-0.074305,-0.00812,0.030666,0.029104,-0.118643,-0.012137,0.000278,0.057529,-0.117053,0.082504,-0.061802,-0.00482,-0.042981,0.067045,0.05441,0.073164,0.022908,-0.036277,0.004099,-0.030604,-0.043184,0.106789,-0.028541,-0.015905,0.052731,-0.055536,0.076863,-0.057305,0.100041,-0.075408,0.016979,-0.021727,0.091354,0.066216,0.043909,-0.038326,-0.042237,0.009755,-0.006845,0.036567,0.011358,-0.006903,0.024564,0.058781,0.018306,-0.028719,0.043143,0.013864,0.055898,0.117082,-0.046128,-0.001791,-0.004256,0.031505,0.015406,-0.043332,-0.058814,0.019306,-0.0032,-0.036025,0.047996,-0.107762,-0.038472,-0.017181,-0.012707,0.055212,0.009553,0.014098,0.059437,-0.004702,0.011315,0.036329,0.015186,0.080815,-0.000422,0.006962,-0.048198,-0.02391,0.026206,-0.004493,0.10465,0.00908,-0.084546,0.018663,-0.025485,-0.039,-0.026337,-0.02474,0.025821,0.015988,-0.072559,-0.022816,-0.049214,0.00234,-0.010298,-0.06544,0.028215,-0.089477,-0.052806,0.001,0.06484,0.032488,-0.035387,0.026423,-0.070489,-0.000634,0.044406,-0.032065,-0.096858,0.002098,0.01696,0.05697,-0.132806,0.038587,0.054012,0.100787,0.057159,0.009645,-0.026481,0.091045,0.028611,-0.015712,0.069515,-0.042161,0.02649,0.025285,-0.064556,-0.073379,-0.125291,-0.036814,-0.03268,-0.066069,-0.006822,-0.046077,-0.005918,-0.031262,-0.04305,-0.059174,-0.104455,0.084265,0.029051,-0.021119,0.049775,0.006716,-0.015431,0.036689,0.007711,0.010385,0.040004,-0.027367,0.021152,0.083499,0.053533,-0.051038,0.070954,0.370613,-0.296922,-0.012586,0.020309,0.007228,0.070677,-0.018492,0.039568,0.088047,0.107193,0.096617,0.002659,-0.031311,-0.028255,0.04645,0.048704,0.043863,-0.023606,-0.068037,-0.006593,0.001883,-0.00668,-0.012464,-0.009969,-0.048069,-0.022993,0.022313,0.012252,-0.033034,0.027726,-0.137945,0.016691,-0.031801,0.020028,-0.020538,0.065674,-0.045984,-0.062808,0.064116,-0.048265,-0.011414,0.034119,-0.069076,-0.023806,-0.060987,0.027254,0.054883,0.036686,0.03167,0.000114,0.129524,0.034206,-0.001994,-0.048629,0.01455,0.086631,-0.022955,0.041385,-0.016597,0.064342,0.047264,0.00924,-0.041072,0.037615,-0.068556,0.108393,0.050209,-0.065052,-0.057029,0.026743,-0.045975,0.019023,0.059034,0.018031,0.062359,0.040829,-0.012831,-0.023807,-0.087493,-0.007047,-0.041284,-0.008343,0.019934,0.039437,-0.067408,0.038484,-0.053253,0.016158,0.024227,-0.002888,0.022679,-0.038856,0.076147,0.018446,0.011554,-0.078908,-0.02886,0.007033,-0.062895,-0.046372,0.033108,-0.085423,0.10122,-0.013355,0.005407,0.008872,-0.062461,-0.044717,-0.022279,-0.104718,-0.067809,0.003952,-0.014315,0.044593,0.020002,-0.018301,0.020141,0.063272,-0.025559,-0.103143,0.053438,-0.081432,0.023292,-0.020316,-0.442212,0.055936,-0.051891,0.064221,0.007177,-0.075909,0.026381,0.030182,0.043407,0.033622,-0.037686,0.027488,-0.007965,-0.086144,0.034902,-0.024354,0.015901,0.061884,-0.074752,-0.068081,-0.078176,0.003148,-0.011396,-0.012823,0.013422,-0.026805,-0.093628,-0.032809,0.049009,0.026455,-0.028903,-0.070156,-0.013145,-0.028902,0.040105,0.070203,-0.042068,-0.030885,-0.058939,0.087947,0.0196,0.218374,-0.05443,0.13231,-0.018521,0.106133,-0.032151,0.003712,-0.048698,0.035677,0.020919,0.00408,-0.026897,-0.047755,-0.035837,0.038464,-0.008962,-0.008809,-0.032361,0.188888,0.004834,0.054671,-0.005519,-0.028397,-0.00445,-0.030515,0.049674,-0.050983,0.019056,-0.034248,-0.015547,-0.048282,-0.036696,0.013737,-0.041048,0.117668,-0.015806,0.062966,-0.039987,0.097967,0.04875,0.026838,-0.004438,0.057603,-0.021323,-0.038224,-0.04844,-0.063038,0.00151,0.099665,-0.04105,0.061541,0.043866,-0.001566,-0.054044,0.016237,0.060036,0.062249,-0.50306,-0.059469,0.060527,-0.011056,-0.019826,0.062784,0.052226,-0.00762,0.038347,-0.114587,0.053896,0.028026,0.093527,-0.003709,-0.067356,0.051782,-0.074935,-0.035221,0.040413,-0.208427,0.044538,-0.059189,0.142398,0.014656,0.054425,0.059447,-0.062967,0.002464,0.048403,-0.016513,0.057963,0.082603,-0.038996,0.069914,0.056779,0.078009,0.022157,11.139329,-0.00765,0.050817,-0.040137,-0.031863,-0.070288,0.093639,-0.086032,-0.000308,0.10768,0.012781,-0.041369,-0.117235,-0.0421,0.016028,0.005038,-0.037564,-0.006535,-0.021105,-0.032231,-0.001586,0.023212,0.071523,-0.030235,-0.042261,-0.028374,-0.028767,-0.015494,0.007052,0.125316,-0.019805,0.058779,0.030301,-0.02895,0.117814,-0.043749,0.045452,0.092671,0.030628,0.082595,-0.004343,0.007284,-0.007554,0.039781,0.061396,0.021161,-0.00211,0.128341,0.040233,0.127146,0.090793,0.007233,0.08577,-0.025974,0.001569,-0.03936,0.057411,-0.059482,0.062703,0.09936,-0.049781,0.141256,-0.046915,0.001892,0.061926,0.139913,0.052143,0.048265,-0.134743,0.007569,0.008037,-0.065481,-0.061774,0.075104,-0.026168,-0.032019,0.095108,-0.018514,0.021905,-0.061265,0.012468,0.043025,-0.002669,-0.040198,0.054533,0.044193,-0.017378,0.096852,-0.047218,-0.055762,-0.031918,-0.079522,-0.013551,-0.00863,0.038314,-0.016644,-0.025682,-0.111757,0.0804,0.069622,-0.151681,-0.027937,-0.008858,0.088874,-0.022622,0.021935,0.008409,-0.002405,0.012917,-0.007536,-0.0458,0.020887,0.035822,-0.071844,-0.015567,0.043552,0.006282,0.014283,0.034368,0.099985,-0.078425,0.01374,-0.064694,-0.01928,-0.013924,-0.022797,-0.048783,-0.040996,-0.021454,0.01268,-0.047107,0.048958,0.029476,-0.032358,0.05801,-0.05214,-0.041351,0.128474,0.058028,0.092914,0.025445,0.026857,-0.027598,-0.041816,-0.041246,0.102142,0.066129,-0.027735,0.109246,-0.015041,-0.028082,0.029037,0.068618,-0.044292,0.008508,-0.071456,-0.004567,0.049751,0.006257,0.028263,-0.001218,-0.127125,0.039018,0.079159,-0.028739,-0.012944,-0.060496,-0.005051,0.033721,-0.021031,0.068159,0.029655,-0.037815,-0.047088,-0.011181,0.091564,0.067291,-0.148826,-0.060812,-0.003384
4,-0.037177,0.065484,-0.033264,-0.097688,0.043018,-0.095644,-0.026122,0.001848,0.133342,-0.054999,-0.0479,0.073016,0.0443,0.070017,0.054446,0.017357,-0.10877,0.001506,0.001677,-0.031772,-0.094347,0.147335,-0.048235,0.098226,0.014687,0.034912,0.115023,0.112283,-0.046763,0.016756,-0.025676,-0.057612,-0.0036,0.028337,0.062559,0.085848,0.067487,-0.014428,-0.120353,0.013407,0.021353,0.074721,0.027607,-0.022729,0.086644,-0.000644,0.036871,0.050617,1.8e-05,0.034123,0.03468,0.059503,-0.076522,0.026817,-0.119017,0.011259,-0.017081,0.071787,0.066568,-0.05749,-0.01206,-0.084221,-0.114442,-0.041237,0.024609,0.002704,-0.06175,-0.001285,0.032355,0.069761,0.068338,-0.051553,0.075976,-0.094018,-0.005097,0.003347,-0.004784,0.458051,-0.044595,-0.021188,0.029554,-0.012838,0.356025,0.040014,0.015369,0.071234,0.096772,0.046335,0.051681,0.042216,0.001612,0.077173,-0.060342,0.036337,0.007682,0.08639,0.004487,-0.073057,-0.058422,-0.051239,-0.045293,-0.048656,0.120107,0.076738,-0.000534,0.007275,0.08055,0.000696,-0.023018,-0.012307,0.011493,0.020842,0.037703,0.032269,-0.013107,-0.051263,-0.009975,0.027414,0.011942,0.043666,0.029131,0.082081,0.056724,0.001371,0.006641,0.011529,-0.017389,0.006146,-0.013652,0.036424,-0.015888,-0.143517,-0.035126,0.123981,0.046208,0.036752,0.050732,-0.006837,0.083891,-0.008068,-0.071855,0.055266,0.051976,0.037552,0.114944,0.014409,0.022682,-0.059301,-0.012699,-0.036785,0.081761,-0.043678,-0.088068,0.018824,-0.001857,0.456739,0.099373,0.077557,-0.031844,-0.038938,0.150866,-0.002354,0.027825,0.02326,-0.049709,-0.035988,0.014666,0.057418,0.064248,-0.031027,0.023583,0.047419,0.037128,-0.044023,-0.064201,-0.02992,0.027742,0.017746,-0.107715,-0.035846,0.022155,0.053895,-0.126287,0.048105,-0.020183,0.014715,-0.023197,0.06047,0.030018,0.057268,0.053785,-0.019909,0.014697,-0.076736,-0.077049,0.09001,-0.040354,-0.006629,0.031387,-0.044647,0.10661,-0.086961,0.089968,-0.094413,0.012648,-0.013466,0.099746,0.055501,0.033703,-0.03184,-0.067927,-0.018893,-0.008538,0.053914,-0.034561,0.021831,0.064237,0.121602,-0.019843,-0.001575,0.036738,0.049477,0.04916,0.104062,-0.03127,-0.00261,-0.009938,-0.019836,0.017466,0.008854,-0.017907,0.035684,-0.00523,-0.026493,0.0698,-0.101199,-0.000386,-0.04773,-0.034813,0.0639,-0.073714,0.037043,0.056234,0.009034,0.014469,0.030437,0.019205,0.11947,-0.02457,0.02923,-0.031408,-0.003254,-0.011413,-0.032192,0.05513,-0.003212,-0.085593,0.009606,0.022047,-0.016928,-0.019108,-0.066011,0.035689,0.01343,-0.126732,-0.063941,-0.021062,0.023764,-0.013219,-0.071046,0.012668,-0.038918,-0.053567,-0.003229,0.05602,0.063689,-0.054472,0.028267,-0.075894,-0.023774,0.031218,-0.027675,-0.111024,0.021712,0.009871,0.040397,-0.095439,-0.062134,0.066948,0.087872,0.037061,0.012278,0.014684,0.038957,-0.062835,0.040955,0.074645,-0.00483,0.039894,0.020272,-0.067495,-0.076227,-0.152141,-0.058199,0.011426,-0.066714,0.009316,-0.039664,0.031576,-0.035437,-0.041838,-0.073341,-0.126256,0.09665,0.047037,0.003584,0.050673,-0.014526,0.033203,0.052174,0.034608,0.016498,0.0441,-0.027045,-0.032343,0.041833,0.043003,-0.068029,0.048621,0.362659,-0.265299,-0.015019,0.030611,0.010169,0.079913,-0.043901,0.007995,0.065992,0.073253,0.032268,-0.008553,0.02184,-0.032586,0.054697,0.055288,0.002051,-0.060816,-0.025682,-0.00246,0.017238,0.036124,-0.011784,0.033188,-0.036096,-0.016536,0.032673,0.050072,-0.036311,-0.01569,-0.092397,0.029431,0.002112,0.060377,0.0152,0.066879,-0.092199,-0.024819,0.062049,-0.035076,0.03203,0.028493,-0.005438,-0.001102,-0.054606,0.049115,0.008082,0.045824,0.040714,-0.011235,0.145452,0.060105,0.001685,-0.049264,-0.003109,0.102501,-0.008096,0.005375,-0.000766,0.07816,0.012113,-0.004936,-0.130636,0.033145,-0.063346,0.085894,0.001382,-0.048292,-0.208787,-0.032975,-0.025643,0.005333,0.079677,0.014842,-0.016242,0.064609,-0.012933,-0.023998,-0.100636,0.015034,0.021088,-0.017244,-0.001336,0.042778,-0.05589,-0.002818,-0.024578,0.038939,0.000741,-0.014692,0.01543,-0.034002,0.047163,0.001817,0.021883,-0.070957,-0.037219,0.019502,-0.042578,0.008006,0.020021,-0.046107,0.086248,-0.006861,-0.001091,0.008286,-0.031109,-0.030646,-0.033945,-0.079639,-0.052119,0.007985,-0.004707,0.056993,0.007293,-0.037666,0.023911,0.114598,-0.030189,-0.125099,0.053658,-0.043301,0.059983,-0.033861,-0.615076,0.069305,0.035498,0.082916,0.022012,-0.0526,0.001558,0.042792,0.050139,0.050206,-0.021437,-0.017458,-0.025479,-0.065208,0.05268,-0.017522,-0.021371,0.03456,-0.050869,-0.047662,-0.050105,0.033228,-0.005019,-0.0332,0.077527,-0.018411,-0.134307,-0.021748,0.078251,0.022077,0.009434,-0.09937,0.003402,-0.018024,0.037572,0.068176,-0.00189,0.014017,-0.053521,0.048918,0.014873,0.224891,-0.048172,0.17051,-0.009129,0.068176,-0.017374,-0.002898,-0.064661,0.014205,0.022823,-0.011644,-0.001796,-0.059751,-0.000729,0.013034,0.063237,-0.02097,0.016135,0.171871,-0.02118,0.071234,0.015464,-0.006395,0.009477,-0.040452,0.050028,-0.06569,0.015816,-0.036739,0.013194,-0.046845,-0.020105,0.043365,-0.018556,0.088216,-0.018207,0.067959,-0.075595,0.083417,0.000374,0.030323,0.012,0.036669,0.058177,-0.030698,-0.0944,-0.05601,0.031192,0.0798,-0.016421,0.078126,0.031537,0.000929,-0.032349,0.044443,0.11735,0.028038,-0.589482,-0.084804,0.039801,-0.023921,-0.006269,0.011555,0.017553,-0.02935,0.071635,-0.064291,0.026866,0.037937,0.063618,-0.00379,-0.039743,0.042629,-0.059734,-0.052254,0.010604,-0.270319,0.044358,-0.017262,0.125277,0.05016,0.026167,0.060142,-0.034554,-0.013007,0.08057,0.011182,0.050598,0.128603,-0.047677,0.028653,0.061297,0.129651,0.003565,10.625595,-0.008661,0.060423,-0.004855,-0.016793,-0.091092,0.10243,-0.03091,-0.014932,0.05713,0.000275,0.042202,-0.095466,-0.011483,0.008127,-0.005172,-0.051275,-0.032039,0.006801,-0.024322,-0.021122,0.009917,0.016668,0.033222,-0.061955,-0.036618,-0.033189,-0.030154,-0.039068,0.089545,0.006488,0.031969,0.071493,0.010995,0.104533,-0.017531,0.029541,0.114605,0.03915,0.100004,0.039235,0.013047,-0.01917,0.004703,0.034976,0.010062,-0.051968,0.119268,0.023531,0.113976,0.075287,-0.040067,0.033845,0.041096,-0.028441,-0.031052,-0.023814,-0.041424,0.062831,0.071113,-0.032298,0.136422,-0.03635,0.034071,-0.029112,0.11129,0.074295,0.06746,-0.138041,0.0015,-0.003238,-0.053877,-0.057105,-0.002476,0.013466,-0.050752,0.05327,-0.020381,-0.000694,-0.020456,0.032156,0.048958,-0.035681,-0.040546,0.053318,0.020414,0.021893,0.084764,-0.039896,0.000203,-0.064525,-0.012986,0.004216,-0.022174,0.033543,0.010273,-0.012729,-0.084795,0.118908,0.05293,-0.073816,-0.02118,-0.03968,0.058902,0.042282,0.020497,-0.006758,0.031185,0.02181,-0.011361,-0.082431,0.05601,-0.016979,-0.040052,0.001177,0.048406,-0.01773,0.021207,0.003669,0.073493,-0.05053,0.00464,-0.041766,0.024254,-0.046778,-0.036848,-0.065233,-0.012795,-0.001042,-0.013924,-0.082715,0.033592,0.075389,-0.010214,0.03434,-0.030334,0.007411,0.113516,0.055233,0.058711,-0.028585,-0.017621,0.000232,-0.053178,-0.019006,0.022756,0.049204,0.01769,0.084491,-0.027276,0.002377,0.015709,0.057427,-0.047,0.002952,-0.062077,-0.017157,0.050607,0.026399,-0.003433,0.035635,-0.055056,0.035425,0.06848,-0.012599,-0.058165,-0.049941,0.013946,0.039761,-0.017256,0.035223,0.001151,-0.010405,-0.080127,-0.006412,0.0891,0.152729,-0.129816,-0.057907,0.007631


# title-uni_gram & description-roberta

roberta

In [None]:
%pip install transformers -q
from transformers import RobertaTokenizer, RobertaModel
from torch.utils.data import Dataset, DataLoader
import torch
from tqdm import tqdm

In [None]:
MAX_LEN = 128
model_name = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaModel.from_pretrained(model_name)
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
print(device)

In [19]:
description_ds = RobertaFeatureDataset(df['FullDescription'].tolist())
description_dl = DataLoader(description_ds, batch_size=16, shuffle=False)

In [30]:
final_vector_array = extract_cls_vectors(model, description_dl, device)

description_roberta = pd.DataFrame(
    final_vector_array,
    index=df.index
)

description_roberta.columns = [f'cls_{i}' for i in range(final_vector_array.shape[1])]

Extraction [CLS] RoBERTa: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 15298/15298 [31:13<00:00,  8.16it/s]


In [31]:
desc_roberta_train = description_roberta.loc[train.index]
desc_roberta_test  = description_roberta.loc[test.index]

tf-idf

In [40]:
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
from sklearn.decomposition import TruncatedSVD
import joblib

In [41]:
tfidf_title = TfidfVectorizer(max_features=5000, stop_words='english', ngram_range=(1, 1))
X_train_title = tfidf_title.fit_transform(train["Title"])
X_test_title  = tfidf_title.transform(test["Title"])

In [42]:
from sklearn.utils.extmath import randomized_svd

U, Sigma, VT = randomized_svd(X_train_title, n_components=50, n_iter=5, random_state=42)
X_train_title = U @ np.diag(Sigma)
X_test_title  = X_test_title.dot(VT.T)

In [None]:
# svd = TruncatedSVD(n_components=50, random_state=42, verbose=1)

# X_train_text = svd.fit_transform(X_train_title)
# X_test_text  = svd.transform(X_test_title)

In [46]:
X_train_title.shape

(171337, 50)

In [47]:
desc_roberta_train.shape

(171337, 768)

In [38]:
df.shape

(244768, 9)

In [39]:
train.shape

(171337, 9)

In [49]:
train_text_combined = np.concatenate([desc_roberta_train, X_train_title], axis=1)
test_text_combined = np.concatenate([desc_roberta_test, X_test_title], axis=1)

In [None]:
train_text_combined_df = pd.DataFrame(train_text_combined)
test_text_combined_df = pd.DataFrame(test_text_combined)

train_text_combined_df.to_parquet('data/texts_uni_roberta_train.parquet', index=True)
test_text_combined_df.to_parquet('data/texts_uni_roberta_test.parquet',  index=True)

In [62]:
!df -h

Filesystem      Size  Used Avail Use% Mounted on
devtmpfs        7.7G     0  7.7G   0% /dev
tmpfs           7.7G  4.0K  7.7G   1% /dev/shm
tmpfs           7.7G  672K  7.7G   1% /run
tmpfs           7.7G     0  7.7G   0% /sys/fs/cgroup
/dev/nvme0n1p1  135G   78G   58G  58% /
tmpfs           1.6G     0  1.6G   0% /run/user/0
/dev/nvme2n1    4.8G  4.4G  146M  97% /home/ec2-user/SageMaker
tmpfs           1.6G     0  1.6G   0% /run/user/1001
tmpfs           1.6G     0  1.6G   0% /run/user/1000
tmpfs           1.6G     0  1.6G   0% /run/user/1002


In [57]:
test_text_combined_df.to_parquet('data/texts_uni_roberta_test.parquet',  index=True)

OSError: [Errno 28] Error writing bytes to file. Detail: [errno 28] No space left on device

# 5. Split data

In [25]:
# tabular
from sklearn.model_selection import train_test_split
train, test = train_test_split(df, test_size=0.3, random_state=42)

In [106]:
# word2vec
texts_train = texts.loc[train.index]
texts_test = texts.loc[test.index]
texts_train.to_pickle('data/texts_w2v_train.pkl')
texts_test.to_pickle('data/texts_w2v_test.pkl')

In [44]:
# roberta
texts_roberta_train = texts_w2v_roberta.loc[train.index]
texts_roberta_test  = texts_w2v_roberta.loc[test.index]
texts_roberta_train.to_parquet('data/texts_roberta_train.parquet', index=True)
texts_roberta_test.to_parquet('data/texts_roberta_test.parquet',  index=True)

# 6. One hot encoding

In [57]:
# select most common source in category group
category_to_source = train.groupby('Category')['SourceName'].agg(lambda x: x.mode()[0]).to_dict()
train['SourceName'] = train['Category'].map(category_to_source)
test['SourceName'] = test['Category'].map(category_to_source)

In [58]:
train = pd.get_dummies(train, columns = ['ContractType', 'ContractTime', 'Category', 'SourceName'], drop_first=True, dtype=int)
test = pd.get_dummies(test, columns = ['ContractType', 'ContractTime', 'Category', 'SourceName'], drop_first=True, dtype=int)

# 7. Target Encoding - mean salary of company instead of company name

In [59]:
# combining companies by two first words
train['CompanyPrefix'] = train['Company'].apply(lambda x: ' '.join(str(x).split()[:2]))
test['CompanyPrefix'] = test['Company'].apply(lambda x: ' '.join(str(x).split()[:2]))

In [60]:
# mean salary by company
mean_company = train.groupby('CompanyPrefix')['SalaryNormalized'].mean()
train['CompanyEncoded'] = train['CompanyPrefix'].map(mean_company)
test['CompanyEncoded'] = test['CompanyPrefix'].map(mean_company)

# filling not existing companies in test with global mean
global_mean = train['SalaryNormalized'].mean()
test['CompanyEncoded'] = test['CompanyEncoded'].fillna(global_mean)

train.drop(columns=['Company', 'CompanyPrefix'], inplace=True)
test.drop(columns=['Company', 'CompanyPrefix'], inplace=True)

In [107]:
mean_company.head()

CompanyPrefix
.Michael Page    77500.0
1 1              21040.0
10 TRINITY       45000.0
100 percent      40500.0
100% IT          30250.0
Name: SalaryNormalized, dtype: float64

# 8. Standarization

In [62]:
from sklearn.preprocessing import StandardScaler
numeric_cols = ['SalaryNormalized', 'CompanyEncoded', 'LocationPopulation']
scaler = StandardScaler()
train[numeric_cols] = scaler.fit_transform(train[numeric_cols])
test[numeric_cols] = scaler.transform(test[numeric_cols])

# 9. Tf-idf

In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
from sklearn.decomposition import TruncatedSVD
import joblib

In [22]:
def prepare_tfidf(train, test, n_grams):
    tfidf_description = TfidfVectorizer(max_features=5000, stop_words='english', ngram_range=n_grams)
    tfidf_title = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=n_grams)
    
    X_train_description = tfidf_description.fit_transform(train["FullDescription"])
    X_test_description  = tfidf_description.transform(test["FullDescription"])
    
    X_train_title = tfidf_title.fit_transform(train["Title"])
    X_test_title  = tfidf_title.transform(test["Title"])
    
    return hstack([X_train_description, X_train_title]), hstack([X_test_description, X_test_title])

In [None]:
X_train_text, X_test_text = prepare_tfidf(train, test, (1, 3))

In [None]:
# dimenshion reduction
svd = TruncatedSVD(n_components=50, random_state=42)

X_train_text = svd.fit_transform(X_train_text)
X_test_text  = svd.transform(X_test_text)

In [None]:
# saving
np.save("data/X_train_text_tri.npy", X_train_text)
np.save("data/X_test_text_tri.npy", X_test_text)

# 10. Tabular data saving

In [46]:
train_tab = train.drop(columns=['Title', 'FullDescription'])
test_tab = test.drop(columns=['Title', 'FullDescription'])

In [47]:
train_tab.to_csv('data/train_preprocessed.csv', index=False)
test_tab.to_csv('data/test_preprocessed.csv', index=False)

In [51]:
train_tab.head(3)

Unnamed: 0,SalaryNormalized,LocationPopulation,ContractType_full_time,ContractType_part_time,ContractTime_contract,ContractTime_permanent,Category_Admin Jobs,Category_Charity & Voluntary Jobs,Category_Consultancy Jobs,Category_Creative & Design Jobs,Category_Customer Services Jobs,Category_Domestic help & Cleaning Jobs,"Category_Energy, Oil & Gas Jobs",Category_Engineering Jobs,Category_Graduate Jobs,Category_HR & Recruitment Jobs,Category_Healthcare & Nursing Jobs,Category_Hospitality & Catering Jobs,Category_IT Jobs,Category_Legal Jobs,Category_Logistics & Warehouse Jobs,Category_Maintenance Jobs,Category_Manufacturing Jobs,Category_Other/General Jobs,"Category_PR, Advertising & Marketing Jobs",Category_Part time Jobs,Category_Property Jobs,Category_Retail Jobs,Category_Sales Jobs,Category_Scientific & QA Jobs,Category_Social work Jobs,Category_Teaching Jobs,Category_Trade & Construction Jobs,Category_Travel Jobs,SourceName_Jobcentre Plus,SourceName_TotallyLegal,SourceName_caterer.com,SourceName_charityjob.co.uk,SourceName_cv-library.co.uk,SourceName_cwjobs.co.uk,SourceName_findababysitter.com,SourceName_jobs.communitycare.co.uk,SourceName_retailchoice.com,SourceName_staffnurse.com,SourceName_totaljobs.com,CompanyEncoded
241453,-0.857472,1.095796,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,-1.474048
169520,-0.281558,1.095796,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,-0.903872
136661,-0.631001,1.095796,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,-0.70608
