In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

pd.options.display.max_columns = 500

In [3]:
!pip install loguru
!pip install psycopg2-binary

Collecting loguru
  Downloading loguru-0.6.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: loguru
Successfully installed loguru-0.6.0
[0mCollecting psycopg2-binary
  Downloading psycopg2_binary-2.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.5
[0m

In [4]:
import os
from fastapi import FastAPI, Depends
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy import create_engine, func, Text, Integer, Column, ForeignKey, TIMESTAMP
from sqlalchemy.ext.declarative import declarative_base
from typing import List
import datetime
from pydantic import BaseModel
from loguru import logger

In [5]:
# Create a URL object to connect to DB
SQLALCHEMY_DATABASE_URL = "postgresql://robot-startml-ro:pheiph0hahj1Vaif@postgres.lab.karpov.courses:6432/startml"

# Create engine and link it to the URL
engine = create_engine(SQLALCHEMY_DATABASE_URL)

# Instantiate a Session maker object used to create sessions with required parameters
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Create a parental class Base from which other ORM classes will inherit
Base = declarative_base()

In [6]:
# Create function that facilitates downloading huge datasets to pandas dataframes by bathes/chunks
def batch_load_sql(query: str):
    engine = create_engine("postgresql://robot-startml-ro:pheiph0hahj1Vaif@postgres.lab.karpov.courses:6432/startml")
    conn = engine.connect().execution_options(
        stream_results=True)
    chunks = []
    for chunk_df in pd.read_sql(query, conn, chunksize=200000):
        chunks.append(chunk_df)
        logger.info(f'Got chunk: {len(chunk_df)}')
    conn.close()
    return pd.concat(chunks, ignore_index=True)

In [7]:
# Get all the posts from DB
posts_df = batch_load_sql(
    """
    SELECT *
    FROM public.post_text_df
    """)
    

2022-11-04 09:51:05.645 | INFO     | __main__:batch_load_sql:9 - Got chunk: 7023


In [8]:
# Get all the users' info from DB
users_df = batch_load_sql(
    """
    SELECT *
    FROM public.user_data
    """
)

2022-11-04 09:51:14.336 | INFO     | __main__:batch_load_sql:9 - Got chunk: 163205


In [9]:
users_df.describe()

Unnamed: 0,user_id,gender,age,exp_group
count,163205.0,163205.0,163205.0,163205.0
mean,85070.371759,0.551331,27.195405,1.997598
std,48971.63995,0.49736,10.239158,1.413644
min,200.0,0.0,14.0,0.0
25%,41030.0,0.0,19.0,1.0
50%,85511.0,1.0,24.0,2.0
75%,127733.0,1.0,33.0,3.0
max,168552.0,1.0,95.0,4.0


In [10]:
# Get 5M feed entries from DB
feed_df = batch_load_sql(
    """
    SELECT timestamp, user_id, post_id, action
    FROM public.feed_data
    LIMIT 5000000
    """
)

2022-11-03 10:50:17.379 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:20.695 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:24.342 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:26.920 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:29.743 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:33.018 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:35.867 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:38.499 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:42.085 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:44.751 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:47.709 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022-11-03 10:50:50.307 | INFO     | __main__:batch_load_sql:9 - Got chunk: 200000
2022

In [11]:
# feed_df = batch_load_sql(
#         """
#         SELECT timestamp, user_id, post_id, action
#         FROM (
#                 SELECT *,
#                         ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY user_id) as row_num
#                 FROM feed_data
#             ) AS s
#          WHERE row_num <= 20
#         """
# )

In [12]:
posts_features = posts_df.copy()

In [13]:
import nltk

In [14]:
from nltk.stem import WordNetLemmatizer

In [15]:
## Text preprocessing
import re
import string

from nltk.stem import WordNetLemmatizer 
from sklearn.feature_extraction.text import TfidfVectorizer

wnl = WordNetLemmatizer()

def preprocessing(text, token=wnl):
    text = text.lower()
    text = re.sub(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", " ", text)
    text = re.sub("[^a-zA-Z]", " ", text)
    
    while "  " in text:
        text = text.replace("  ", " ")
    
    text = text.strip()
        
    text = ' '.join([token.lemmatize(x) for x in text.split(' ')])
    return text


vectorizer = TfidfVectorizer(
    stop_words='english',
    preprocessor=preprocessing
)

In [16]:
import nltk
nltk.download('popular')

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     /usr/share/nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk

True

In [17]:
# Vectorize all the texts with TF-IDF
tfidf_data = (
    vectorizer.fit_transform(posts_features['text']).toarray()
)

tfidf_data

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [18]:
tfidf_data.shape

(7023, 42044)

In [19]:
tfidf_data = pd.DataFrame(
    tfidf_data,
    index=posts_features.post_id,
    columns=vectorizer.get_feature_names_out()
)

tfidf_data

Unnamed: 0_level_0,aa,aaa,aaaaah,aaaahhhhhhh,aaaggghhhhhhh,aaah,aac,aadc,aahing,aai,aaila,aaja,aaliyah,aaliyahs,aaltra,aames,aamir,aanes,aankhen,aapl,aara,aaran,aardman,aarhus,aaron,aatish,aavjo,ab,aba,abacus,abandon,abandoned,abandoning,abandonment,abate,abatement,abating,abba,abbas,abbasi,abbey,abbot,abbott,abbotts,abbreviated,abbu,abby,abc,abd,abdellatif,abdicated,abdication,abdomen,abdominal,abduct,abducted,abduction,abductor,abdul,abdullah,abdullatif,abe,abebe,abedded,abel,abensur,aberavon,aberdeen,abernethie,aberration,aberystwyth,abetted,abetting,abeyance,abeyie,abeyies,abhijeetmonet,abhishe,abhishek,abhorrent,abi,abide,abided,abiding,abigail,ability,abish,abisheks,abiyote,abject,ablank,able,ablinken,ably,abn,abnegation,abnormal,abnormality,abo,aboard,aboards,abolish,abolished,abolishing,abolition,abominable,abomination,aboriginal,abort,aborted,abortion,abortionist,abortive,abou,abound,abounds,aboutbalasore,aboutroughly,abraham,abramovich,abramovichs,abridged,abroad,abrupt,abruptly,absa,absas,absconded,absence,absent,absentee,absolute,absolutely,absolution,absolutley,absorb,absorbed,absorbing,absorbs,abstain,abstention,abstract,abstraction,absurd,absurder,absurdist,absurdity,absurdly,absurdness,abt,abtahi,abu,abundance,abundantly,aburizal,abuse,abused,abuser,abusing,abusive,abut,abuzz,abysmal,abysmally,abyss,ac,acacia,acadamy,academia,academic,academically,academictwitter,academie,academy,acapulco,acasuso,acc,acce,accelerate,accelerated,accelerating,acceleration,accelerator,accent,accented,accentuate,accentuates,accentuating,accept,acceptable,acceptably,acceptance,accepted,accepting,accepts,access,accessed,accessibility,accessible,accessing,accession,accessory,accident,accidental,accidentally,accidently,acclaim,acclaimed,acclimate,acclimatise,accolade,accommodate,accommodated,accommodating,accommodation,accomodate,accompanied,accompanies,accompaniment,accompany,accompanying,accomplice,accomplish,accomplished,accomplishes,accomplishment,accord,accordance,according,accordingly,accordion,accorsi,accosiation,account,accountability,accountable,accountancy,accountant,accounted,accounting,accoustic,accra,accredited,accretive,accrued,accumulate,accumulated,accumulates,accumulating,accumulation,...,yodelling,yoffi,yoga,yojiro,yokel,yoko,yolande,yomiuri,yomuri,yon,yonca,yoo,yootha,yoran,yorans,yore,york,yorker,yorkregion,yorkshire,yosemite,yoshi,yoshiaki,yoshifumi,yoshino,yoshinoya,yosync,youcrave,youd,youe,yougov,youll,young,youngblood,younger,youngest,youngmans,youngster,younguns,youre,yourtv,youself,youssou,youth,youthful,youtube,youve,youwecan,youzhny,yoxall,yr,yself,yu,yuan,yuck,yudhoyonos,yue,yuen,yuganksneftegas,yugansk,yuganskneftegas,yuganskneftegaz,yuganskneftgas,yugansks,yugoslavia,yugoslavian,yuji,yuko,yukon,yukos,yukoss,yul,yule,yuletide,yulia,yum,yumiko,yumminot,yun,yungmin,yup,yuppie,yuppy,yuri,yury,yushchenko,yushchenkos,yusuf,yuvstrong,yvaine,yvette,yvonne,zabaleta,zabalza,zabriskie,zach,zack,zadie,zafi,zag,zahn,zaidi,zaire,zajec,zajecs,zakuani,zale,zalman,zambia,zambian,zander,zane,zang,zantaras,zanuck,zanussi,zany,zapata,zapped,zar,zara,zardari,zardine,zaslow,zatoichi,zaz,zazzle,zb,zdf,zdnet,zdravstvomk,ze,zeal,zealand,zealander,zealot,zealous,zealousness,zecchin,zechs,zed,zee,zeeuw,zeffirelli,zeh,zeitgeist,zelah,zelda,zeldas,zellweger,zellwegers,zelwegger,zemeckis,zen,zenden,zenia,zenith,zentropa,zep,zephaniah,zephyr,zeppelin,zerneck,zero,zeroni,zest,zesty,zeta,zetterqvist,zeus,zhang,zhaoxing,zheng,zib,zidane,zidanes,ziegfeld,ziers,zig,zigzagged,zika,zilch,zillion,zillonlife,zima,zimbabwe,zinc,zindulka,zine,zineb,zinedine,zing,zinger,zinneman,zionism,zip,zipless,zither,zithromax,ziyi,zmed,zodiac,zoe,zoellick,zoey,zoheb,zola,zomba,zombi,zombic,zombie,zombieapocalypseready,zomcom,zon,zone,zonealarm,zoned,zoo,zoolander,zoom,zooropa,zoot,zorba,zorina,zornotza,zorro,zosch,zsigmond,zuari,zubair,zucco,zuccos,zucker,zuckerman,zuckers,zues,zuf,zula,zully,zuluaga,zungia,zuniga,zurers,zurich,zurlini,zurlinis,zutons,zvonareva,zvyagintsev,zzzzzzzzz
post_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.072643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076073,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052302,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7315,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
7316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
7317,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19219,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
7318,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Clusterize TF-IDF text data with PCA
from sklearn.decomposition import PCA

centered = tfidf_data - tfidf_data.mean()

pca = PCA(n_components=20)
pca_decomp = pca.fit_transform(centered)

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=15, random_state=0).fit(pca_decomp)

posts_features['TextCluster'] = kmeans.labels_

dists_columns = [f'DistanceToCluster{i}' for i in range(1, 16)]

dists_df = pd.DataFrame(
    data=kmeans.transform(pca_decomp),
    columns=dists_columns
)

dists_df.head()

Unnamed: 0,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15
0,0.434768,0.440013,0.381983,0.425538,0.542192,0.439351,0.414308,0.454437,0.539391,0.579506,0.5021,0.501045,0.576863,0.118573,0.473099
1,0.283319,0.296261,0.201951,0.150595,0.425286,0.27634,0.250033,0.319698,0.446933,0.339266,0.423626,0.375876,0.483084,0.2932,0.334597
2,0.325209,0.322879,0.168358,0.335173,0.458011,0.322384,0.288049,0.34947,0.477249,0.54398,0.434918,0.395001,0.498516,0.162115,0.361589
3,0.308098,0.314921,0.09822,0.308009,0.435133,0.313974,0.275435,0.326979,0.46662,0.516335,0.446583,0.38997,0.450244,0.287323,0.354193
4,0.227377,0.191019,0.126388,0.21403,0.36565,0.218713,0.111942,0.223434,0.394221,0.467461,0.351766,0.302081,0.422501,0.327182,0.257176


In [21]:
# Join clusters' distances to posts' features
posts_features = pd.concat((posts_features, dists_df), axis=1)

In [22]:
posts_features.head()

Unnamed: 0,post_id,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15
0,1,UK economy facing major risks\n\nThe UK manufa...,business,13,0.434768,0.440013,0.381983,0.425538,0.542192,0.439351,0.414308,0.454437,0.539391,0.579506,0.5021,0.501045,0.576863,0.118573,0.473099
1,2,Aids and climate top Davos agenda\n\nClimate c...,business,3,0.283319,0.296261,0.201951,0.150595,0.425286,0.27634,0.250033,0.319698,0.446933,0.339266,0.423626,0.375876,0.483084,0.2932,0.334597
2,3,Asian quake hits European shares\n\nShares in ...,business,13,0.325209,0.322879,0.168358,0.335173,0.458011,0.322384,0.288049,0.34947,0.477249,0.54398,0.434918,0.395001,0.498516,0.162115,0.361589
3,4,India power shares jump on debut\n\nShares in ...,business,2,0.308098,0.314921,0.09822,0.308009,0.435133,0.313974,0.275435,0.326979,0.46662,0.516335,0.446583,0.38997,0.450244,0.287323,0.354193
4,5,Lacroix label bought by US firm\n\nLuxury good...,business,6,0.227377,0.191019,0.126388,0.21403,0.36565,0.218713,0.111942,0.223434,0.394221,0.467461,0.351766,0.302081,0.422501,0.327182,0.257176


In [23]:
# Combine feed data and posts' features into one dataframe
df = pd.merge(feed_df, posts_features,
              how='left', 
              on='post_id')
df.head()

Unnamed: 0,timestamp,user_id,post_id,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15
0,2021-10-19 09:44:44,51461,2173,view,Anti-tremor mouse stops PC shakes\n\nA special...,tech,0,0.09858,0.251744,0.258878,0.248599,0.412333,0.27113,0.211757,0.286883,0.415042,0.499767,0.398505,0.34515,0.468345,0.353714,0.303898
1,2021-10-19 09:44:56,51461,7078,view,Synopsis Correction: The ending does not show ...,movie,1,0.288051,0.088727,0.261462,0.273844,0.380222,0.239673,0.154983,0.247371,0.394274,0.483752,0.371346,0.178991,0.426562,0.377144,0.14339
2,2021-10-19 09:46:56,51461,6730,view,watch a team of bomb disposal experts in Iraq ...,movie,14,0.377099,0.211101,0.354432,0.354406,0.453957,0.307506,0.278052,0.343412,0.421971,0.544852,0.416608,0.307127,0.510195,0.442829,0.096493
3,2021-10-19 09:49:06,51461,4360,view,If this is the authors and directors idea of a...,movie,14,0.34182,0.171946,0.31613,0.306209,0.428545,0.292056,0.233536,0.309717,0.376987,0.525275,0.406596,0.292362,0.488435,0.413484,0.154732
4,2021-10-19 09:51:17,51461,2300,view,Web photo storage market hots up\n\nAn increas...,tech,0,0.081961,0.290732,0.284872,0.312687,0.436966,0.324163,0.253312,0.32277,0.450407,0.514804,0.436208,0.374345,0.498826,0.39831,0.33453


In [24]:
# Extract potentially useful features from timestamp (e.g. hour, month)
df['hour'] = pd.to_datetime(df['timestamp']).apply(lambda x: x.hour)
df['month'] = pd.to_datetime(df['timestamp']).apply(lambda x: x.month)

df.head()

Unnamed: 0,timestamp,user_id,post_id,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month
0,2021-10-19 09:44:44,51461,2173,view,Anti-tremor mouse stops PC shakes\n\nA special...,tech,0,0.09858,0.251744,0.258878,0.248599,0.412333,0.27113,0.211757,0.286883,0.415042,0.499767,0.398505,0.34515,0.468345,0.353714,0.303898,9,10
1,2021-10-19 09:44:56,51461,7078,view,Synopsis Correction: The ending does not show ...,movie,1,0.288051,0.088727,0.261462,0.273844,0.380222,0.239673,0.154983,0.247371,0.394274,0.483752,0.371346,0.178991,0.426562,0.377144,0.14339,9,10
2,2021-10-19 09:46:56,51461,6730,view,watch a team of bomb disposal experts in Iraq ...,movie,14,0.377099,0.211101,0.354432,0.354406,0.453957,0.307506,0.278052,0.343412,0.421971,0.544852,0.416608,0.307127,0.510195,0.442829,0.096493,9,10
3,2021-10-19 09:49:06,51461,4360,view,If this is the authors and directors idea of a...,movie,14,0.34182,0.171946,0.31613,0.306209,0.428545,0.292056,0.233536,0.309717,0.376987,0.525275,0.406596,0.292362,0.488435,0.413484,0.154732,9,10
4,2021-10-19 09:51:17,51461,2300,view,Web photo storage market hots up\n\nAn increas...,tech,0,0.081961,0.290732,0.284872,0.312687,0.436966,0.324163,0.253312,0.32277,0.450407,0.514804,0.436208,0.374345,0.498826,0.39831,0.33453,9,10


In [25]:
df.set_index(df['user_id'].apply(str) + '_' + df['post_id'].apply(str), inplace=True)

In [26]:
# Sort dataframe by timestamp in ascending order
df.sort_values('timestamp', ascending=True, inplace=True)
df = df[~df.index.duplicated(keep='last')]
df.head()

Unnamed: 0,timestamp,user_id,post_id,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month
87173_6030,2021-10-01 06:01:40,87173,6030,view,"Please, do not waste your time and money with ...",movie,14,0.32644,0.176283,0.299701,0.297541,0.421611,0.273023,0.234267,0.311888,0.411274,0.509769,0.37428,0.297296,0.447655,0.383495,0.144543,6,10
107767_685,2021-10-01 06:01:52,107767,685,view,US afflicted with awards fatigue\n\nThe film w...,entertainment,8,0.477216,0.449788,0.466476,0.439238,0.582331,0.463687,0.45545,0.507981,0.191401,0.572726,0.554835,0.50599,0.632575,0.528009,0.408999,6,10
128167_6206,2021-10-01 06:01:52,128167,6206,view,I have just recently purchased collection one ...,movie,1,0.30338,0.135984,0.284932,0.288045,0.401662,0.260593,0.188604,0.272379,0.414791,0.504573,0.396075,0.281044,0.467373,0.383496,0.242414,6,10
134967_5272,2021-10-01 06:01:52,134967,5272,view,"What can you say about a grainy, poorly filmed...",movie,6,0.271094,0.140489,0.223548,0.255108,0.354056,0.225629,0.10749,0.205543,0.364864,0.491396,0.366321,0.238344,0.356339,0.348685,0.189012,6,10
141770_583,2021-10-01 06:01:52,141770,583,view,Oscar nominee Dan OHerlihy dies\n\nIrish actor...,entertainment,8,0.382911,0.302588,0.358331,0.362262,0.465927,0.337431,0.300863,0.36422,0.171244,0.555855,0.427678,0.376916,0.521437,0.43474,0.310235,6,10


In [27]:
# Replace 'action' entries with 1('like') and 0(others)
df['action'] = np.where(df['action'] == 'like', 1, 0)
df.head()

Unnamed: 0,timestamp,user_id,post_id,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month
87173_6030,2021-10-01 06:01:40,87173,6030,0,"Please, do not waste your time and money with ...",movie,14,0.32644,0.176283,0.299701,0.297541,0.421611,0.273023,0.234267,0.311888,0.411274,0.509769,0.37428,0.297296,0.447655,0.383495,0.144543,6,10
107767_685,2021-10-01 06:01:52,107767,685,0,US afflicted with awards fatigue\n\nThe film w...,entertainment,8,0.477216,0.449788,0.466476,0.439238,0.582331,0.463687,0.45545,0.507981,0.191401,0.572726,0.554835,0.50599,0.632575,0.528009,0.408999,6,10
128167_6206,2021-10-01 06:01:52,128167,6206,0,I have just recently purchased collection one ...,movie,1,0.30338,0.135984,0.284932,0.288045,0.401662,0.260593,0.188604,0.272379,0.414791,0.504573,0.396075,0.281044,0.467373,0.383496,0.242414,6,10
134967_5272,2021-10-01 06:01:52,134967,5272,0,"What can you say about a grainy, poorly filmed...",movie,6,0.271094,0.140489,0.223548,0.255108,0.354056,0.225629,0.10749,0.205543,0.364864,0.491396,0.366321,0.238344,0.356339,0.348685,0.189012,6,10
141770_583,2021-10-01 06:01:52,141770,583,0,Oscar nominee Dan OHerlihy dies\n\nIrish actor...,entertainment,8,0.382911,0.302588,0.358331,0.362262,0.465927,0.337431,0.300863,0.36422,0.171244,0.555855,0.427678,0.376916,0.521437,0.43474,0.310235,6,10


In [28]:
df.shape

(4261538, 24)

In [29]:
# Join user data to our dataframe
df = df.merge(users_df, 
              how='left', 
              on='user_id')
df.head()

Unnamed: 0,timestamp,user_id,post_id,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month,gender,age,country,city,exp_group,os,source
0,2021-10-01 06:01:40,87173,6030,0,"Please, do not waste your time and money with ...",movie,14,0.32644,0.176283,0.299701,0.297541,0.421611,0.273023,0.234267,0.311888,0.411274,0.509769,0.37428,0.297296,0.447655,0.383495,0.144543,6,10,1,36,Russia,Novokuznetsk,0,Android,ads
1,2021-10-01 06:01:52,107767,685,0,US afflicted with awards fatigue\n\nThe film w...,entertainment,8,0.477216,0.449788,0.466476,0.439238,0.582331,0.463687,0.45545,0.507981,0.191401,0.572726,0.554835,0.50599,0.632575,0.528009,0.408999,6,10,0,31,Russia,Rostov,4,Android,organic
2,2021-10-01 06:01:52,128167,6206,0,I have just recently purchased collection one ...,movie,1,0.30338,0.135984,0.284932,0.288045,0.401662,0.260593,0.188604,0.272379,0.414791,0.504573,0.396075,0.281044,0.467373,0.383496,0.242414,6,10,1,18,Russia,Vladivostok,2,iOS,organic
3,2021-10-01 06:01:52,134967,5272,0,"What can you say about a grainy, poorly filmed...",movie,6,0.271094,0.140489,0.223548,0.255108,0.354056,0.225629,0.10749,0.205543,0.364864,0.491396,0.366321,0.238344,0.356339,0.348685,0.189012,6,10,1,21,Russia,Moscow,3,Android,organic
4,2021-10-01 06:01:52,141770,583,0,Oscar nominee Dan OHerlihy dies\n\nIrish actor...,entertainment,8,0.382911,0.302588,0.358331,0.362262,0.465927,0.337431,0.300863,0.36422,0.171244,0.555855,0.427678,0.376916,0.521437,0.43474,0.310235,6,10,0,27,Russia,Astrakhan,0,Android,organic


In [30]:
# Sort dataframe by user_id and set new index
df.sort_values('user_id', inplace=True)
df.set_index(['user_id', 'post_id'], inplace=True)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,timestamp,action,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month,gender,age,country,city,exp_group,os,source
user_id,post_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
240,5087,2021-12-08 16:08:41,0,I cant really criticize this film. It is liter...,movie,14,0.342826,0.192005,0.30487,0.32941,0.424274,0.26855,0.234994,0.306012,0.36392,0.524997,0.389967,0.288052,0.473445,0.402879,0.10173,16,12,1,20,Belarus,Smarhon’,0,Android,ads
240,68,2021-12-23 08:43:14,0,French suitor holds LSE meeting\n\nEuropean st...,business,2,0.329821,0.302287,0.115646,0.298119,0.432305,0.295643,0.257895,0.328571,0.463426,0.51976,0.425976,0.38207,0.482498,0.350224,0.34482,8,12,1,20,Belarus,Smarhon’,0,Android,ads
240,164,2021-12-29 18:07:22,0,Cash gives way to flexible friend\n\nSpending ...,business,6,0.202642,0.187897,0.131047,0.15982,0.378171,0.204498,0.126211,0.23232,0.371622,0.419919,0.347837,0.299132,0.441345,0.268668,0.254788,18,12,1,20,Belarus,Smarhon’,0,Android,ads
240,6187,2021-11-25 07:12:32,0,STAR RATING: ***** Saturday Night **** Friday ...,movie,14,0.323926,0.196446,0.300355,0.299088,0.422905,0.285544,0.228786,0.310242,0.388511,0.518673,0.39798,0.312003,0.460451,0.397302,0.077336,7,11,1,20,Belarus,Smarhon’,0,Android,ads
240,5236,2021-12-26 14:35:23,0,If you are looking for a cinematic masterpiece...,movie,11,0.418514,0.266695,0.413834,0.420044,0.510879,0.395631,0.351193,0.414798,0.458193,0.588772,0.480858,0.163023,0.557283,0.486204,0.247365,14,12,1,20,Belarus,Smarhon’,0,Android,ads


In [31]:
# Remove irrelevant columns, but keep "timestamp"
df.drop('text', axis=1, inplace=True)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,timestamp,action,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15,hour,month,gender,age,country,city,exp_group,os,source
user_id,post_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
240,5087,2021-12-08 16:08:41,0,movie,14,0.342826,0.192005,0.30487,0.32941,0.424274,0.26855,0.234994,0.306012,0.36392,0.524997,0.389967,0.288052,0.473445,0.402879,0.10173,16,12,1,20,Belarus,Smarhon’,0,Android,ads
240,68,2021-12-23 08:43:14,0,business,2,0.329821,0.302287,0.115646,0.298119,0.432305,0.295643,0.257895,0.328571,0.463426,0.51976,0.425976,0.38207,0.482498,0.350224,0.34482,8,12,1,20,Belarus,Smarhon’,0,Android,ads
240,164,2021-12-29 18:07:22,0,business,6,0.202642,0.187897,0.131047,0.15982,0.378171,0.204498,0.126211,0.23232,0.371622,0.419919,0.347837,0.299132,0.441345,0.268668,0.254788,18,12,1,20,Belarus,Smarhon’,0,Android,ads
240,6187,2021-11-25 07:12:32,0,movie,14,0.323926,0.196446,0.300355,0.299088,0.422905,0.285544,0.228786,0.310242,0.388511,0.518673,0.39798,0.312003,0.460451,0.397302,0.077336,7,11,1,20,Belarus,Smarhon’,0,Android,ads
240,5236,2021-12-26 14:35:23,0,movie,11,0.418514,0.266695,0.413834,0.420044,0.510879,0.395631,0.351193,0.414798,0.458193,0.588772,0.480858,0.163023,0.557283,0.486204,0.247365,14,12,1,20,Belarus,Smarhon’,0,Android,ads


In [32]:
# Since the data is time-structured, we'll split the data by time for validation
# Find max and min timestamp

max(df.timestamp), min(df.timestamp)

(Timestamp('2021-12-29 23:43:15'), Timestamp('2021-10-01 06:01:40'))

In [33]:
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score

cat_cols = [
    'topic', 'TextCluster', 'gender', 'country',
    'city', 'exp_group', 'hour', 'month',
    'os', 'source'
]

# Define our time threshold as December 15, 2021, for example.
# We'll use the features obtained before the threshold to train our model and then test it on data obtained after that date.
df_train = (df[df.timestamp < '2021-12-15']).drop('timestamp', axis=1)
df_test = (df[df.timestamp >= '2021-12-15']).drop('timestamp', axis=1)

X_train= df_train.drop('action', axis=1)
X_test = df_test.drop('action', axis=1)

y_train = df_train['action']
y_test = df_test['action']

In [34]:
# Use CatBoostClassifier
model = CatBoostClassifier(
    iterations=100,
    depth=3,
    cat_features=cat_cols
)
model.fit(X_train, y_train)
print(f"ROC-AUC score on train: {roc_auc_score(y_train, model.predict_proba(X_train)[:, 1])}")
print(f"ROC-AUC score on test: {roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])}") 

Learning rate set to 0.5
0:	learn: 0.4235559	total: 1.81s	remaining: 2m 59s
1:	learn: 0.3691825	total: 3.4s	remaining: 2m 46s
2:	learn: 0.3564331	total: 4.86s	remaining: 2m 37s
3:	learn: 0.3533559	total: 6.08s	remaining: 2m 25s
4:	learn: 0.3483990	total: 7.34s	remaining: 2m 19s
5:	learn: 0.3471346	total: 8.59s	remaining: 2m 14s
6:	learn: 0.3465991	total: 9.82s	remaining: 2m 10s
7:	learn: 0.3463477	total: 11s	remaining: 2m 6s
8:	learn: 0.3461805	total: 12.3s	remaining: 2m 4s
9:	learn: 0.3460688	total: 13.4s	remaining: 2m 1s
10:	learn: 0.3459832	total: 14.7s	remaining: 1m 58s
11:	learn: 0.3445850	total: 15.9s	remaining: 1m 56s
12:	learn: 0.3436191	total: 17.4s	remaining: 1m 56s
13:	learn: 0.3435405	total: 18.7s	remaining: 1m 55s
14:	learn: 0.3433981	total: 19.9s	remaining: 1m 52s
15:	learn: 0.3429360	total: 21.1s	remaining: 1m 50s
16:	learn: 0.3429009	total: 22.3s	remaining: 1m 48s
17:	learn: 0.3428678	total: 23.5s	remaining: 1m 46s
18:	learn: 0.3427617	total: 24.7s	remaining: 1m 45s
19:

In [None]:
# import seaborn as sns
# import numpy as np
# import matplotlib.pyplot as plt

# def plot_feature_importance(importance, names, model_type):
    
#     #Create arrays from feature importance and feature names
#     feature_importance = np.array(importance)
#     feature_names = np.array(names)
    
#     #Create a DataFrame using a Dictionary
#     data={'feature_names':feature_names,'feature_importance':feature_importance}
#     fi_df = pd.DataFrame(data)
    
#     #Sort the DataFrame in order decreasing feature importance
#     fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
    
#     #Define size of bar plot
#     plt.figure(figsize=(10,8))
#     #Plot Searborn bar chart
#     sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
#     #Add chart labels
#     plt.title(model_type + 'FEATURE IMPORTANCE')
#     plt.xlabel('FEATURE IMPORTANCE')
#     plt.ylabel('FEATURE NAMES')
    
# plot_feature_importance(catboost.feature_importances_,X_train.columns,'Catboost')

In [35]:
### Save the model

model.save_model(
    'catboost_model_tfidf',
    format="cbm"                  
)

In [38]:
### Put the posts_features to DataBase.

posts_features.to_sql(    
   "pg_posts_features_tfidf",                    
    con="postgresql://robot-startml-ro:pheiph0hahj1Vaif@"
        "postgres.lab.karpov.courses:6432/startml",                      
    schema="public",                   
    if_exists='replace',
    index=False
   )                              
        

In [39]:
### Ensure if post_features have been correctly uploaded.

test_ = pd.read_sql(
    """
    SELECT * 
    FROM public.pg_posts_features_tfidf
    """,
    
    con="postgresql://robot-startml-ro:pheiph0hahj1Vaif@"
        "postgres.lab.karpov.courses:6432/startml"
)

test_

Unnamed: 0,post_id,text,topic,TextCluster,DistanceToCluster1,DistanceToCluster2,DistanceToCluster3,DistanceToCluster4,DistanceToCluster5,DistanceToCluster6,DistanceToCluster7,DistanceToCluster8,DistanceToCluster9,DistanceToCluster10,DistanceToCluster11,DistanceToCluster12,DistanceToCluster13,DistanceToCluster14,DistanceToCluster15
0,1,UK economy facing major risks\n\nThe UK manufa...,business,13,0.434768,0.440013,0.381983,0.425538,0.542192,0.439351,0.414308,0.454437,0.539391,0.579506,0.502100,0.501045,0.576863,0.118573,0.473099
1,2,Aids and climate top Davos agenda\n\nClimate c...,business,3,0.283319,0.296261,0.201951,0.150595,0.425286,0.276340,0.250033,0.319698,0.446933,0.339266,0.423626,0.375876,0.483084,0.293200,0.334597
2,3,Asian quake hits European shares\n\nShares in ...,business,13,0.325209,0.322879,0.168358,0.335173,0.458011,0.322384,0.288049,0.349470,0.477249,0.543980,0.434918,0.395001,0.498516,0.162115,0.361589
3,4,India power shares jump on debut\n\nShares in ...,business,2,0.308098,0.314921,0.098220,0.308009,0.435133,0.313974,0.275435,0.326979,0.466620,0.516335,0.446583,0.389970,0.450244,0.287323,0.354193
4,5,Lacroix label bought by US firm\n\nLuxury good...,business,6,0.227377,0.191019,0.126388,0.214030,0.365650,0.218713,0.111942,0.223434,0.394221,0.467461,0.351766,0.302081,0.422501,0.327182,0.257176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7018,7315,"OK, I would not normally watch a Farrelly brot...",movie,11,0.392529,0.223144,0.383168,0.373347,0.468023,0.343133,0.314127,0.369652,0.487145,0.556810,0.458426,0.121244,0.528805,0.460692,0.312710
7019,7316,I give this movie 2 stars purely because of it...,movie,11,0.337854,0.191920,0.329814,0.337992,0.434906,0.323969,0.251392,0.317179,0.430117,0.533833,0.419855,0.083198,0.494837,0.418254,0.241527
7020,7317,I cant believe this film was allowed to be mad...,movie,14,0.322063,0.166311,0.302322,0.309746,0.401890,0.274997,0.201729,0.275811,0.395539,0.522645,0.407111,0.284097,0.466161,0.408749,0.068664
7021,7318,The version I saw of this film was the Blockbu...,movie,14,0.339304,0.186661,0.317661,0.316763,0.434467,0.293464,0.245071,0.319898,0.356548,0.514925,0.408527,0.286156,0.488112,0.408439,0.168844


In [30]:
import hashlib

def get_exp_group(id, salt='my_salt', ab_size=0.5):
    encoded_id = int(hashlib.md5((str(id) + 'my_salt').encode()).hexdigest()[:6], 16) / 0xFFFFFF
    if encoded_id > ab_size:
        return 'test'
    else:
        return 'control'

In [33]:
users_df['ab_group'] = users_df.user_id.apply(get_exp_group)
users_df

Unnamed: 0,user_id,gender,age,country,city,exp_group,os,source,ab_group
0,200,1,34,Russia,Degtyarsk,3,Android,ads,test
1,201,0,37,Russia,Abakan,0,Android,ads,control
2,202,1,17,Russia,Smolensk,4,Android,ads,test
3,203,0,18,Russia,Moscow,1,iOS,ads,control
4,204,0,36,Russia,Anzhero-Sudzhensk,3,Android,ads,control
...,...,...,...,...,...,...,...,...,...
163200,168548,0,36,Russia,Kaliningrad,4,Android,organic,test
163201,168549,0,18,Russia,Tula,2,Android,organic,test
163202,168550,1,41,Russia,Yekaterinburg,4,Android,organic,control
163203,168551,0,38,Russia,Moscow,3,iOS,organic,test
