In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os 
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tools.sm_exceptions import ConvergenceWarning

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore", message="Non-stationary starting autoregressive parameters found")
warnings.filterwarnings("ignore", message="Non-invertible starting MA parameters found")

In [3]:
from config import ARTICLE_SUMMARIES_FILE, SARIMAX_SUMMARY_CLUSTER_DIR
from meta_data_utils import sanitize_filename, get_meta_data, get_cat_to_articles
from data_utils import load_data, get_date_columns, check_stationarity, get_page_to_article_domain_mapping, filter_articles, get_page_info_df
from plot_utils import plot_stacked_time_series

In [4]:
raw_data = load_data(transpose=False, remove_inactive_articles=True)
date_columns = get_date_columns(raw_data)

In [5]:
page_info_df = get_page_info_df()

In [6]:
# merged_df = pd.merge(page_info_df[['page', 'article']], raw_data, on='page', how='inner')
# merged_df.drop('', axis=1, inplace=True)
df = raw_data.copy()
df.set_index('page', inplace=True)
df = df.T
df.index = pd.to_datetime(df.index)
df.head()

page,2NE1_zh.wikipedia.org_all-access_spider,2PM_zh.wikipedia.org_all-access_spider,3C_zh.wikipedia.org_all-access_spider,4minute_zh.wikipedia.org_all-access_spider,5566_zh.wikipedia.org_all-access_spider,A'N'D_zh.wikipedia.org_all-access_spider,AKB48_zh.wikipedia.org_all-access_spider,ASCII_zh.wikipedia.org_all-access_spider,Ahq_e-Sports_Club_zh.wikipedia.org_all-access_spider,All_your_base_are_belong_to_us_zh.wikipedia.org_all-access_spider,...,Transgénero_es.wikipedia.org_all-access_spider,Edad_Contemporánea_es.wikipedia.org_all-access_spider,Salvador_Dalí_es.wikipedia.org_all-access_spider,Soraya_Jiménez_es.wikipedia.org_all-access_spider,Día_Internacional_del_Beso_es.wikipedia.org_all-access_spider,Chichén_Itzá_es.wikipedia.org_all-access_spider,Fecundación_es.wikipedia.org_all-access_spider,Gran_Hermano_VIP_(España)_es.wikipedia.org_all-access_spider,Modelo_atómico_de_Thomson_es.wikipedia.org_all-access_spider,Copa_América_2019_es.wikipedia.org_all-access_spider
2015-07-01,18.0,11.0,1.0,35.0,12.0,118.0,5.0,6.0,2.0,2.0,...,4.0,21.0,23.0,3.0,1.0,8.0,29.0,4.0,0.0,3.0
2015-07-02,11.0,14.0,0.0,13.0,7.0,26.0,23.0,3.0,1.0,5.0,...,3.0,32.0,40.0,7.0,3.0,13.0,16.0,25.0,2.0,10.0
2015-07-03,5.0,15.0,1.0,10.0,4.0,30.0,14.0,5.0,4.0,5.0,...,4.0,38.0,55.0,6.0,8.0,19.0,6.0,7.0,6.0,41.0
2015-07-04,13.0,18.0,1.0,94.0,5.0,24.0,12.0,12.0,4.0,1.0,...,17.0,21.0,32.0,3.0,3.0,14.0,11.0,11.0,6.0,17.0
2015-07-05,14.0,11.0,0.0,4.0,20.0,29.0,9.0,6.0,2.0,3.0,...,28.0,28.0,42.0,3.0,6.0,6.0,33.0,6.0,7.0,16.0


### Get Wikipedia Article Summary 

In [7]:
summaries_df = pd.read_csv(ARTICLE_SUMMARIES_FILE)

### Topic Modeling and Group Similar Articles

In [9]:
import re
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from gensim.parsing.preprocessing import remove_stopwords
from collections import defaultdict
from nltk.corpus import stopwords
import nltk

# Download NLTK stopwords
nltk.download('stopwords')

# Preprocessing 
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    text = str(text)
    text = text.lower()
    text = re.sub(r'\b\w{1,2}\b', '', text)  # Remove short words
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

# Preprocess summaries
summaries_df['processed_summary'] = summaries_df['summary'].apply(preprocess_text)

# Vectorize the preprocessed summaries using TF-IDF
vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english')
tfidf_matrix = vectorizer.fit_transform(summaries_df['processed_summary'])

# Apply LDA to identify topics
num_topics = 20  
lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
lda.fit(tfidf_matrix)

# Get the topic distribution for each summary
topic_distribution = lda.transform(tfidf_matrix)

# Assign each summary to the most probable topic
summaries_df['topic_id'] = np.argmax(topic_distribution, axis=1)

# Group summaries by their assigned topic
topic_to_articles = defaultdict(list)
for idx, row in summaries_df.iterrows():
    topic_to_articles[row['topic_id']].append((row['article'], row['domain']))

# Function to get the top words for each topic
def get_top_words(model, feature_names, n_top_words):
    top_words = {}
    for topic_idx, topic in enumerate(model.components_):
        top_words[topic_idx] = [feature_names[i] for i in topic.argsort()[:-n_top_words - 1:-1]]
    return top_words

# Get the feature names (words) from the TF-IDF vectorizer
tf_feature_names = vectorizer.get_feature_names_out()

# Get the top words for each topic
top_words = get_top_words(lda, tf_feature_names, 10)

# Print the top words for each topic
for topic, words in top_words.items():
    print(f"Topic {topic}: {', '.join(words)}")
    print("\n")

summaries_df[['article', 'domain', 'topic_id']].head()


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ajaykarthicksenthilkumar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Topic 0: nan, refer, python, アメリカ合衆国の俳優, みゆき, 美智子, strikeforce, xxxx, клуб, 健太郎


Topic 1: nba, golf, élection, ali, reagan, hudson, lakers, italian, yves, tor


Topic 2: series, american, film, television, created, known, written, published, based, directed


Topic 3: potter, harry, икс, google, rowling, technology, князь, computing, company, 美奈子


Topic 4: footballer, professional, plays, midfielder, club, played, attacking, актриса, striker, team


Topic 5: фильм, американский, режиссёра, роли, фильма, фильме, телесериал, года, роль, также


Topic 6: president, party, politician, served, united, states, minister, member, democratic, political


Topic 7: der, tag, kalenders, gregorianischen, jahresende, tage, somit, bleiben, zum, bis


Topic 8: que, los, del, una, por, las, como, para, fue, más


Topic 9: juegos, 日本の俳優, 日本の女優, olímpicos, refer, mediawiki, tipo, trump, movimiento, wikimedia


Topic 10: manga, shōnen, tankōbon, volumes, serialized, 日本の俳優タレント, chapters, collected, чемпи

Unnamed: 0,article,domain,topic_id
0,2NE1,zh.wikipedia.org,16
1,2PM,zh.wikipedia.org,16
2,3C,zh.wikipedia.org,11
3,4minute,zh.wikipedia.org,16
4,5566,zh.wikipedia.org,16


In [10]:
def create_page_topic_mappings(summary_df, page_info_df):
    page_to_topic = {}
    topic_to_page = defaultdict(list)

    # Create a mapping from (article, domain) to topic_id
    article_domain_to_topic = summary_df.set_index(['article', 'domain'])['topic_id'].to_dict()

    # Create page_to_topic and topic_to_page mappings
    for index, row in page_info_df.iterrows():
        article = row['article']
        domain = row['domain']
        page = row['page']
        
        if (article, domain) in article_domain_to_topic:
            topic_id = article_domain_to_topic[(article, domain)]
            page_to_topic[page] = topic_id
            topic_to_page[topic_id].append(page)

    return page_to_topic, topic_to_page

page_to_topic, topic_to_page = create_page_topic_mappings(summaries_df, page_info_df)

In [12]:
def compute_within_cluster_variance(data_df, cluster_to_pages):
    scores = defaultdict(dict)
    scaler = StandardScaler()
    # Convert data_df columns to a set for efficient look-up
    valid_columns_set = set(data_df.columns)
    
    # Compute within-cluster variance for each cluster
    for cluster_id, pages in tqdm(cluster_to_pages.items(), total=len(cluster_to_pages.items())):
        # Filter the pages that are present in the DataFrame using set intersection
        valid_pages = list(set(pages) & valid_columns_set)
        
        if len(valid_pages) < 10:
            continue    # Need at least 10 pages to compute any meaningful measure

        # Extract and standardize data
        category_data = scaler.fit_transform(data_df[valid_pages].fillna(0).T)

        # Use PCA to reduce dimensionality for large data sets
        pca = PCA(n_components=min(category_data.shape) - 1 if category_data.shape[0] > 2 else 2)
        reduced_data = pca.fit_transform(category_data)

        # Calculate variance within the cluster
        variance = np.var(reduced_data, axis=0).mean()  # Average variance across all components
        scores[cluster_id] = [variance, len(valid_pages)]

    return pd.DataFrame(scores, index=['Variance', 'Num_Pages']).T


within_cluster_variance = compute_within_cluster_variance(df, topic_to_page)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [10:53<00:00, 32.66s/it]


In [13]:
within_cluster_variance

Unnamed: 0,Variance,Num_Pages
16,1.001245,11335.0
11,1.001246,4279.0
0,1.001247,30007.0
2,1.001246,13409.0
13,1.001246,6898.0
15,1.001246,7050.0
6,1.001247,3551.0
12,1.001247,1310.0
9,1.001247,1048.0
19,1.001247,8865.0


In [15]:
def create_aggregated_df_with_proportions(data_df, topic_to_pages):
    # Initialize an empty DataFrame with the same index as data_df
    aggregated_df = pd.DataFrame(index=data_df.index)
    
    # Dictionary to store the proportion of each page in its topic aggregation
    topic_proportions = {}
    
    # Convert data_df columns to a set for efficient look-up
    valid_columns_set = set(data_df.columns)
    
    # Iterate over each topic and its pages
    for topic_id, pages in tqdm(topic_to_pages.items(), total=len(topic_to_pages.items())):
        # Filter the pages that are present in the DataFrame using set intersection
        valid_pages = list(set(pages) & valid_columns_set)
        
        if not valid_pages:
            continue
        
        # Sum the view counts of the valid pages for each day
        aggregated_df[topic_id] = data_df[valid_pages].sum(axis=1)
        
        # Calculate the proportion of each page in the aggregation
        total_views = data_df[valid_pages].sum().sum()
        topic_proportions[topic_id] = {page: data_df[page].sum() / total_views for page in valid_pages}
    
    return aggregated_df, topic_proportions

aggregated_df, topic_proportions = create_aggregated_df_with_proportions(df, topic_to_page)

aggregated_df.head()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:07<00:00,  2.53it/s]


Unnamed: 0,16,11,0,2,13,15,6,12,9,19,7,3,17,4,14,5,10,18,8,1
2015-07-01,10505023.0,1803024.0,67520051.0,12837786.0,6472189.0,7031516.0,2611946.0,937690.0,779223.0,6289589.0,813852.0,525632.0,709019.0,885449.0,1994919.0,717308.0,554581.0,2228062.0,4782959.0,477094.0
2015-07-02,10625718.0,1757603.0,70127936.0,12599544.0,5699228.0,7106207.0,2479868.0,790215.0,670003.0,6022331.0,827066.0,573199.0,718597.0,1074759.0,1964479.0,744175.0,613675.0,2180176.0,4507640.0,480480.0
2015-07-03,10601401.0,1756415.0,64999228.0,12127624.0,5776448.0,7104477.0,2359061.0,754946.0,590168.0,5602059.0,696632.0,522498.0,695350.0,879761.0,1968846.0,697943.0,559213.0,2016670.0,3815199.0,464590.0
2015-07-04,11613286.0,1880581.0,67395433.0,12707056.0,7069002.0,7593198.0,2420207.0,869897.0,599450.0,5841851.0,854808.0,536687.0,637668.0,960603.0,2047591.0,783103.0,620715.0,1982429.0,3189873.0,464309.0
2015-07-05,11691856.0,1977211.0,68364039.0,13833106.0,7074307.0,8821296.0,2543752.0,714606.0,651333.0,6173256.0,735592.0,555786.0,664333.0,946967.0,2143456.0,844864.0,673249.0,2305704.0,3732327.0,518825.0


In [17]:
def aggregate_meta_data_for_topic(topic_to_pages):
    topic_exog_features = {}

    for topic, pages in tqdm(topic_to_pages.items(), total=len(topic_to_pages.items())):
        meta_dfs = []

        for page_name in pages:
            meta_df = get_meta_data(page_name)
            if meta_df is None:
                continue
            meta_dfs.append(meta_df)

        # Aggregate meta data for all pages within a topic
        if meta_dfs:
            combined_meta_df = pd.concat(meta_dfs)
            aggregated_meta_df = combined_meta_df.groupby('date').sum()
            topic_exog_features[topic] = aggregated_meta_df

    return topic_exog_features



In [19]:
topic_exog_features = aggregate_meta_data_for_topic(topic_to_page)
first_topic = list(topic_exog_features.keys())[0]
print(topic_exog_features[first_topic].head())

  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).ffill()
  meta_df['end_of_day_size'] = meta_df['end_of_day_size'].replace(0, pd.NA).

            total_edits  total_bytes_added  unique_editors  mobile edit  \
date                                                                      
2015-07-01       3179.0           351572.0          2072.0        475.0   
2015-07-02       2898.0            58410.0          1879.0        437.0   
2015-07-03       3132.0            38202.0          1885.0        503.0   
2015-07-04       3058.0            76277.0          1831.0        585.0   
2015-07-05       3045.0            37296.0          1954.0        418.0   

            mobile web edit  visualeditor  mw-reverted  mobile app edit  \
date                                                                      
2015-07-01            410.0         170.0          0.0             65.0   
2015-07-02            403.0         137.0          0.0             34.0   
2015-07-03            466.0          65.0          0.0             37.0   
2015-07-04            532.0         120.0          0.0             53.0   
2015-07-05            37




In [22]:
os.makedirs(SARIMAX_SUMMARY_CLUSTER_DIR, exist_ok=True)

### SARIMAX Model Building

In [None]:
def fit_sarima(series, p, d, q, P, D, Q, s, exog=None):
    model = SARIMAX(series, exog=exog, order=(p, d, q), seasonal_order=(P, D, Q, s))
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=ConvergenceWarning)
            fitted_model = model.fit(disp=False)
    except Exception as e:
        print(f"Fitting SARIMA({p},{d},{q}) x ({P},{D},{Q},{s}) failed with default start parameters: {e}")
        # Retry with initial parameters set to zero
        try:
            fitted_model = model.fit(start_params=[0] * (p + q + P + Q))
        except Exception as e:
            print(f"Retry fitting SARIMA({p},{d},{q}) x ({P},{D},{Q},{s}) failed: {e}")
            return None
    return fitted_model

def find_best_sarima_model(time_series, p_values, d_values, q_values, P_values, D_values, Q_values, s, exog=None):
    best_aic = np.inf
    best_order = None
    best_seasonal_order = None
    best_model = None

    for p in p_values:
        for d in d_values:
            for q in q_values:
                for P in P_values:
                    for D in D_values:
                        for Q in Q_values:
                            try:
                                model = fit_sarima(time_series, p, d, q, P, D, Q, s, exog)
                                if model is not None and model.aic < best_aic:
                                    best_aic = model.aic
                                    best_order = (p, d, q)
                                    best_seasonal_order = (P, D, Q, s)
                                    best_model = model
                            except Exception as e:
                                print(f"Failed to fit SARIMA({p},{d},{q}) x ({P},{D},{Q},{s}): {str(e)}")
    
    return {"order": best_order, "seasonal_order": best_seasonal_order, "model": best_model, "aic": best_aic}

def get_time_series_and_exog(data, page_name):
    time_series = data[page_name]
    time_series = time_series.asfreq('D')
    
    meta_df = topic_exog_features[page_name]
    # Ensure the index is set to 'date'
    if meta_df.index.name != 'date':
        meta_df.set_index('date', inplace=True)

    # Ensure that the exog features are aligned with the time series data
    exog = meta_df.reindex(time_series.index).asfreq('D').fillna(0)
    
    
    # # Day of the week
    # day_of_week = pd.to_datetime(time_series.index).dayofweek
    # exog_dow = pd.get_dummies(day_of_week, prefix='dow').astype(int)
    # exog_dow.index = time_series.index
    
    # # Month of the year
    # month_of_year = pd.to_datetime(time_series.index).month
    # exog_month = pd.get_dummies(month_of_year, prefix='month').astype(int)
    # exog_month.index = time_series.index
    
    # # Is weekend
    # is_weekend = (day_of_week >= 5).astype(int)
    # exog_weekend = pd.DataFrame(is_weekend, index=time_series.index, columns=['is_weekend'])
    
    # # Is holiday
    # holidays = calendar().holidays(start=time_series.index.min(), end=time_series.index.max())
    # is_holiday = time_series.index.isin(holidays).astype(int)
    # exog_holiday = pd.DataFrame(is_holiday, index=time_series.index, columns=['is_holiday'])
    
    # # Combine all exogenous features
    # exog = pd.concat([exog_dow, exog_month, exog_weekend, exog_holiday], axis=1)
    
    return time_series, exog

def train_test_split(series, exog, test_size):
    train = series[:-test_size]
    test = series[-test_size:]
    train_exog = exog[:-test_size]
    test_exog = exog[-test_size:]
    return train, test, train_exog, test_exog

def process_page(page_name, data, date_columns, p_values, d_values, q_values, P_values, D_values, Q_values, S, test_size):
    print(f"Processing SARIMA models for page: {page_name}")
        
    # Get the time series data for the page
    time_series, exog = get_time_series_and_exog(data, page_name)
    
    # Skip the current iteration if no data was found
    if time_series is None:
        return

    # Determine if the series is stationary
    is_stationary = check_stationarity(time_series)

    # Adjust d_values based on stationarity
    adjusted_d_values = [0] if is_stationary else d_values

    # Split the data into training and testing sets
    train_series, test_series, train_exog, test_exog =  train_test_split(time_series, exog, test_size)

    # Find the best ARIMA model for the time series
    best_model_info = find_best_sarima_model(train_series, p_values, adjusted_d_values, q_values, P_values, D_values, Q_values, S, exog=train_exog)
    
    print(f"Best SARIMA model for {page_name}: Order={best_model_info['order']} AIC={best_model_info['aic']:.2f}")

    return best_model_info


def process_and_save_models(data, date_columns, p_values, d_values, q_values, P_values, D_values, Q_values, S, test_size):
    # Iterate over all unique pages in the DataFrame
    for page_name in data.columns:
        best_model_info = process_page(page_name, data, date_columns, p_values, d_values, q_values, P_values, D_values, Q_values, S, test_size)
        if best_model_info is not None:
            # Save each model's information into a separate pickle file
            with open(os.path.join(SARIMAX_SUMMARY_CLUSTER_DIR, f'best_agg_model_{page_name}.pkl'), 'wb') as f:
                pickle.dump(best_model_info, f)

p_values = [0]
d_values = [1]
q_values = [0]
P_values = [1]
D_values = [1]
Q_values = [0]
s = 60

test_size = 30
process_and_save_models(aggregated_df.iloc[:, :2], date_columns, p_values, d_values, q_values, P_values, D_values, Q_values, s, test_size)

Processing SARIMA models for page: 16


In [None]:
def smape(actual, forecast):
    return 100 * np.mean(2 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)))

def check_residuals(model):
    residuals = model.resid
    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    residuals.plot(ax=ax[0], title="Residuals")
    residuals.plot(kind='kde', ax=ax[1], title="Density")
    plt.show()
    
def plot_forecast_vs_actual(train_series, test_series, train_exog, test_exog, model):
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(train_series, label='Train', color='blue', linewidth=1)
    ax.plot(test_series, label='Test', color='orange', linewidth=1)
    
    # In-sample forecast
    in_sample_forecast = model.fittedvalues
    ax.plot(in_sample_forecast, label='In-sample Forecast', color='green', linestyle='--', linewidth=2)
    
    # Out-of-sample forecast
    forecast = model.get_forecast(steps=len(test_series), exog=test_exog)
    forecast_index = pd.date_range(start=test_series.index[0], periods=len(test_series), freq='D')
    forecast_series = pd.Series(forecast.predicted_mean, index=forecast_index)
    ax.plot(forecast_series, label='Out-of-sample Forecast', color='red', linestyle='--', linewidth=2)

    sMAPE_value = smape(test_series, forecast_series)
    print(f'sMAPE: {sMAPE_value:.2f}%')
    
    # Adding titles and labels
    ax.set_title('Actual vs Forecasted Values', fontsize=16)
    ax.set_xlabel('Date', fontsize=14)
    ax.set_ylabel('Page Views', fontsize=14)
    
    # Adding legend
    ax.legend(loc='upper left', fontsize=12)
    
    # Adding grid for better readability
    ax.grid(True)
    
    # Improving the appearance
    plt.tight_layout()
    
    plt.show()

def plot_model_forecast(data, date_columns, page_name, test_size):
    try:
        with open(os.path.join(SARIMAX_CATEGORY_CLUSTER_DIR, f'best_agg_model_{page_name}.pkl'), 'rb') as f:
            best_model_info = pickle.load(f)
    except FileNotFoundError:
        print('FileNotFoundError')
        return
    
    model = best_model_info['model']
    
    time_series, exog = get_time_series_and_exog(data, page_name)
    
    if time_series is not None:
        train_series, test_series, train_exog, test_exog = train_test_split(time_series, exog, test_size)
        plot_forecast_vs_actual(train_series, test_series, train_exog, test_exog, model)
        check_residuals(model)


page_name = 11

plot_model_forecast(aggregated_df, date_columns, page_name, test_size=60)

In [10]:
np.mean([
    7.69 , 
    7.3 ,
    5.31 ,
    11.54 ,
    7.78 ,
    14.25 ,
    17.37 ,
    9.17 ,
    5.62 ,
    6.21 ,
    12.92 ,
    13.84 ,
    10.16 ,
    6.39 ,
    8.91 ,
    7.70,
    18.64 ,
    18.31 ,
    5.73 ,
    13.2 
])

10.402