
<h3> Some materials needed for this step:</h3>
    
* Finetuned model from Step 1

* Kfold dataset from Step 1
    
    
<h3> Note:</h3>
    
* The 0's fold is used for validation

* After generating Top-K candidates for training in the next step, we need to use the correlations file to add more label 1 in the training set, because although we get a very high max positive score at stage 1 for Top-K, some topics may have no label 1
    
**Reference**: https://www.kaggle.com/code/ragnar123/lecr-unsupervised-train-set-public 

In [1]:
# =========================================================================================
# Libraries
# =========================================================================================
import numpy as np
import os
import gc
import time
import math
import random
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from torch.utils.checkpoint import checkpoint
import tokenizers
import transformers
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_cosine_schedule_with_warmup, DataCollatorWithPadding
#import cupy as cp
from sklearn.model_selection import KFold
from sentence_transformers import SentenceTransformer
from unidecode import unidecode
import re
%env TOKENIZERS_PARALLELISM=false
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# =========================================================================================
# Configurations
# =========================================================================================
class CFG1:
    num_workers = 24
    model = 'model/stage-1-paraphrase-multilingual-mpnet-base-v2-epochs18'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 128
    top_n = 50
    seed = 42
    used_columns = ['title']
    
class CFG2:
    num_workers = 24
    model = 'model/stage-1-paraphrase-MiniLM-L12-v2-epochs-20-tuned'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 32
    top_n = 50
    seed = 42
    used_columns = ['title','text']
    
class CFG3:
    num_workers = 24
    model = 'model/stage-1-paraphrase-multilingual-mpnet-base-v2-tuned-4747'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 32
    top_n = 50
    seed = 42
    used_columns = 'title'
    
class CFG4:
    num_workers = 24
    model = 'sentence-transformers/all-MiniLM-L6-v2'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 32
    top_n = 50
    seed = 42
    used_columns = 'title'
    
class CFG5:
    num_workers = 24
    model = 'sentence-transformers/all-MiniLM-L12-v2'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 32
    top_n = 50
    seed = 42
    used_columns = 'title'
    
#class CFG6:
#    num_workers = 24
#    model = 'model/stage-1-all-MiniLM-L6-v2-epochs-10-tuned'
#    tokenizer = AutoTokenizer.from_pretrained(model)
#    batch_size = 32
#    top_n = 50
#    seed = 42
#    used_columns = 'title'  
    
class CFG7:
    num_workers = 24
    model = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 32
    top_n = 50
    seed = 42
    used_columns = 'title'

class CFG8:
    num_workers = 24
    model = "model/stage-1-all-MiniLM-L6-v2-epochs-1-tuned/"
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 128
    top_n = 50
    seed = 42
    used_columns = 'title'
    
class CFG9:
    num_workers = 24
    model = "model/paraphrase-multilingual-mpnet-base-v2-epochs-2.5-tuned/"
    tokenizer = AutoTokenizer.from_pretrained(model)
    batch_size = 64
    top_n = 50
    seed = 42
    used_columns = 'title'
    
CFG_list = [CFG9]
    
# =========================================================================================
# Data Loading
# =========================================================================================
def read_data(cfg):
    topics = pd.read_csv('data/topics.csv')
    #topics = topics[topics.category != 'source']
    content = pd.read_csv('data/content.csv')
    correlations = pd.read_csv('data/kfold_correlations.csv')
    #kfolds = cv_split(correlations, 5, 42)
    #kfolds.to_csv('kfold_correlations_subset.csv',index=False)
    correlations = correlations[correlations.fold == 0]
    topics = topics.merge(correlations, how = 'inner', left_on = 'id', right_on = 'topic_id')
    #correlations = pd.read_csv('/kaggle/input/lcrs-kfolds/kfold_correlations.csv')
    #correlations = correlations[correlations.fold == 0]
    # Fillna titles 
    #topics = preprocess(topics,['title','description'])
    #content = preprocess(content,['title','description'])
    
    topics['title'].fillna("", inplace = True)
    content['title'].fillna("", inplace = True)
    topics['description'].fillna("", inplace = True)
    content['description'].fillna("", inplace = True)
    
    content['text'].fillna("", inplace = True)
    content['license'].fillna("", inplace = True)
    topics['title'] =  '[CLS] ' + topics['title'] + ' <|=t_sep=|> '  + topics['description'] + ' [SEP]'
    content['title'] = '[CLS] ' + content['title'] + ' <|=t_sep=|> '  + content['description'] + ' <|=t_sep=|> ' + content['text'] + ' <|=t_sep=|> ' + content['kind'] + ' [SEP]'
    #corr['text1'] = corr['text1'].progress_apply(lambda x: white_spaces(x))
    #corr['text2'] = corr['text2'].progress_apply(lambda x: white_spaces(x))
    
    #topics['title'] =  '[CLS] ' + topics['title'].str.lower() + ' <|=t_sep=|> '  + topics['description'] + ' [SEP]'
    #content['title'] = '[CLS] ' + content['title'].str.lower() + ' <|=t_sep=|> ' + content['description'] + '<|=t_sep=|> ' + content['text'] + ' <|=t_sep=|> ' + content['license'] + ' [SEP]'
    #topics['title'] = topics['title'].progress_apply(lambda x: white_spaces(x))
    #content['title'] = content['title'].progress_apply(lambda x: white_spaces(x))
    
    # Sort by title length to make inference faster
    topics['length'] = topics['title'].astype(str).apply(lambda x: len(x))
    content['length'] = content['title'].astype(str).apply(lambda x: len(x))
    topics.sort_values('length', inplace = True)
    content.sort_values('length', inplace = True)
    
    #language_filtering = (topics.language.value_counts(normalize=True)>0.).reset_index()
    #language_filtering.columns=['language','shape']
    #language_filtering.loc[language_filtering["shape"] == False, "language_final"] = "Other"
    #language_filtering.loc[language_filtering["shape"] == True, "language_final"] = language_filtering['language']
    #content = content.merge(language_filtering[['language','language_final']])
    #topics = topics.merge(language_filtering[['language','language_final']])
    
    # Drop cols
    topics.drop(['description', 'channel', 'category', 'level', 'language', 'parent', 'has_content', 'length'], axis = 1, inplace = True)
    content.drop(['description', 'kind', 'language', 'text', 'copyright_holder', 'license', 'length'], axis = 1, inplace = True)
    # Reset index
    topics.reset_index(drop = True, inplace = True)
    content.reset_index(drop = True, inplace = True)
    print(' ')
    print('-' * 50)
    print(f"topics.shape: {topics.shape}")
    print(f"content.shape: {content.shape}")
    print(f"correlations.shape: {correlations.shape}")
    return topics, content, correlations

# =========================================================================================
# Prepare input, tokenize
# =========================================================================================
def prepare_input(text, cfg):
    inputs = cfg.tokenizer.encode_plus(
        text, 
        max_length = 64,
        truncation=True,
        return_tensors = None, 
        add_special_tokens = True, 
    )
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype = torch.long)
    return inputs

# =========================================================================================
# Unsupervised dataset
# =========================================================================================
class uns_dataset(Dataset):
    def __init__(self, df, cfg):
        self.cfg = cfg
        self.texts = df['title'].values
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, item):
        inputs = prepare_input(self.texts[item], self.cfg)
        return inputs
    
# =========================================================================================
# Mean pooling class
# =========================================================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

# =========================================================================================
# Unsupervised model
# =========================================================================================
class uns_model(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.config = AutoConfig.from_pretrained(cfg.model)
        self.model = AutoModel.from_pretrained(cfg.model, config = self.config)
        self.pool = MeanPooling()
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_state = outputs.last_hidden_state
        feature = self.pool(last_hidden_state, inputs['attention_mask'])
        return feature
    def forward(self, inputs):
        feature = self.feature(inputs)
        return feature
    
# =========================================================================================
# Get embeddings
# =========================================================================================
def get_embeddings(loader, model, device):
    model.eval()
    preds = []
    for step, inputs in enumerate(tqdm(loader)):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.to('cpu').numpy())
    preds = np.concatenate(preds)
    return preds

# =========================================================================================
# Get the amount of positive classes based on the total
# =========================================================================================
def get_pos_score(y_true, y_pred):
    y_true = y_true.apply(lambda x: set(x.split()))
    y_pred = y_pred.apply(lambda x: set(x.split()))
    int_true = np.array([len(x[0] & x[1]) / len(x[0]) for x in zip(y_true, y_pred)])
    return round(np.mean(int_true), 5)

# =========================================================================================
# F2 Score 
def f2_score(y_true, y_pred):
    y_true = y_true.apply(lambda x: set(x.split()))
    y_pred = y_pred.apply(lambda x: set(x.split()))
    tp = np.array([len(x[0] & x[1]) for x in zip(y_true, y_pred)])
    fp = np.array([len(x[1] - x[0]) for x in zip(y_true, y_pred)])
    fn = np.array([len(x[0] - x[1]) for x in zip(y_true, y_pred)])
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f2 = tp / (tp + 0.2 * fp + 0.8 * fn)
    return round(f2.mean(), 4)
# ===========================================================================================


# =========================================================================================
# Build our training set
# =========================================================================================
def build_training_set(topics, content, cfg):
    # Create lists for training
    topics_ids = []
    content_ids = []
    title1 = []
    title2 = []
    targets = []
    folds = []
    # Iterate over each topic
    for k in tqdm(range(len(topics))):
        row = topics.iloc[k]
        topics_id = row['id']
        topics_title = row['title']
        predictions = row['predictions'].split(' ')
        ground_truth = row['content_ids'].split(' ')
        fold = row['fold']
        for pred in predictions:
            content_title = content.loc[pred, 'title']
            topics_ids.append(topics_id)
            content_ids.append(pred)
            title1.append(topics_title)
            title2.append(content_title)
            folds.append(fold)
            # If pred is in ground truth, 1 else 0
            if pred in ground_truth:
                targets.append(1)
            else:
                targets.append(0)
    # Build training dataset
    train = pd.DataFrame(
        {'topics_ids': topics_ids, 
         'content_ids': content_ids, 
         'title1': title1, 
         'title2': title2, 
         'target': targets,
         'fold' : folds}
    )
    # Release memory
    del topics_ids, content_ids, title1, title2, targets
    gc.collect()
    return train
    
# =========================================================================================
# Get neighbors
# =========================================================================================

def cv_split(train, n_folds, seed):
    kfold = KFold(n_splits = n_folds, shuffle = True, random_state = seed)
    for num, (train_index, val_index) in enumerate(kfold.split(train)):
        train.loc[val_index, 'fold'] = int(num)
    train['fold'] = train['fold'].astype(int)
    return train

def white_spaces(x):
    return re.sub(' +', ' ', x)

def preprocess(df,columns):
    for col in columns:
        df[col] = df[col].fillna("")
        #df[col] = df[col].str.strip('123.!? \n\t')
        #df[col] = df[col].str[:100]
    return df

env: TOKENIZERS_PARALLELISM=false


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 1959, in _from_pretrained
    tokenizer = cls(*init_inputs, **init_kwargs)
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py", line 155, in __init__
    super().__init__(
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 111, in __init__
    fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_433753/2586126580.py", line 99, in <module>
    class CFG8:
  File "/tmp/ipykernel_433753/2586126580.py", li

TypeError: object of type 'NoneType' has no len()

In [None]:
def get_neighbors(topics, content, cfg):
    # Create topics dataset
    topics_dataset = uns_dataset(topics, cfg)
    # Create content dataset
    content_dataset = uns_dataset(content, cfg)
    # Create topics and content dataloaders
    topics_loader = DataLoader(
        topics_dataset, 
        batch_size = cfg.batch_size, 
        shuffle = False, 
        collate_fn = DataCollatorWithPadding(tokenizer = cfg.tokenizer, padding = 'longest'),
        num_workers = cfg.num_workers, 
        pin_memory = True, 
        drop_last = False
    )
    content_loader = DataLoader(
        content_dataset, 
        batch_size = cfg.batch_size, 
        shuffle = False, 
        collate_fn = DataCollatorWithPadding(tokenizer = cfg.tokenizer, padding = 'longest'),
        num_workers = cfg.num_workers, 
        pin_memory = True, 
        drop_last = False
        )
    # Create unsupervised model to extract embeddings
    model = uns_model(cfg)
    model.to(device)
    # Predict topics
    topics_embeds = get_embeddings(topics_loader, model, device)
    content_embeds = get_embeddings(content_loader, model, device)
    return topics_embeds,content_embeds

In [None]:
topics_embeds_all = []
content_embeds_all = []
for _idx, CFG in enumerate(CFG_list):
    print(f'trying:{CFG.model}')
    topics, content, correlations = read_data(CFG)
    topics_embeds,content_embeds = get_neighbors(topics,content,CFG)
    topics_embeds_all.append(topics_embeds)
    content_embeds_all.append(content_embeds)

In [None]:
#topics_embeds = np.concatenate(topics_embeds_all,axis=1)
#content_embeds = np.concatenate(content_embeds_all,axis=1)

In [None]:
topics_embeds.shape, content_embeds.shape

In [None]:
topics.rename(columns=lambda x: "topic_" + x, inplace=True)
content.rename(columns=lambda x: "content_" + x, inplace=True)

In [None]:
%%time

from annoy import AnnoyIndex
model = AnnoyIndex(topics_embeds.shape[1], 'angular')

for idx in tqdm(range(content_embeds.shape[0])):
    model.add_item(idx, content_embeds[idx])
    
model.build(300)

In [13]:
dfs = []
for topic_idx in tqdm(range(topics_embeds.shape[0])):
    content_idx = model.get_nns_by_vector(topics_embeds[topic_idx],n=50)
    df_temp = content[content.index.isin(content_idx)]
    df_temp['topic_id'] = list(topics[topics.index.isin([topic_idx])].topic_id)[0]
    df_temp['topic_title'] = list(topics[topics.index.isin([topic_idx])].topic_title)[0]
    dfs.append(df_temp)

  0%|          | 0/12304 [00:00<?, ?it/s]

In [14]:
candidates = pd.concat(dfs).reset_index(drop=True)
aa = candidates.groupby(['topic_id'])['content_id'].agg(list).reset_index()
aa['predictions'] = aa.content_id.apply(lambda x: ' '.join(x))
aa.drop('content_id',axis=1,inplace=True)

In [15]:
topics_test = aa.merge(correlations, how = 'inner', left_on = ['topic_id'], right_on = ['topic_id'])

In [16]:
pos_score = get_pos_score(topics_test['content_ids'], topics_test['predictions'])
print(f'Our max positive score is {pos_score}')

f_score = f2_score(topics_test['content_ids'], topics_test['predictions'])
print(f'Our f2_score is {f_score}')

#Validation
#Our max positive score is 0.78403
#Our f2_score is 0.2239

Our max positive score is 0.42087
Our f2_score is 0.1054


In [None]:
Our max positive score is 0.52121
Our f2_score is 0.1356

In [None]:
topics_test['predictions'] = topics_test.predictions.apply(lambda x: x.split(' '))
topics_test['content_ids'] = topics_test.content_ids.apply(lambda x: x.split(' '))
gt = topics_test[['topic_id','content_ids','fold']].explode('content_ids')
preds = topics_test[['topic_id','predictions','fold']].explode('predictions')
candidates_df = preds.merge(gt[['topic_id','content_ids']],how='left',left_on=['topic_id','predictions'], right_on=['topic_id','content_ids'])
candidates_df.loc[candidates_df.content_ids.isnull(),'target'] = 0
['target'] = candidates_df.target.fillna(1)
candidates_df.drop('content_ids',axis=1,inplace=True)

In [79]:
candidates_df = candidates_df.rename(columns={'predictions':'content_id'})

In [81]:
candidates_df = candidates_df.merge(topics[['topic_id','topic_title']],on='topic_id')
candidates_df = candidates_df.merge(content[['content_id','content_title']],on='content_id')

In [82]:
candidates_df

Unnamed: 0,topic_id,content_id,fold,target,topic_title,content_title
0,t_00004da3a1b2,c_c1de9b7501b7,1,0.0,Откриването на резисторите,Капацитет
1,t_261fb7043ad1,c_c1de9b7501b7,3,0.0,Електричен ток и електрично напрежение,Капацитет
2,t_3a1f5ae9f991,c_c1de9b7501b7,0,1.0,Вериги с кондензатори,Капацитет
3,t_46415b46914b,c_c1de9b7501b7,4,1.0,Електростатична индукция и кондензатори,Капацитет
4,t_a76d0d45b2e9,c_c1de9b7501b7,2,0.0,Електричен ток: преговор,Капацитет
...,...,...,...,...,...,...
3075845,t_fff05585df72,c_743e6319d5ae,0,1.0,11: Systems of Equations and Inequalities,11.9: Solving Systems with Cramer's Rule
3075846,t_fff05585df72,c_d9bbe8422c6b,0,1.0,11: Systems of Equations and Inequalities,11.0: Prelude to Systems of Equations and Ineq...
3075847,t_fff9e5407d13,c_b43d07ea6eef,4,0.0,NA_U06 - El periódico,La noria
3075848,t_fff9e5407d13,c_d64037a72376,4,1.0,NA_U06 - El periódico,Introducción: El periódico


In [20]:
pd.read_csv('data/train_top50_fold0_cv_with_groundtruth_final_72044.csv')#.columns

Unnamed: 0,topics_ids,content_ids,title1,title2,target,fold
0,t_3d9ad9931021,c_8a2c8da77d0c,,Agenda,1,3
1,t_3d9ad9931021,c_3f51421a7c85,,ABCD,0,3
2,t_3d9ad9931021,c_db7818729577,,,0,3
3,t_3d9ad9931021,c_eb7d5e2e1744,,Simon,0,3
4,t_3d9ad9931021,c_60dd2fc8a271,,Ihab,0,3
...,...,...,...,...,...,...
3119822,t_70da08637930,c_70b185780f10,8.1.5 Use dot (.) and cross (x) diagrams to il...,Video No. 1: Covalent Bonding,0,2
3119823,t_70da08637930,c_40b1fea5ad01,8.1.5 Use dot (.) and cross (x) diagrams to il...,More on the dot structure for sulfur dioxide,0,2
3119824,t_70da08637930,c_a73aa42d1be9,8.1.5 Use dot (.) and cross (x) diagrams to il...,Covalent bond,0,2
3119825,t_70da08637930,c_dbce33468856,8.1.5 Use dot (.) and cross (x) diagrams to il...,Diamagnetism,0,2


In [84]:
candidates_df.columns = ['topics_ids','content_ids','fold','target','title1','title2']

In [85]:
candidates_df[['topics_ids', 'content_ids', 'title1', 'title2', 'target', 'fold']].to_parquet('data/candidates_50_train_7840.parquet')

ALL DATA
------------------------------------------------------------------
#### NO TUNE
----------------
TOP 50
/kaggle/input/sbert-models/paraphrase-multilingual-MiniLM-L12-v2
Our max positive score is 0.41649
Our f2_score is 0.1007

----------------
TOP 50
/kaggle/input/sentence-embedding-models/paraphrase-MiniLM-L12-v2
Our max positive score is 0.44421
Our f2_score is 0.1099

----------------
TOP 50
/kaggle/input/sentence-embedding-models/paraphrase-mpnet-base-v2
Our max positive score is 0.45422
Our f2_score is 0.1133

----------------
TOP 50
/kaggle/input/sbert-models/paraphrase-multilingual-mpnet-base-v2
Our max positive score is 0.42578
Our f2_score is 0.1033

---------------
TOP 50
/kaggle/input/paraphrasemultilingualmpnetbasev2/all-MiniLM-L6-v2
Our max positive score is 0.47988
Our f2_score is 0.1216

------------------------------------------------------------------
#### TUNED
----------------
TOP 50
'/kaggle/input/paraphrase-multilingual-mpnet-base-v2-tuned/paraphrase-multilingual-mpnet-base-v2-exp_fold0_epochs8'
Our max positive score is 0.68706
Our f2_score is 0.1902

----------------
TOP 50
'/kaggle/input/stage-1-tuned/paraphrase-multilingual-mpnet-base-v2-tuned' ##15 epoch
Our max positive score is 0.72044
Our f2_score is 0.201

---------------
TOP 50
'/kaggle/input/all-minilm-l6-v2-tuned/all-MiniLM-L6-v2_fold0_epochs20/all-MiniLM-L6-v2_fold0_epochs20'
Our max positive score is 0.62932
Our f2_score is 0.1713

---------------
TOP 50
'/kaggle/input/all-minilm-l6-v2-tuned/all-MiniLM-L6-v2_fold0_epochs8/all-MiniLM-L6-v2_fold0_epochs8'
Our max positive score is 0.59703
Our f2_score is 0.1607

In [6]:
# Build training set
full_correlations = pd.read_csv('/kaggle/input/all-minilm-l6-v2-tuned/kfold_correlations.csv')
topics_full = topics.merge(full_correlations, how = 'inner', left_on = ['id'], right_on = ['topic_id'])
topics_full['predictions'] = topics_full.apply(lambda x: ' '.join(list(set(x.predictions.split(' ') + x.content_ids.split(' ')))) \
                                               if x.fold != 0 else x.predictions, axis = 1)
train = build_training_set(topics_full, content, CFG)
print(f'Our training set has {len(train)} rows')
# Save train set to disk to train on another notebook
train.to_csv(f'train_top{CFG.top_n}_fold0_cv_with_groundtruth_final_72044.csv', index = False)
train.head()

  0%|          | 0/61517 [00:00<?, ?it/s]

Our training set has 3119827 rows


Unnamed: 0,topics_ids,content_ids,title1,title2,target,fold
0,t_3d9ad9931021,c_8a2c8da77d0c,,Agenda,1,3
1,t_3d9ad9931021,c_3f51421a7c85,,ABCD,0,3
2,t_3d9ad9931021,c_db7818729577,,,0,3
3,t_3d9ad9931021,c_eb7d5e2e1744,,Simon,0,3
4,t_3d9ad9931021,c_60dd2fc8a271,,Ihab,0,3
