In [1]:
!pip uninstall -y transformers accelerate
!pip install transformers accelerate
!pip install transformers
!pip install datasets
!pip install catboost

[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from torch import nn
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import torch
from transformers import AutoTokenizer, BertLMHeadModel,AutoModelForSequenceClassification,TrainingArguments
from transformers import TrainingArguments, pipeline, Trainer
from datasets import load_dataset, load_metric
from datasets import Dataset, DatasetDict
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler,random_split



In [15]:
def preprocess_aa(path):
  trump_train = pd.read_csv(path, sep='\t', names=['tweet_id' ,'user_handle','tweet_text','time_stamp','device'])
  trump_train = trump_train.loc[trump_train['device'].isin(['iphone','android'])].reset_index()
  mapping = {'android': 0, 'iphone': 1}
  # Apply the mapping to the 'device' column
  trump_train['device'] = trump_train['device'].map(mapping)
  vectorizer = TfidfVectorizer(min_df=10,max_df=100)
  X_vec = vectorizer.fit_transform(trump_train['tweet_text']).toarray()
  X = pd.DataFrame(X_vec,columns = vectorizer.get_feature_names_out())
  X.insert(0,'tweet_id',trump_train['tweet_id'])
  X.insert(1,'time_stamp',pd.to_datetime(trump_train.time_stamp, format="%Y-%m-%d %H:%M:%S"))
  X.insert(1,'time_year',X['time_stamp'].dt.year)
  X.insert(2,'time_month',X['time_stamp'].dt.month)
  X.insert(3,'time_day',X['time_stamp'].dt.dayofweek)
  X.insert(4,'time_hour',X['time_stamp'].dt.hour)
  X=X.drop(['tweet_id','time_stamp'],axis=1)
  y=trump_train['device']
  return X, y , vectorizer


def preprocess_test_aa(path,vectorizer):
  trump_test = pd.read_csv('trump_tweets_test_a.tsv', sep='\t', names=['user_handle','tweet_text','time_stamp'],quoting=3)
  # Apply the mapping to the 'device' column
  X_vec = vectorizer.transform(trump_test['tweet_text']).toarray()
  X = pd.DataFrame(X_vec,columns = vectorizer.get_feature_names_out())
  X.insert(0,'time_stamp',pd.to_datetime(trump_test.time_stamp, format="%Y-%m-%d %H:%M:%S"))
  X.insert(1,'time_year',X['time_stamp'].dt.year)
  X.insert(2,'time_month',X['time_stamp'].dt.month)
  X.insert(3,'time_day',X['time_stamp'].dt.dayofweek)
  X.insert(4,'time_hour',X['time_stamp'].dt.hour)
  X=X.drop(['time_stamp'],axis=1)
  return X

def preprocess_c(path):
  covid_df = pd.read_csv('intro2nlp_ex3_covid_train - train.csv',names=['text','label'])
  covid_df = covid_df.dropna()
  vectorizer = TfidfVectorizer(min_df=10,max_df=100)
  X_vec = vectorizer.fit_transform(covid_df['text']).toarray()
  X = pd.DataFrame(X_vec,columns = vectorizer.get_feature_names_out())
  y = covid_df['label']
  return X, y


def preprocess_test_c(path,pred=False):
  covid_df_test = pd.read_csv('intro2nlp_ex3_covid_test_a - Sheet1.csv',names=['text'])
  covid_df_test = covid_df_test.dropna()
  return covid_df_test

def predic_bert(model,covid_df_test):
  bert_model = 'onlplab/alephbert-base'
  tokenizer = AutoTokenizer.from_pretrained(bert_model)
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  test_input_ids = []
  test_attention_masks = []
  for tweet in covid_df_test['text']:
      encoded_dict = tokenizer.encode_plus(
                          tweet,
                          add_special_tokens = True,
                          max_length = 512,
                          pad_to_max_length = True,
                          return_attention_mask = True,
                          return_tensors = 'pt',
                    )
      test_input_ids.append(encoded_dict['input_ids'])
      test_attention_masks.append(encoded_dict['attention_mask'])

  test_input_ids = torch.cat(test_input_ids, dim=0)
  test_attention_masks = torch.cat(test_attention_masks, dim=0)

  test_dataset = TensorDataset(test_input_ids, test_attention_masks)
  test_dataloader = DataLoader(
              test_dataset, # The validation samples.
              sampler = SequentialSampler(test_dataset), # Pull out batches sequentially.
              batch_size = 1 # Evaluate with this batch size.
          )
  predictions = []
  for batch in test_dataloader:
          b_input_ids = batch[0].to(device)
          b_input_mask = batch[1].to(device)
          with torch.no_grad():
              output= model(b_input_ids,
                                    token_type_ids=None,
                                    attention_mask=b_input_mask)
              logits = output.logits
              logits = logits.detach().cpu().numpy()
              pred_flat = np.argmax(logits, axis=1).flatten()
              predictions.extend(list(pred_flat))
  return predictions

def log_reg(X_train, y_train):
  model = LogisticRegression()
  model.fit(X_train, y_train)
  return model


def svm(X_train, y_train):
  model = SVC(C= 10, gamma= 0.1, kernel='rbf')
  model.fit(X_train, y_train)
  return model


def FFNN(X_train, y_train):
  from torch.utils.data import Dataset, DataLoader
  class TrumpTweetsNN(Dataset):
    def __init__(self,x,y):
      self.x = torch.tensor(x,dtype=torch.float32)
      self.y = torch.tensor(y,dtype=torch.float32)
      self.length = self.x.shape[0]
    def __getitem__(self,idx):
      return self.x[idx],self.y[idx]
    def __len__(self):
      return self.length

  dataset = TrumpTweetsNN(X_train.values,y_train.values)

  #creating the network
  class FFNN(nn.Module):
      def __init__(self, input_size, hidden_size, output_size):
          super(FFNN, self).__init__()

          # Define the layers
          self.fc1 = nn.Linear(input_size, hidden_size)
          self.relu = nn.ReLU()
          self.fc2 = nn.Linear(hidden_size, output_size)
          self.sig = nn.Sigmoid()

      def forward(self, x):
          out = self.fc1(x)
          out = self.relu(out)
          out = self.fc2(out)
          out = self.sig(out)
          return out

  # Define the training function
  def train(model, train_loader, criterion, optimizer):
      model.train()
      total_loss = 0
      total_samples = 0

      for batch, (x, y) in enumerate(train_loader):
          optimizer.zero_grad()

          y_pred = model(x).flatten()
          loss = criterion(y_pred, y)
          total_loss+=loss.item()
          total_samples += y.size(0)

          loss.backward()

          optimizer.step()
          if batch % 50 == 0 and batch!=0:
              loss, current = loss.item(), batch * batch_size
      average_loss = total_loss / total_samples
      return average_loss


  # Create DataLoader objects
  dataloader = DataLoader(dataset=dataset,shuffle=True,batch_size=100)
  # test_dataloader = DataLoader(dataset=test_dataset,shuffle=True,batch_size=100)


  # Define hyperparameters
  input_size = X_train.shape[1]
  hidden_size = 20
  output_size = 1
  learning_rate = 0.001
  num_epochs = 100


  # Create an instance of the FFNN model
  model = FFNN(input_size, hidden_size, output_size)

  # Define the loss function and optimizer
  criterion = nn.BCELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  # Training loop
  for epoch in range(num_epochs):
      train_loss = train(model, dataloader, criterion, optimizer)
  return model

def catboost(X_train, y_train):
  model = CatBoostClassifier()
  model.fit(X_train, y_train, verbose=False)
  return model

def train_bert(train_fn,task):
  from datasets import Dataset
  if task=='aa':
    trump_train = pd.read_csv(train_fn, sep='\t', names=['tweet_id' ,'user_handle','tweet_text','time_stamp','device'])
    trump_train = trump_train.loc[trump_train['device'].isin(['iphone','android'])].reset_index()
    mapping = {'android': 0, 'iphone': 1}
    trump_train['device'] = trump_train['device'].map(mapping)
    output_size = 2
    bert_df=pd.DataFrame(trump_train[['tweet_text','device']])
    bert_df.columns=['text','label']
    dataset = Dataset.from_pandas(bert_df)
    bert_model = "bert-base-uncased"
  else:
    covid_df = pd.read_csv(train_fn,names=['text','label'])
    covid_df = covid_df.dropna()
    output_size = 3
    dataset = Dataset.from_pandas(covid_df)
    bert_model = 'onlplab/alephbert-base'

  tokenizer = AutoTokenizer.from_pretrained(bert_model)

  def tokenize_function(examples):
      return tokenizer(examples['text'], padding="max_length", truncation=True,max_length=512)

  tokenized_datasets =  dataset.map(tokenize_function,batched = True)
  small_train_dataset = tokenized_datasets.shuffle(seed=42).select(range(len(tokenized_datasets)))

  model = AutoModelForSequenceClassification.from_pretrained(bert_model, num_labels=output_size, vocab_size=tokenizer.vocab_size, ignore_mismatched_sizes=True)
  metric = load_metric("accuracy")
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  model.to(device)

  def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="no")
  trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    compute_metrics=compute_metrics)
  trainer.train()
  return model

In [4]:
def training_pipeline(task, alg, train_fn):
  """Returns a trained model given the specific task and algorithm.
      The pipeline should include all necessary steps that are needed for the
      specified algoritm (preprocessing, normalization, feature extraction - depending
      on your choice and decisions). Obviously, it is advised to implement the pipeline
      through a sequence of function calls.

    Args:
        task (str): 'aa' (authorship attribution) or 'c' (COVID) deternining,
                     the task of interest.
        alg (int): an integer between 1-5, indicating the algorithmic approach as
                    specified above (1: logistic regression, 2: svm, 3:FFNN, etc.).
        train_ fn (str): full path to the file containing the training data.

  """
  if task == 'aa':
    if alg==5:
      m = train_bert(train_fn,task)
    else:
      X_train, y_train,_ = preprocess_aa(train_fn)
      if alg==1:
        m = log_reg(X_train, y_train)
      elif alg==2:
        m = svm(X_train, y_train)
      elif alg==3:
        m = FFNN(X_train, y_train)
      elif alg==4:
        m = catboost(X_train, y_train)
  if task == 'c':
    if alg==5:
      m = train_bert(train_fn,task)
    else:
      X_train, y_train = preprocess_c(train_fn)
      if alg==1:
        m = log_reg(X_train, y_train)
      elif alg==2:
        m = svm(X_train, y_train)
      elif alg==3:
        m = FFNN(X_train, y_train)
      elif alg==4:
        m = catboost(X_train, y_train)

  return m

In [5]:
for i in range(1,6):
  print(training_pipeline(task='c', alg=i, train_fn='intro2nlp_ex3_covid_train - train.csv'))

LogisticRegression()
SVC(C=10, gamma=0.1)
FFNN(
  (fc1): Linear(in_features=903, out_features=20, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=20, out_features=1, bias=True)
  (sig): Sigmoid()
)
<catboost.core.CatBoostClassifier object at 0x7f6ac6f7e890>


Downloading (…)okenizer_config.json:   0%|          | 0.00/288 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt: 0.00B [00:00, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/3393 [00:00<?, ? examples/s]

Downloading pytorch_model.bin:   0%|          | 0.00/504M [00:00<?, ?B/s]

Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base

Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]



Step,Training Loss
500,0.6405
1000,0.3211


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(52000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [6]:
for i in range(1,6):
  print(training_pipeline(task='aa', alg=i, train_fn='trump_train.tsv'))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()
SVC(C=10, gamma=0.1)
FFNN(
  (fc1): Linear(in_features=700, out_features=20, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=20, out_features=1, bias=True)
  (sig): Sigmoid()
)
<catboost.core.CatBoostClassifier object at 0x7f6aa0308340>


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt: 0.00B [00:00, ?B/s]

Downloading (…)/main/tokenizer.json: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/2897 [00:00<?, ? examples/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Step,Training Loss
500,0.3053
1000,0.1583


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [7]:
def retrain_best_model(task,train_fn='trump_train.tsv'):
  """ Retrains and returns the best performing model for the specified task. The
      function uses the hard coded settings you have found to work best for each
      of the tasks.

      Args:
        task (str): 'aa' (authorship attribution) or 'c' (COVID) deternining,
                     the task of interest.
  """
  if task == 'aa':
    X_train, y_train  , vectorizer = preprocess_aa(train_fn)
    best_model = svm(X_train, y_train)
    return best_model , vectorizer
  if task == 'c':
    best_model = train_bert(train_fn,task)
    return best_model

In [24]:
def predict(m, fn,task,vectorizer=None,path_load = None):
  """ Returns a list of 0s and 1s, corresponding to the lines in the specified file.

    Args:
      m: the trained model to be used.
      fn: the full path to a file in the same format as the test set we have proveded.
  """
  if task == 'aa':
    X_test = preprocess_test_aa(fn,vectorizer)
    y_pred = m.predict(X_test)
  elif task =='c':
    X_test = preprocess_test_c(fn,pred=True)
    y_pred = predic_bert(m,X_test)
  save_model(path_load,y_pred)
  return list(y_pred) #a list containing the predictions


In [23]:
def save_model(path_load,predictions):
  with open(path_load, 'w') as file:
    # Convert each value in the list to a string and join them with spaces
    line = ' '.join(str(value) for value in predictions)
    # Write the line to the file
    file.write(line)

In [10]:
best_model_svm_aa,vectorizer_aa = retrain_best_model('aa',train_fn='trump_train.tsv')
best_model_bert_c = retrain_best_model('c',train_fn='intro2nlp_ex3_covid_train - train.csv')

Map:   0%|          | 0/3393 [00:00<?, ? examples/s]

Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base

Step,Training Loss
500,0.6306
1000,0.3175


In [20]:
predict(best_model_svm_aa,'trump_tweets_test_a.tsv','aa',vectorizer_aa,'result_aa.txt')

[0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0]

In [25]:
predict(best_model_bert_c,'intro2nlp_ex3_covid_test_a - Sheet1.tsv','c',None,'result_c.txt')

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[2,
 2,
 2,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 2,
 2,
 2,
 1,
 0,
 1,
 1,
 1,
 1,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 2,
 0,
 2,
 1,
 0,
 2,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 2,
 2,
 0,
 0,
 1,
 0,
 1,
 2,
 0,
 0,
 1,
 0,
 2,
 0,
 1,
 0,
 2,
 1,
 2,
 2,
 0,
 1,
 0,
 2,
 0,
 2,
 0,
 0,
 0,
 2,
 2,
 0,
 1,
 0,
 2,
 2,
 2,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 1,
 0,
 0,
 1,
 0,
 2,
 0,
 2,
 2,
 1,
 2,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 2,
 2,
 2,
 0,
 0,
 2,
 1,
 1,
 2,
 1,
 0,
 0,
 0,
 2,
 0,
 0,
 1,
 1,
 2,
 1,
 2,
 0,
 1,
 2,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 0,
 0,
 1,
 0,
 1,
 0,
 2,
 0,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 2,
 0,
 1,
 2,
 0,
 1,
 0]

In [21]:
def who_am_i():  # this is not a class method
    """Returns a ductionary with your name, id number and email. keys=['name', 'id','email']
        Make sure you return your own info!
    """
    return {'name': ['Matan Leventer','Rom Amsili'], 'id': ['208447029','316509397'], 'email': ['leventem@post.bgu.ac.il','romams@post.bgu.ac.il']}