In [1]:
# Environment setup: install the packages this notebook imports.
# Replace the full OpenCV wheel with the headless build, capped below 4.3.
# NOTE(review): presumably this avoids a dependency conflict on Colab — confirm.
!pip uninstall -y opencv-python --quiet
!pip install "opencv-python-headless<4.3" --quiet
# tensorflow-text is pinned to the 2.9 series.
!pip install -U "tensorflow-text==2.9.*" --quiet
# NOTE(review): the packages below without a version specifier are unpinned;
# consider pinning them so the notebook is reproducible.
!pip install tf-models-official --quiet
!pip install keras-metrics --quiet
# gensim must stay at 3.8.3 (the import cell notes that 4.x does not work yet).
!pip install gensim==3.8.3 --quiet
!pip install pydot --quiet
!pip install sentencepiece --quiet
!pip install transformers --quiet

[K     |████████████████████████████████| 21.6 MB 2.5 MB/s 
[K     |████████████████████████████████| 4.6 MB 5.3 MB/s 
[K     |████████████████████████████████| 511.7 MB 5.4 kB/s 
[K     |████████████████████████████████| 438 kB 94.0 MB/s 
[K     |████████████████████████████████| 1.6 MB 71.1 MB/s 
[K     |████████████████████████████████| 5.8 MB 89.1 MB/s 
[K     |████████████████████████████████| 2.1 MB 5.1 MB/s 
[K     |████████████████████████████████| 237 kB 80.4 MB/s 
[K     |████████████████████████████████| 352 kB 94.3 MB/s 
[K     |████████████████████████████████| 92 kB 13.3 MB/s 
[K     |████████████████████████████████| 636 kB 90.5 MB/s 
[K     |████████████████████████████████| 99 kB 11.1 MB/s 
[K     |████████████████████████████████| 1.1 MB 74.4 MB/s 
[K     |████████████████████████████████| 43 kB 2.2 MB/s 
[K     |████████████████████████████████| 1.2 MB 81.9 MB/s 
[?25h  Building wheel for py-cpuinfo (setup.py) ... [?25l[?25hdone
  Building wheel for

In [2]:
from google.cloud import storage
import google.oauth2.credentials
import json
import seaborn as sns

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import metrics
import tensorflow_models as tfm

from tensorflow.keras.layers import Embedding, Input, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds
#from keras.preprocessing.sequence import pad_sequences
import torch

import sklearn as sk
import nltk
from nltk.corpus import reuters
from nltk.data import find

import matplotlib.pyplot as plt

import re

#This continues to work with gensim 3.8.3.  It doesn't yet work with 4.x.  
#Make sure your pip install command specifies gensim==3.8.3
import gensim

from transformers import BertTokenizer, TFBertModel, XLNetTokenizer, TFXLNetForSequenceClassification, TFBertForSequenceClassification, TFRobertaForSequenceClassification, RobertaTokenizer


from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split



## Data loading

In [3]:
# Mount Google Drive at /content/drive so the data files can be read from there.
# If you have a preferred way of storing the data, feel free to change this.
# TODO: add the download link for the data files here.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Read the train / test / validation splits (in that order) from Google Drive.
train, test, valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_train.csv",
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_test.csv",
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_valid.csv",
    )
)

In [5]:
# For every split: review texts as a plain Python list, labels as a NumPy array.
x_train, y_train = list(train["text"]), np.asarray(train["label"])
x_test, y_test = list(test["text"]), np.asarray(test["label"])
x_valid, y_valid = list(valid["text"]), np.asarray(valid["label"])

## RoBERTa

In [6]:
# Load the pretrained tokenizer for the "roberta-base" checkpoint; it is reused
# by every run below.
roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [7]:
def create_roberta_model(optimizer=None):
    """Create and compile a roBERTa sequence classifier with a frozen encoder.

    Based on the roBERTa paper: https://arxiv.org/pdf/1907.11692.pdf

        - model: TFRobertaForSequenceClassification ('roberta-base', 2 labels)

    Args:
        optimizer: Keras optimizer used to compile the model. When None, a new
            Adam(learning_rate=2e-5, epsilon=1e-08) is created for this call.
            It is built inside the function rather than as a default argument
            so that successive models never share one optimizer instance (and
            its accumulated state).

    Returns:
        The compiled TFRobertaForSequenceClassification model.
    """
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)

    roberta_model = TFRobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

    # Freeze all weights except the last 4, which are the pooled classification layers.
    # NOTE(review): this toggles the private `_trainable` flag on each variable;
    # setting `trainable` on the encoder layer would be the public equivalent — confirm
    # before switching.
    for w in roberta_model.weights[:-4]:
        w._trainable = False
    for w in roberta_model.weights[-4:]:
        w._trainable = True

    # Compile the model. The model outputs raw logits, hence from_logits=True.
    roberta_model.compile(
        optimizer = optimizer,
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics = [tf.keras.metrics.SparseCategoricalAccuracy("accuracy")]
    )

    return roberta_model

In [8]:
def tokenize(length, data, tokenizer):
  """Encode `data` with `tokenizer`, padding/truncating every item to `length`.

    - length: value passed to the tokenizer as max_length
    - data: text (or list of texts) to tokenize
    - tokenizer: callable tokenizer; it is asked for TensorFlow tensors

  Returns whatever the tokenizer call returns.
  """
  tokenizer_options = dict(
      max_length=length,
      truncation=True,
      padding='max_length',
      return_tensors='tf',
  )
  return tokenizer(data, **tokenizer_options)

def run_roberta(length, tokenizer, model_name, optimizer=None, epochs=4, batch_size=32):
  """Tokenize, train, evaluate and save a roBERTa model for one max_length.

  Reads the module-level splits x_train/y_train, x_valid/y_valid and
  x_test/y_test, so those must be defined before calling.

    - length: max length of the tokenized inputs
    - tokenizer: tokenizer passed on to tokenize()
    - model_name: directory name the trained model is saved under
    - optimizer: optional optimizer; when None, create_roberta_model()'s
      default is used
    - epochs: number of training epochs (default 4)
    - batch_size: training batch size (default 32)

  If `optimizer` wraps a learning-rate schedule, build that schedule for the
  same `epochs` and `batch_size` that are passed here.

  Returns the Keras History object produced by model.fit().
  """
  print(f"This model will be saved as {model_name}")
  print(f'Running roBERTa for encoding max_length: {length}')
  print('Tokenizing data...')
  train_encodings_roberta = tokenize(length, x_train, tokenizer)
  valid_encodings_roberta = tokenize(length, x_valid, tokenizer)
  test_encodings_roberta = tokenize(length, x_test, tokenizer)

  print(f'Created encoding for training data with shape {train_encodings_roberta.input_ids.shape}')
  print(f'Created encoding for validation data with shape {valid_encodings_roberta.input_ids.shape}')
  print(f'Created encoding for test data with shape {test_encodings_roberta.input_ids.shape}')

  if optimizer is not None:
    print("Using custom optimizer")
    model = create_roberta_model(optimizer=optimizer)
  else:
    model = create_roberta_model()

  print('Training model...')
  history = model.fit(
    [train_encodings_roberta.input_ids, train_encodings_roberta.attention_mask],
    y_train,
    validation_data=(
        [valid_encodings_roberta.input_ids, valid_encodings_roberta.attention_mask],
        y_valid
        ),
    batch_size=batch_size,
    epochs=epochs
  )

  print('Evaluating model...')
  test_inputs = [test_encodings_roberta.input_ids, test_encodings_roberta.attention_mask]
  score = model.evaluate(test_inputs, y_test)

  print("Test loss:", score[0])
  print("Test accuracy:", score[1])

  predictions = model.predict(test_inputs)
  # First element of the output tuple holds the logits; argmax over the class axis.
  preds = predictions.to_tuple()[0].argmax(1)
  print('\n Classification Report:\n')
  print(classification_report(y_test, preds))

  model.save(
    f"/content/drive/My Drive/models/Project W266/{model_name}",
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
    save_traces=True
  )

  # Hand the training curves back so callers can compare or plot runs.
  return history


### Running for various maximum encoding lengths (`max_length`)

In [78]:
max_lengths = [64,128,256,320,384,448,512]
for length in max_lengths:
  # run_roberta() requires a model name; use one save directory per max_length
  # (matches the roberta_model_<length> directories in the recorded output).
  run_roberta(length, roberta_tokenizer, f'roberta_model_{length}')

Running roBERTa for encoding max_length: 64
Tokenizing data...
Created encoding for training data with shape (47146, 64)
Created encoding for validation data with shape (5893, 64)
Created encoding for test data with shape (5894, 64)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.579146146774292
Test accuracy: 0.7127587199211121

 Classification Report:

              precision    recall  f1-score   support

           0       0.73      0.70      0.71      2992
           1       0.70      0.73      0.71      2902

    accuracy                           0.71      5894
   macro avg       0.71      0.71      0.71      5894
weighted avg       0.71      0.71      0.71      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_64/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_64/assets


Running roBERTa for encoding max_length: 128
Tokenizing data...
Created encoding for training data with shape (47146, 128)
Created encoding for validation data with shape (5893, 128)
Created encoding for test data with shape (5894, 128)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5463767051696777
Test accuracy: 0.7465218901634216

 Classification Report:

              precision    recall  f1-score   support

           0       0.73      0.80      0.76      2992
           1       0.77      0.70      0.73      2902

    accuracy                           0.75      5894
   macro avg       0.75      0.75      0.75      5894
weighted avg       0.75      0.75      0.75      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_128/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_128/assets


Running roBERTa for encoding max_length: 256
Tokenizing data...
Created encoding for training data with shape (47146, 256)
Created encoding for validation data with shape (5893, 256)
Created encoding for test data with shape (5894, 256)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5286672711372375
Test accuracy: 0.7677298784255981

 Classification Report:

              precision    recall  f1-score   support

           0       0.78      0.76      0.77      2992
           1       0.76      0.78      0.77      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_256/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_256/assets


Running roBERTa for encoding max_length: 320
Tokenizing data...
Created encoding for training data with shape (47146, 320)
Created encoding for validation data with shape (5893, 320)
Created encoding for test data with shape (5894, 320)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5211576223373413
Test accuracy: 0.7702748775482178

 Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.79      0.78      2992
           1       0.78      0.75      0.76      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_320/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_320/assets


Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (47146, 384)
Created encoding for validation data with shape (5893, 384)
Created encoding for test data with shape (5894, 384)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.521388590335846
Test accuracy: 0.7714625000953674

 Classification Report:

              precision    recall  f1-score   support

           0       0.77      0.78      0.78      2992
           1       0.77      0.76      0.77      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets


Running roBERTa for encoding max_length: 448
Tokenizing data...
Created encoding for training data with shape (47146, 448)
Created encoding for validation data with shape (5893, 448)
Created encoding for test data with shape (5894, 448)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5192633867263794
Test accuracy: 0.7706142067909241

 Classification Report:

              precision    recall  f1-score   support

           0       0.77      0.79      0.78      2992
           1       0.77      0.75      0.76      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_448/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_448/assets


Running roBERTa for encoding max_length: 512
Tokenizing data...
Created encoding for training data with shape (47146, 512)
Created encoding for validation data with shape (5893, 512)
Created encoding for test data with shape (5894, 512)


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5182607769966125
Test accuracy: 0.7709535360336304

 Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.81      0.78      2992
           1       0.79      0.73      0.76      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_512/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_512/assets


### Trying out different learning rates (with linear warmup and linear decay)

In [16]:
def create_learning_schedule(initial_learning_rate=2e-5, epochs=4, batch_size=32,
                             train_data_size=None, warmup_fraction=0.06):
  """Build a linear-warmup + linear-decay learning-rate schedule.

    - initial_learning_rate: initial rate of the decay schedule
    - epochs: number of training epochs the schedule spans (default 4)
    - batch_size: training batch size (default 32)
    - train_data_size: number of training examples; defaults to len(x_train),
      read from the module-level training split at call time
    - warmup_fraction: fraction of all training steps used for warmup
      (default 0.06, as in the roBERTa paper)

  Returns a tfm LinearWarmup schedule wrapping a PolynomialDecay that goes
  from initial_learning_rate to 0 over the total number of training steps.
  """
  if train_data_size is None:
    train_data_size = len(x_train)

  # Ceiling division: fit() also runs the final, partially filled batch, so
  # flooring here would make the schedule hit 0 before training finishes.
  steps_per_epoch = -(-train_data_size // batch_size)
  num_train_steps = steps_per_epoch * epochs
  # Warm up over a fraction of the total training steps
  warmup_steps = int(warmup_fraction * num_train_steps)

  linear_decay = tf.keras.optimizers.schedules.PolynomialDecay(
      initial_learning_rate=initial_learning_rate,
      end_learning_rate=0,
      decay_steps=num_train_steps)

  warmup_schedule = tfm.optimization.lr_schedule.LinearWarmup(
      warmup_learning_rate = 0,
      after_warmup_lr_sched = linear_decay,
      warmup_steps = warmup_steps
  )
  return warmup_schedule


In [11]:
# Sweep the initial learning rate at a single max_length.
max_lengths = [384]
learning_rates = [1e-5, 2e-5, 3e-5]
for initial_learning_rate in learning_rates:
  warmup_schedule = create_learning_schedule(initial_learning_rate)
  for length in max_lengths:
    # Build the name from the loop variable so each (length, lr) pair gets its own directory.
    model_name = f'roberta_model_{length}_lr-{initial_learning_rate}'
    # A fresh optimizer per run so no optimizer state carries over between models.
    run_roberta(length, roberta_tokenizer, model_name, optimizer = tf.keras.optimizers.experimental.Adam(
      learning_rate = warmup_schedule))

Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (47146, 384)
Created encoding for validation data with shape (5893, 384)
Created encoding for test data with shape (5894, 384)
Using custom optimizer


Downloading:   0%|          | 0.00/627M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4
















Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5705196261405945
Test accuracy: 0.7612826824188232

 Classification Report:

              precision    recall  f1-score   support

           0       0.77      0.75      0.76      2992
           1       0.75      0.77      0.76      2902

    accuracy                           0.76      5894
   macro avg       0.76      0.76      0.76      5894
weighted avg       0.76      0.76      0.76      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384/assets


In [17]:
# Sweep the initial learning rate at a single max_length.
max_lengths = [384]
learning_rates = [1e-5, 2e-5, 3e-5]
for initial_learning_rate in learning_rates:
  warmup_schedule = create_learning_schedule(initial_learning_rate)
  for length in max_lengths:
    # Build the name from the loop variable so each (length, lr) pair gets its own directory.
    model_name = f'roberta_model_{length}_lr-{initial_learning_rate}'
    # A fresh optimizer per run so no optimizer state carries over between models.
    run_roberta(length, roberta_tokenizer, model_name, optimizer = tf.keras.optimizers.experimental.Adam(
      learning_rate = warmup_schedule))

This model will be saved as roberta_model_384_lr-1e-05
Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (47146, 384)
Created encoding for validation data with shape (5893, 384)
Created encoding for test data with shape (5894, 384)
Using custom optimizer


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4
















Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.6212723851203918
Test accuracy: 0.7602646946907043

 Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.77      0.77      2992
           1       0.76      0.75      0.75      2902

    accuracy                           0.76      5894
   macro avg       0.76      0.76      0.76      5894
weighted avg       0.76      0.76      0.76      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-1e-05/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-1e-05/assets


This model will be saved as roberta_model_384_lr-2e-05
Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (47146, 384)
Created encoding for validation data with shape (5893, 384)
Created encoding for test data with shape (5894, 384)
Using custom optimizer


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4
















Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5704007148742676
Test accuracy: 0.761452317237854

 Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.77      0.77      2992
           1       0.76      0.76      0.76      2902

    accuracy                           0.76      5894
   macro avg       0.76      0.76      0.76      5894
weighted avg       0.76      0.76      0.76      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-2e-05/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-2e-05/assets


This model will be saved as roberta_model_384_lr-3e-05
Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (47146, 384)
Created encoding for validation data with shape (5893, 384)
Created encoding for test data with shape (5894, 384)
Using custom optimizer


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4
















Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.5402488708496094
Test accuracy: 0.7667118906974792

 Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.78      0.77      2992
           1       0.77      0.75      0.76      2902

    accuracy                           0.77      5894
   macro avg       0.77      0.77      0.77      5894
weighted avg       0.77      0.77      0.77      5894





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-3e-05/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_model_384_lr-3e-05/assets


In [9]:
# Larger data set:
# Replace the small splits with the larger data set (same variable names as before).
train, test, valid = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_train_large.csv",
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_test_large.csv",
        "/content/drive/My Drive/Colab Notebooks/W266 Project/data/yelp_valid_large.csv",
    )
)

# Texts as plain lists, labels as NumPy arrays.
x_train, y_train = list(train["text"]), np.asarray(train["label"])
x_test, y_test = list(test["text"]), np.asarray(test["label"])
x_valid, y_valid = list(valid["text"]), np.asarray(valid["label"])

# Show the number of training examples.
len(x_train)

471465

In [10]:
max_lengths = [384]
for length in max_lengths:
  # Derive the save name from `length` so that adding more lengths does not
  # overwrite the same saved model (yields 'roberta_384_large' for 384).
  run_roberta(length, roberta_tokenizer, f'roberta_{length}_large')

This model will be saved as roberta_384_large
Running roBERTa for encoding max_length: 384
Tokenizing data...
Created encoding for training data with shape (471465, 384)
Created encoding for validation data with shape (58933, 384)
Created encoding for test data with shape (58934, 384)


Downloading:   0%|          | 0.00/627M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training model...
Epoch 1/4








Epoch 2/4
Epoch 3/4
Epoch 4/4
Evaluating model...
Test loss: 0.4712928533554077
Test accuracy: 0.7852004170417786

 Classification Report:

              precision    recall  f1-score   support

           0       0.78      0.80      0.79     29388
           1       0.79      0.77      0.78     29546

    accuracy                           0.79     58934
   macro avg       0.79      0.79      0.79     58934
weighted avg       0.79      0.79      0.79     58934





INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_384_large/assets


INFO:tensorflow:Assets written to: /content/drive/My Drive/models/Project W266/roberta_384_large/assets
