# Modelo AWD-LSTM

In [None]:
import os
# Display the current working directory; the relative '../input/...' paths
# used by later cells are resolved against it.
os.getcwd()

In [None]:
from fastai.text.all import *
from IPython.display import display, HTML

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# Named paths: the Kaggle working directory and a relative 'models' directory.
path, model_path = Path('/kaggle/working'), Path('models')

Conjunto de datos de Wikipedia en forma de tabla.

In [None]:
# Load the Wikipedia CSV into a DataFrame and preview its first rows.
df = pd.read_csv('../input/es-wiki/es_wiki_dataset.csv')
df.head()

## Modelo de lenguaje con Wikipedia

In [None]:
# Language-model DataLoaders built from the 'text' column of `df`
# (batch size 128, sequence length 72).
# NOTE(review): ColSplitter() is called without arguments, so it relies on
# its default validation-flag column — confirm the CSV provides it.
dls_lm = DataBlock(
    blocks=TextBlock.from_df('text', is_lm=True),
    get_x=ColReader('text'),
    splitter=ColSplitter(),
).dataloaders(df, bs=128, seq_len=72)

In [None]:
# Display up to two samples from the language-model batches as a sanity check.
dls_lm.show_batch(max_n=2)

In [None]:
# AWD-LSTM language model over the Wikipedia DataLoaders, created without
# pretrained weights (pretrained=False) and switched to mixed precision.
# Accuracy and perplexity are tracked as metrics.
learn = language_model_learner(
    dls_lm,
    AWD_LSTM,
    pretrained=False,
    drop_mult=0.3,
    metrics=[accuracy, Perplexity()],
).to_fp16()

In [None]:
# Run the learning-rate finder; the result's `.valley` value is used as the
# learning rate by the following fit call.
valley = learn.lr_find()

In [None]:
# Train for 9 epochs with the one-cycle schedule, using the learning rate
# suggested by the finder above.
learn.fit_one_cycle(9, valley.valley)

In [None]:
# Save the trained Wikipedia language model (written under the learner's
# model directory; learn.save also creates that directory if needed).
learn.save('es_wiki_9ep')

# The fine-tuning cell builds its learner with
# pretrained_fnames=['es_wiki_9ep', 'es_wiki_vocab'], which makes fastai look
# for 'es_wiki_vocab.pkl' next to the weights in order to map the old
# embeddings onto the new vocabulary. Persist the vocab here so that file
# actually exists.
import pickle
with open(learn.path/learn.model_dir/'es_wiki_vocab.pkl', 'wb') as f:
    pickle.dump(dls_lm.vocab, f)

## Fine-tune con finanzas

In [None]:
# Load the books CSV used for fine-tuning (this rebinds `df`) and preview
# its first rows.
df = pd.read_csv('../input/es-wiki/books_dataset.csv')
df.head()

In [None]:
# Language-model DataLoaders for the fine-tuning corpus, built from the
# 'text' column of `df` (batch size 64, sequence length 512).
# NOTE(review): ColSplitter() is called without arguments, so it relies on
# its default validation-flag column — confirm the CSV provides it.
dls_finanzas = DataBlock(
    blocks=TextBlock.from_df('text', is_lm=True),
    get_x=ColReader('text'),
    splitter=ColSplitter(),
).dataloaders(df, bs=64, seq_len=512)

In [None]:
# Display up to three samples from the fine-tuning batches as a sanity check.
dls_finanzas.show_batch(max_n=3)

In [None]:
# AWD-LSTM language model over the fine-tuning DataLoaders, initialised from
# the previously saved files named in `pretrained_fnames` (weights first,
# vocab second), then switched to mixed precision.
learn = language_model_learner(
    dls_finanzas,
    AWD_LSTM,
    pretrained=True,
    pretrained_fnames=['es_wiki_9ep', 'es_wiki_vocab'],
    drop_mult=0.3,
    metrics=[accuracy, Perplexity()],
).to_fp16()

In [None]:
# Run the learning-rate finder on the fine-tuning learner; the result's
# `.valley` value is used as the learning rate when fitting below.
valley = learn.lr_find()

In [None]:
# Unfreeze the learner so the whole model is trained in the next fit call.
learn.unfreeze()

In [None]:
# Fine-tune for 10 epochs with the one-cycle schedule, using the learning
# rate suggested by the finder above.
learn.fit_one_cycle(10, valley.valley)

In [None]:
# Save the full fine-tuned language model.
learn.save('es_wiki_fin_10ep')

# Also export just the encoder: the regression learner further down calls
# learn.load_encoder('es_wiki_fin_encoder'), and no other cell writes a file
# with that name.
learn.save_encoder('es_wiki_fin_encoder')

## Regresión con biografías

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Load the hand-labelled CSV and coerce each of the six label columns to a
# numeric dtype.
df_reg = pd.read_csv('../input/es-wiki/Etiquetado_Mano1.csv')
for _label in ('F', 'D/C', 'A/C/F', 'L', 'P', 'Ac'):
    df_reg[_label] = pd.to_numeric(df_reg[_label])

In [None]:
# Hold out 30% of the labelled rows as a test set; random_state is fixed so
# the split is repeatable.
train_df, test_df = train_test_split(df_reg, test_size=0.3, random_state=21)
# `df` is rebound to the training split (same object as train_df).
df = train_df
df.head()

In [None]:
# DataLoaders for the six-target regression task: text comes from the 'bio'
# column, targets from the six label columns. The vocabulary of the
# fine-tuning DataLoaders is reused, and 10% of the training rows are set
# aside for validation with a fixed seed.
train_dls = TextDataLoaders.from_df(
    df=train_df,
    text_col='bio',
    label_col=['F', 'D/C', 'A/C/F', 'L', 'P', 'Ac'],
    y_block=RegressionBlock(n_out=6),
    text_vocab=dls_finanzas.vocab,
    valid_pct=0.1,
    seed=42,
    bs=64,
    seq_len=512,
)

In [None]:
# AWD-LSTM text learner with six outputs constrained to the (0, 1) range,
# evaluated with RMSE and run in mixed precision.
learn = text_classifier_learner(
    train_dls,
    AWD_LSTM,
    drop_mult=0.5,
    n_out=6,
    y_range=(0, 1),
    metrics=[rmse],
).to_fp16()

In [None]:
# Load the encoder weights stored under the name 'es_wiki_fin_encoder' into
# the regression learner.
learn.load_encoder('es_wiki_fin_encoder')

In [None]:
# Gradual unfreezing: one epoch in the learner's initial state, then one
# epoch after each successive freeze_to stage, and finally three epochs with
# everything unfrozen. Every stage uses a maximum learning rate of 1e-2.
learn.fit_one_cycle(1, 1e-2)
for _stage in (-2, -3):
    learn.freeze_to(_stage)
    learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(3, 1e-2)

In [None]:
# Continue training for a further 250-epoch one-cycle run at a maximum
# learning rate of 1e-2.
learn.fit_one_cycle(250, 1e-2)

In [None]:
# Save the trained regression model under the name 'es_wiki_reg'.
learn.save('es_wiki_reg')