# Setting up Dependencies

In [None]:
pip install simpletransformers

In [None]:
# Mount Google Drive into the Colab filesystem; later cells read and write
# paths located under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)



Mounted at /content/gdrive


In [None]:
import os


# Training the model

In [None]:
import pandas as pd
import torch
from simpletransformers.t5 import T5Model


def _read_tsv_as_str(path):
    """Read a tab-separated file and cast every column to ``str``."""
    return pd.read_csv(path, sep="\t").astype(str)


# Training and evaluation tables, read from the current working directory.
train_df = _read_tsv_as_str("train_df.tsv")
eval_df = _read_tsv_as_str("eval_df.tsv")

# Options passed to T5Model; the same dict is reused when the saved model is
# reloaded further down in the notebook.
model_args = dict(
    reprocess_input_data=True,
    overwrite_output_dir=True,
    max_seq_length=128,
    train_batch_size=8,
    num_train_epochs=10,
    save_eval_checkpoints=True,
    save_steps=-1,
    use_multiprocessing=False,
    evaluate_during_training=True,
    evaluate_during_training_steps=15000,
    evaluate_during_training_verbose=True,
    fp16=False,
    wandb_project="Question Generation with T5 TCS 2",
)

# Build the model of type "t5" from the "t5-large" identifier.
model = T5Model("t5", "t5-large", args=model_args)

# Train on train_df, passing eval_df as the evaluation data.
model.train_model(train_df, eval_data=eval_df)

# Saving the model

In [None]:
import shutil

# Copy the outputs/best_model folder to the mounted Google Drive.
source_folder = '/content/outputs/best_model'
destination_folder = '/content/gdrive/MyDrive/T5QA'

# dirs_exist_ok=True makes the cell re-runnable: without it copytree raises
# FileExistsError as soon as the destination folder already exists (e.g. after
# a previous run). Files with the same name are overwritten; files that exist
# only in the destination are left in place.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

# Loading and testing the model

In [None]:
# Load the model from the folder on Google Drive, reusing the model_args dict
# defined in the training cell (that cell must have been run first).
model2 = T5Model(
    't5',
    '/content/gdrive/MyDrive/T5QA',
    args=model_args,
)

In [None]:
from simpletransformers.t5 import T5Model
import pandas as pd
from pprint import pprint
# Generate a question for every evaluation context with the saved model and
# write context / reference question / predicted question to a CSV file.
eval_df = pd.read_csv("eval_df.tsv", sep="\t").astype(str)
model = T5Model('t5','/content/gdrive/MyDrive/T5QA')

# Work on an independent copy so the file is only read once while `eval_df`
# and `df` remain separate objects, as before.
df = eval_df.copy()
contexts = df["input_text"].tolist()

# Each input is prefixed with "ask_question: " before being sent to the model.
preds = model.predict(
    ["ask_question: " + description for description in contexts]
)

questions = df["target_text"].tolist()

# Build the result table in a single constructor call. DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the whole frame on
# every iteration (quadratic in the number of rows).
df1 = pd.DataFrame(
    {
        'context': contexts,
        'real_question': questions,
        'predicted_question': preds,
    },
    columns=['context', 'real_question', 'predicted_question'],
)
df1.to_csv('gdrive/MyDrive/myFolder/generated_questions_sampling10epochs.csv', index=False)