### Standard Kaggle setup: imports and a listing of the input files

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

/kaggle/input/6000-indian-food-recipes-dataset/IndianFoodDatasetXLS.xlsx
/kaggle/input/6000-indian-food-recipes-dataset/IndianFoodDatasetCSV.csv


### Install `aitextgen` package

In [3]:
!pip install -q aitextgen #install the main package

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
allennlp 2.2.0 requires transformers<4.5,>=4.1, but you have transformers 4.9.1 which is incompatible.[0m


### Load `aitextgen`

In [4]:
from aitextgen import aitextgen

### Know your GPU Config 

In [5]:
# Run nvidia-smi in a shell to report the GPU, driver and memory available to this session.
! nvidia-smi

Fri Aug  6 08:12:38 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.04   Driver Version: 450.119.04   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Download the 124M GPT2 Model

In [6]:
# Build the base model from the 124M GPT-2 checkpoint. Per the cell output, this
# fetches the TensorFlow checkpoint files, converts them, and saves a PyTorch
# model and config under ./aitextgen/.
# NOTE(review): to_gpu=True presumably places the model on the GPU — confirm
# against the aitextgen docs if running on a CPU-only machine.
ai = aitextgen(tf_gpt2="124M", to_gpu=True)


Fetching checkpoint:   0%|          | 0.00/77.0 [00:00<?, ?it/s]

Fetching hparams.json:   0%|          | 0.00/90.0 [00:00<?, ?it/s]

Fetching model.ckpt.data-00000-of-00001:   0%|          | 0.00/498M [00:00<?, ?it/s]

Fetching model.ckpt.index:   0%|          | 0.00/5.21k [00:00<?, ?it/s]

Fetching model.ckpt.meta:   0%|          | 0.00/471k [00:00<?, ?it/s]

Converting TensorFlow checkpoint from /kaggle/working/aitextgen/124M
Loading TF weight model/h0/attn/c_attn/b with shape [2304]
Loading TF weight model/h0/attn/c_attn/w with shape [1, 768, 2304]
Loading TF weight model/h0/attn/c_proj/b with shape [768]
Loading TF weight model/h0/attn/c_proj/w with shape [1, 768, 768]
Loading TF weight model/h0/ln_1/b with shape [768]
Loading TF weight model/h0/ln_1/g with shape [768]
Loading TF weight model/h0/ln_2/b with shape [768]
Loading TF weight model/h0/ln_2/g with shape [768]
Loading TF weight model/h0/mlp/c_fc/b with shape [3072]
Loading TF weight model/h0/mlp/c_fc/w with shape [1, 768, 3072]
Loading TF weight model/h0/mlp/c_proj/b with shape [768]
Loading TF weight model/h0/mlp/c_proj/w with shape [1, 3072, 768]
Loading TF weight model/h1/attn/c_attn/b with shape [2304]
Loading TF weight model/h1/attn/c_attn/w with shape [1, 768, 2304]
Loading TF weight model/h1/attn/c_proj/b with shape [768]
Loading TF weight model/h1/attn/c_proj/w with shap

Save PyTorch model to aitextgen/pytorch_model.bin
Save configuration file to aitextgen/config.json


### Read the Input Dataset

In [7]:
# Load the recipes CSV into a DataFrame.
# The path is relative: it resolves to the /kaggle/input/... file listed above
# only when the working directory is a sibling of /kaggle/input (the checkpoint
# log shows /kaggle/working) — adjust if the notebook is run elsewhere.
input_file = pd.read_csv("../input/6000-indian-food-recipes-dataset/IndianFoodDatasetCSV.csv")

### Avoid Pandas truncating strings - Remove any column width limit

In [8]:
# Lift the display column-width cap so long strings are shown in full.
pd.options.display.max_colwidth = None

### Text Cleaning - remove the word "Recipe" and parentheses from the recipe names


In [9]:
# Remove the literal word "Recipe" and any parentheses from the recipe names.
# regex=False is passed explicitly: "(" and ")" are regex metacharacters, and the
# default value of `regex` in Series.str.replace differs between pandas versions,
# so relying on the default is fragile (warnings or errors depending on version).
input_file["TranslatedRecipeName"] = (
    input_file["TranslatedRecipeName"]
    .str.replace("Recipe", "", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
)

### Assign the output to a new DataFrame

In [10]:
# Keep only the cleaned recipe-name column, as a single-column DataFrame.
df = input_file["TranslatedRecipeName"].to_frame()

In [11]:
# Sanity check: (rows, columns) of the single-column frame before any splitting.
df.shape

(6871, 1)

### More Cleaning - split hyphen-separated names into one entry per row and trim whitespace

In [12]:
# Split each recipe name on "-" and give every piece its own row in a new
# "var1" column (the original name is repeated alongside each piece).
# The frame is rebuilt from `input_file` instead of from `df` itself so the cell
# is idempotent: re-running it no longer splits and explodes rows that were
# already exploded, which would silently multiply the row count.
df = (
    input_file[["TranslatedRecipeName"]]
    .assign(var1=lambda frame: frame["TranslatedRecipeName"].str.split("-"))
    .explode("var1")
)


In [13]:
# Trim whitespace on BOTH sides of each name piece.
# Removing "Recipe" and splitting on "-" leaves trailing as well as leading
# spaces; lstrip() alone kept the trailing ones, which then ended up in the
# training text (they are visible at the end of the generated sample lines).
df["var1"] = df["var1"].str.strip()

In [14]:
# Sanity check: the row count should have grown after the split/explode step,
# and there are now two columns (the original name plus "var1").
df.shape

(8783, 2)

### Save the text (only that column) as a text file 

In [15]:
# Write one recipe name per line as plain text.
# DataFrame.to_csv applies CSV quoting, so any name containing a comma or a
# double quote was wrapped in '"' characters, and missing values were written as
# blank lines — both pollute a plain-text training corpus. Writing the lines
# directly avoids that.
# NOTE(review): this matches the plain-text read used by
# ai.train(..., line_by_line=False) in this notebook; revisit if training is
# switched to a CSV / line-by-line mode.
with open("input_text_cleaned.txt", "w", encoding="utf-8") as text_file:
    text_file.writelines(f"{name}\n" for name in df["var1"].dropna())

### Fine-tuning - set the right parameters 

In [18]:
# Fine-tune the base model on the cleaned recipe names. The run lasts 500 steps
# and prints sample generations every 100 steps (see the output below).
ai.train(
    'input_text_cleaned.txt',
    # data handling
    line_by_line=False,
    from_cache=False,
    # schedule
    num_steps=500,
    generate_every=100,
    save_every=500,
    # optimisation
    learning_rate=1e-3,
    batch_size=1,
    fp16=False,
    # environment
    save_gdrive=False,
)

  0%|          | 0/8783 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

[1m100 steps reached: generating sample texts.[0m
 
Paneer Keema 
Aloo Gobi Thoran 
Kerala Style Broccoli Curry 
Kashmiri Style Masala 
Masala Curry 
Ginger Masala Puli  
Kerala Style Ragi Style Phooli Roti 
Stuffed Mushroom Pie
Baked Potatoes And Onion Gravy
Kashmiri Style Dal Chana Dal Curry
Spiced Spinach Bread 
Andhra Style Broccoli Cake 
Paneer Kofta Curry 
Soya Paratha 
Mangai Masala 
Kashmiri Style Chicken Curry
Laukiyaram Bhindi Style Soya Chutney 
Green Beans & Potato Soup 
Pasian Style Turaiyal Moongal 
Bengali Style Vankaya Chutney 
Spiced Spinach Bread Pulao 
Mooli Tukda Chutney 
Milk Pulao 
Homemade Phooli 
Kale Vadu Shaak  
Cramberry and Apple Soup 
Stuffed Bhindi Cheese Roasted Spinach 
Dry Sweet And Coconut Curry
Paneer Style
[1m200 steps reached: generating sample texts.[0m

Karnataka Style Sorghum 
Kathalandai Chivda   In Hindi
Parsalu Style Mixed Vegetable  With Spinach 
Mutton Kadhi   In Hindi
Methi & Herbed Flatbread With Garlic Mayo 
Grapes Makhani  
Spiked Bu

### Load the fine-tuned model

In [20]:
# Rebind `ai` to a model loaded from ./trained_model/ with its config.json.
# NOTE(review): presumably ./trained_model/ is the directory the training cell
# above wrote its checkpoint to (no output directory is passed there) — confirm.
ai = aitextgen(model_folder="./trained_model/", config="./trained_model/config.json", to_gpu=True)

### AI Generated Text

In [21]:
# Draw three samples of up to 50 tokens each from the fine-tuned model.
ai.generate(
    top_p=0.9,
    temperature=1.0,
    max_length=50,
    batch_size=1,
    n=3,
)

Goan Sorak Podim Recheado 
Green Chana Dal Kebab 
Maharashtrian Style Potatoes 
Aloo Matar Sabzi  
Goan Style Brinjal Poriy
Style Egg Curry 
No Onion No Garlic
Maharashtrian Kadala Curry  With Kadala
Protein Rich Creamy Tomato Kulfi 
Chakki Ki Kadhi 
Spicy Raw Tomato Curry 
Green peas And Pepper Stir Fry 
Masala Keerai Ki Sabzi 
Chaat Masala  
Makhalawali Kothavar Halwa 
Coriander Egg Curry 
C
