## Downloading Extra Libraries

In [1]:
# !pip install git+https://github.com/siddharth17196/english-hindi-transliteration
# !pip install tqdm
# pickle ships with the Python standard library, so it does not need to be installed

## Loading Required Libraries

In [2]:
import pickle
import tensorflow
import numpy as np
import pandas as pd
from tqdm import tqdm
from elt import translit

# Transliterator object configured for 'hindi'; later cells call
# to_hindi.convert([...]) with a list of English names and index the result.
to_hindi = translit('hindi')

In [3]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import save_model, load_model
from keras.models import model_from_json

# Mean-value imputer for NaN entries.
# NOTE(review): `imputer` is not referenced anywhere else in the visible
# notebook — confirm whether it (and the SimpleImputer import) is still needed.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

Using TensorFlow backend.


## Reading Boy and Girl Names

In [4]:
def _read_names(path):
    """Return the lower-cased names stored one per line in the text file `path`.

    Surrounding whitespace (including the trailing newline) is stripped and
    blank lines are skipped, so a final line without a newline keeps its last
    character and empty names never reach the dataset.
    """
    with open(path, "r") as txt_file:
        stripped = (line.strip() for line in txt_file)
        return [entry.lower() for entry in stripped if entry]


boy_english = _read_names("database/boy.txt")
girl_english = _read_names("database/girl.txt")

## Converting Names into Regional Script

In [None]:
# Each name is handed to convert() as a one-element list and the single
# result is unwrapped with [0]; tqdm shows progress for this slow step.
boy_hindi = []
for english_name in tqdm(boy_english):
    boy_hindi.append(to_hindi.convert([english_name])[0])

girl_hindi = []
for english_name in tqdm(girl_english):
    girl_hindi.append(to_hindi.convert([english_name])[0])

 14%|██████████▉                                                                    | 533/3865 [01:55<18:12,  3.05it/s]

## Creating A Name-Gender Dataset

In [None]:
# Collect the names and their labels side by side: 0 for entries from
# boy_hindi, 1 for entries from girl_hindi.
name, gender = [], []
for label, hindi_names in enumerate((boy_hindi, girl_hindi)):
    for word in tqdm(hindi_names):
        name.append(word)
        gender.append(label)

# Persist the combined table so it can be inspected or reused later.
df = pd.DataFrame({'NAME': name, 'GENDER': gender})
df.to_csv("database/names.csv", index=False)

## Fetching Names and Labels

In [None]:
# Pull the two columns out of the frame as plain Python lists.
x, y = (df[column].tolist() for column in ('NAME', 'GENDER'))

## Padding Names With Zeros

In [None]:
NAME_WIDTH = 20  # fixed sequence length every name is normalised to


def pad_name(name, width=NAME_WIDTH):
    """Return `name` centre-padded with '0' characters to exactly `width` chars.

    Names longer than `width` are truncated so every entry ends up with the
    same length. For the even default width, an odd amount of padding puts
    the extra '0' on the right.

    Raises:
        TypeError: if `name` is not a string (for example a NaN read back
            from a CSV), instead of silently leaving a bad entry in the list.
    """
    if not isinstance(name, str):
        raise TypeError("expected a string name, got %r" % (name,))
    return name[:width].center(width, '0')


x = [pad_name(entry) for entry in x]

## Creating Letter-Number Mapping

In [None]:
# Concatenate every name so the set of distinct characters can be collected.
jn = "".join(x)

# Sorted characters give each one a stable integer index.
chars = sorted(set(jn))
mapping = {char: index for index, char in enumerate(chars)}

# The context manager closes the file even if pickling raises.
with open("mapping.pkl", "wb") as file:
    pickle.dump(mapping, file)

# file = open("mapping.pkl", "rb")
# output = pickle.load(file)
# print(output)
# file.close()

## Performing One-Hot Encoding

In [None]:
lbl = LabelEncoder()
one = OneHotEncoder()

# Step 1: map the raw labels to integer ids.
label_ids = lbl.fit_transform(y)
# Step 2: the one-hot encoder is given a single column, one row per sample.
label_column = label_ids.reshape(len(label_ids), 1)
# Step 3: one-hot encode and convert the encoder output with toarray().
y = one.fit_transform(label_column).toarray()

## Converting Letters into Numbers (Vectorizing)

In [None]:
# Replace every character of every name with its integer index from `mapping`.
# (The per-name debug print was removed: it emitted one output line per name.)
names = [[mapping[char] for char in line] for line in x]

In [None]:
# Shape the encoded names as (samples, 20, 1) before they go to the model.
name_matrix = np.array(names)
x = name_matrix.reshape((len(names), 20, 1))

## Splitting 'x' and 'y' into test and train data

In [None]:
# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

## Creating LSTM Model

In [None]:
# Input shape is taken from the prepared data: x.shape[1] by x.shape[2].
timesteps, features = x.shape[1], x.shape[2]

# Three stacked LSTM layers, each followed by dropout, then a softmax layer
# with one unit per column of the one-hot encoded labels.
model = Sequential([
    LSTM(80, input_shape=(timesteps, features), return_sequences=True),
    Dropout(0.2),
    LSTM(150, input_shape=(timesteps, features), return_sequences=True),
    Dropout(0.2),
    LSTM(150),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(
    optimizer="Adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

## Training LSTM Model

In [None]:
# Train for 30 epochs in batches of 50; the held-out split is passed as
# validation data so its metrics are reported after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=30,
    verbose=1,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the whole test set in one batched call instead of one predict per
# sample. argmax over the class axis replaces Sequential.predict_classes,
# which newer Keras/TensorFlow releases no longer provide.
if len(x_test):
    predicted = np.argmax(model.predict(x_test), axis=1)
    expected = np.argmax(y_test, axis=1)  # y_test rows are one-hot encoded
    accuracy = float(np.mean(predicted == expected))
else:
    # Nothing to evaluate; avoid dividing by zero.
    accuracy = 0.0

# `accuracy` is a fraction in [0, 1]; scale it so the "%" label is correct.
print("Accuracy: ", round(100 * accuracy, 1), "%")

## Saving Model with Weights

In [None]:
from keras.models import model_from_json

# The architecture is serialised to model.json ...
architecture = model.to_json()
with open("model.json", "w") as handle:
    handle.write(architecture)

# ... and the trained weights are written separately to weights.h5.
model.save_weights("weights.h5")

## Loading Model + Weights and Mapping/Vectorizer

In [None]:
# Read the saved architecture; the context manager closes the file even if
# reading fails.
with open('model.json', 'r') as file:
    json_file = file.read()

# Rebuild the network from JSON, restore its weights, and compile it with the
# same settings used for training.
loaded_model = model_from_json(json_file)
loaded_model.load_weights("weights.h5")
loaded_model.compile(optimizer="Adam", loss='categorical_crossentropy', metrics=['accuracy'])

## Function to Guess the Gender

In [None]:
def gender(english) :
    """Guess the gender for an English-script name using the loaded model.

    The name is lower-cased (the training names were lower-cased when they
    were read), transliterated with `to_hindi`, truncated or centre-padded
    with '0' to 20 characters, encoded through `mapping`, and classified by
    `loaded_model`.

    Args:
        english: the name written in Latin script.

    Returns:
        "BOY" when the predicted class index is 0, otherwise "GIRL".
    """
    width = 20
    hindi = to_hindi.convert([english.strip().lower()])[0]
    # Truncate over-long names so the reshape below cannot fail; for an even
    # width, center() puts the extra '0' on the right when padding is odd.
    padded = hindi[:width].center(width, '0')
    # Characters missing from `mapping` are encoded like padding rather than
    # raising KeyError.
    pad_code = mapping.get('0', 0)
    codes = [mapping.get(char, pad_code) for char in padded]
    encoded = np.array(codes).reshape((1, width, 1))
    # argmax over the class scores replaces Sequential.predict_classes, which
    # newer Keras/TensorFlow releases no longer provide.
    predicted = int(np.argmax(loaded_model.predict(encoded), axis=-1)[0])
    return "BOY" if predicted == 0 else "GIRL"
   

In [None]:
# Use dedicated variable names here: rebinding `names` / `name` would clobber
# the encoded sequences and the dataset column list built by earlier cells,
# so those cells could no longer be re-run safely.
sample_names = boy_english[:10]
for sample_name in sample_names :
    print(sample_name, end="")
    print(" >>> ", gender(sample_name))

In [None]:
# Spot-check a single mixed-case name; the call is the cell's last expression.
gender("BabiTaa")