In [3]:
import os
import json
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1

from tensorflow.compat.v1.keras.layers import Dense, LSTM,GRU,Bidirectional
from tensorflow.compat.v1.keras.layers import Activation, Dropout
#from keras.models import Sequential, load_model
from tensorflow.compat.v1.keras.models import Sequential, load_model



import pandas as pd

In [4]:
def vector(i, n):
    """
    Return vector of n "zeros" and a "one" at index i
    """
    tmp = np.zeros(n)
    tmp[i] = 1
    return tmp

In [5]:

def load_training(csv_file):

    data = pd.read_csv(csv_file)
    #print(data)
    print(data.head())
    print(data.shape)
    data= data.loc[data['percent']> 0.8]  #filtering names based on their prediction percentages
    print(data.shape)
    print(data.head())

    max_len = data.name.map(len).max()
    print("max_len" ,max_len)

    vocab = set(' '.join([str(i) for i in data['name']]))
    vocab.add('END')
    char_index = dict((c, i) for i, c in enumerate(vocab))
    vocab_len = len(vocab)
    print("vocab_len",vocab_len)

    msk = np.random.rand(len(data)) < 0.7
    data_train = data[msk]
    data_test = data[~msk]

    train_x = data_train['name']
    train_x = [list(i)+['END']*(max_len-len(i)) for i in train_x]
    train_x = [[vector(char_index[j], vocab_len) for j in i] for i in train_x]
    train_y = [([1, 0] if i == 'M' else [0, 1]) for i in data_train['gender']]
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)
    print(train_x[0],train_y[0])

    print(train_x.shape,train_y.shape)
    test_x = data_test['name']
    test_x = [list(i)+['END']*(max_len-len(i)) for i in test_x]
    test_x = [[vector(char_index[j], vocab_len) for j in i] for i in test_x]
    test_y = [([1, 0] if i == 'M' else [0, 1]) for i in data_test['gender']]
    test_x = np.asarray(test_x)
    test_y = np.asarray(test_y)

    return train_x, train_y, test_x, test_y, int(max_len), int(vocab_len), char_index


In [6]:
train_x, train_y, test_x, test_y, max_len, vocab_len, char_index = load_training('E://NCS//NCS_Gender_Prediction_Kuruparan//data//name_gender.csv')



       name gender  percent
0    Abiyah      F      0.5
1    Adason      F      0.5
2  Adeyinka      F      0.5
3   Alexiel      F      0.5
4    Alique      F      0.5
(95025, 3)
(91890, 3)
       name gender   percent
3135  Emrys      M  0.800633
3136  Kasyn      M  0.800872
3137    Yer      F  0.801120
3138  Haile      F  0.801611
3139  Adean      F  0.801724
max_len 15
vocab_len 54
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 0.

In [7]:

def save_model(model, data, network_path):
    """
    Save model architecture and weights into files
    """
    if not os.path.exists(network_path):
        os.makedirs(network_path)
    open(os.path.join(network_path, 'data.json'), 'w').write(json.dumps(data))
    model.save(os.path.join(network_path, 'server.model'))


In [8]:

def read_model(network_path):
    """
    Load model architecture and weights from files
    """
    if not os.path.exists(network_path):
        raise ValueError('Path not found : {}'.format(network_path))
    data = json.loads(open(os.path.join(network_path, 'data.json')).read())
    model = load_model(os.path.join(network_path, 'server.model'))
    return model, data


In [9]:
# build model  LSTM

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(max_len, vocab_len)))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(2))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


# train model
model.fit(train_x, train_y, batch_size=128, epochs=200, validation_data=(test_x, test_y), callbacks=[callback])

# test model
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)

# save model
data = {"max_len": max_len, "vocab_len": vocab_len, "char_index": char_index}
save_model(model, data, 'serving')
#model.save('Filterd_model_LSTM_'+str(acc))


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 15, 256)           318464    
_________________________________________________________________
dropout (Dropout)            (None, 15, 256)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 512)               1574912   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 2)                 1026      
_________________________________________________________________
activation (Activation)      (None, 2)                 0         
Total params: 1,894,402
Trainable params: 1,894,402
Non-trainable params: 0
______________________________________________

Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200


Epoch 101/200
Epoch 102/200
Test loss: 0.4962320103935507
Test accuracy: 0.91901255
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: serving\server.model\assets


In [10]:
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)



Test loss: 0.4962320103935507
Test accuracy: 0.91901255


In [11]:
# build model  GRU

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

model = Sequential()
model.add(LSTM(512, return_sequences=True, input_shape=(max_len, vocab_len)))
model.add(Dropout(0.2))
model.add(GRU(2014, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(2))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


# train model
model.fit(train_x, train_y, batch_size=128, nb_epoch=200, validation_data=(test_x, test_y), callbacks=[callback])

# test model
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)

# save model
data = {"max_len": max_len, "vocab_len": vocab_len, "char_index": char_index}
#save_model(model, data, 'E://NCS//gender-prediction-master//model//')
save_model(model, data, 'serving_GRU')
#model.save('Filterd_model_GRU_'+str(acc))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 15, 512)           1161216   
_________________________________________________________________
dropout_2 (Dropout)          (None, 15, 512)           0         
_________________________________________________________________
gru (GRU)                    (None, 2014)              15268134  
_________________________________________________________________
dropout_3 (Dropout)          (None, 2014)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 4030      
_________________________________________________________________
activation_1 (Activation)    (None, 2)                 0         
Total params: 16,433,380
Trainable params: 16,433,380
Non-trainable params: 0
__________________________________________

Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Test loss: 0.5021752181319434
Test accuracy: 0.9197054
INFO:tensorflow:Assets written to: serving_GRU\server.model\assets


In [12]:
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)


Test loss: 0.5021752181319434
Test accuracy: 0.9197054


In [13]:
# build model Bidiorectional LSTM

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

model = Sequential()
#model.add(Bidirectional(LSTM(512, return_sequences=True, input_shape=(max_len, vocab_len))))
model.add(Bidirectional(LSTM(512, return_sequences=True), input_shape=(max_len, vocab_len)))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(2))
model.add(Dropout(0.2))
model.add(Dense(2))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train model
model.fit(train_x, train_y, batch_size=128, epochs=200, validation_data=(test_x, test_y), callbacks=[callback])

# test model
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)

# save model
data = {"max_len": max_len, "vocab_len": vocab_len, "char_index": char_index}
#save_model(model, data, 'E://NCS//gender-prediction-master//model//')
save_model(model, data, 'serving_BiLSTM')
#model.save('Filterd_model_BiLSTM_'+str(acc))

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 15, 1024)          2322432   
_________________________________________________________________
dropout_4 (Dropout)          (None, 15, 1024)          0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 512)               3147776   
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1026      
_________________________________________________________________
dropout_6 (Dropout)          (None, 2)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 2)                

Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Test loss: 0.5442555474716584
Test accuracy: 0.9243364
INFO:tensorflow:Assets written to: serving_BiLSTM\server.model\assets


In [14]:
loss, acc = model.evaluate(test_x, test_y)
print('Test loss:', loss)
print('Test accuracy:', acc)


Test loss: 0.5442555474716584
Test accuracy: 0.9243364


In [16]:
# model2=load_model('mymodel_epoch50')
# loss, acc = model2.evaluate(test_x, test_y)
# print('Test loss:', loss)
# print('Test accuracy:', acc)


In [None]:
print(data)

In [1]:
import os
import json
import numpy as np
import tensorflow as tf



#from keras.models import load_model
from tensorflow.keras.models import Sequential, load_model

import tensorflow as tf

class GenderAPI:
    def __init__(self):
        #self.graph = tf.compat.v1.get_default_graph() #tf.get_default_graph()
        #self.model=load_model('Filterd_model_BiLSTM_0.9210641')
        data={'max_len': 15, 'vocab_len': 54, 'char_index': {'H': 0, 'd': 1, 'a': 2, 'C': 3, 'K': 4, 's': 5, 'Z': 6, 'z': 7, 'P': 8, 'N': 9, 't': 10, 'R': 11, 'b': 12, 'L': 13, 'B': 14, 'n': 15, 'S': 16, 'x': 17, 'Y': 18, 'U': 19, 'END': 20, 'r': 21, 'F': 22, 'e': 23, 'm': 24, 'v': 25, 'W': 26, 'f': 27, 'o': 28, 'h': 29, 'T': 30, 'w': 31, 'j': 32, 'q': 33, 'y': 34, 'i': 35, 'u': 36, 'O': 37, 'c': 38, 'D': 39, 'J': 40, 'V': 41, ' ': 42, 'g': 43, 'I': 44, 'k': 45, 'G': 46, 'p': 47, 'X': 48, 'A': 49, 'Q': 50, 'E': 51, 'l': 52, 'M': 53}}
        self.model, data = self.read_model('serving')
        self.max_len = data['max_len']
        self.vocab_len = data['vocab_len']
        self.char_index = data['char_index']

    def vector(self, i, n):
        tmp = np.zeros(n)
        tmp[i] = 1
        return tmp

    def read_model(self, network_path):
        if not os.path.exists(network_path):
            raise ValueError('Path not found : {}'.format(network_path))
        dat = json.loads(open(os.path.join(network_path, 'data.json')).read())
        #dat=data
        print(dat)
        mod = load_model(os.path.join(network_path, 'server.model'))
        #mod=load_model('Filterd_model_BiLSTM_0.9210641')
        return mod,dat

    def predict(self, names, labelize=True):
        """
        Returns gender of given names

        Args:
            names:      list of strings
            labelize:   returns 'M' or 'F' labels if set to True,
                        returns list of porbabilities otherwise
        """
        #with self.graph.as_default():
            # format input
        #print(names)
        names = [s.lower() for s in names]
        #print("lower",names)

        names = [list(i)+['END']*(self.max_len-len(i)) for i in names]
        names = [[self.vector(self.char_index[j], self.vocab_len) for j in i] for i in names]
        names = np.asarray(names)

        # predict gender
        out = self.model.predict(names).tolist()
        print(out)
        return [('M' if p[0] > p[1] else 'F') for p in out] if labelize else out


In [None]:
#get array of data from postman post ///// set headers :key,value , set url for POST // inset json body ["Kelvin","maria"]

from flask import Flask, request, jsonify
#from gender_api import GenderAPI

# Init Flask and GenderAPI
app = Flask(__name__)
api = GenderAPI()

@app.route('/')
def hello_world():
    return "Welcome to GenderAPI. Please request on /predict using \
    'Content-Type: application/json' header and a json array of names in the body."

@app.route('/predict', methods=['POST'])
def predict():
    names = request.get_json()
    print(names)
    labels = api.predict(names)
    print(labels)
    return jsonify(labels)

# Run Flask
app.run(host='127.0.0.1', port=5000, debug=False)


In [2]:
#get array of data from HTML form /// example  "Kelvin","maria"



from flask import Flask, request,redirect, url_for,  jsonify,render_template
#from gender_api import GenderAPI

# Init Flask and GenderAPI
app = Flask(__name__)
api = GenderAPI()


@app.route('/')
def my_form():
    return render_template('form.html')

@app.route('/', methods=['POST'])

# def my_form_post():
#     text = request.form['text']
#     processed_text = text.upper()
#     return processed_text

def predict():
    names = request.form['text']
    print(str(names))
    names=str(names).split(",")
    print(names)
    labels = api.predict(names)
    return jsonify(labels)



app.run(host='127.0.0.1', port=5000, debug=False)


{'max_len': 15, 'vocab_len': 54, 'char_index': {'n': 0, 'END': 1, 'C': 2, 'D': 3, 'W': 4, 'V': 5, 'r': 6, 'q': 7, 'X': 8, 'j': 9, 'v': 10, 'T': 11, 'E': 12, 'g': 13, 'Z': 14, 'u': 15, 'i': 16, 'l': 17, 'b': 18, 'G': 19, 'h': 20, 'R': 21, 'S': 22, 'x': 23, 'e': 24, 'Y': 25, 'K': 26, 'w': 27, 'z': 28, 'U': 29, 'c': 30, 'N': 31, 'M': 32, 'L': 33, 'A': 34, 'I': 35, 'm': 36, 'O': 37, 'k': 38, ' ': 39, 'f': 40, 's': 41, 'F': 42, 'B': 43, 'J': 44, 'y': 45, 'P': 46, 'H': 47, 'p': 48, 'o': 49, 'a': 50, 't': 51, 'Q': 52, 'd': 53}}
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [05/Feb/2021 05:59:56] "[37mGET / HTTP/1.1[0m" 200 -


abinaya,kurupara
['abinaya', 'kurupara']


127.0.0.1 - - [05/Feb/2021 05:59:59] "[37mPOST / HTTP/1.1[0m" 200 -


[[0.009048513136804104, 0.990951418876648], [0.5194451212882996, 0.48055487871170044]]
