In [None]:
import numpy as np
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.animation import FuncAnimation
import math
# Import pandas under the alias 'pd'
import pandas as pd

# Load the combined 2015-2017 results, including the 'qualifying' column,
# from the "data" folder.
marathon_2015_2017_qualifying = pd.read_csv("./data/marathon_2015_2017_qualifying.csv")

# Split the rows by race year: 2017 on one side, every other year on the other.
marathon_2015_2016 = marathon_2015_2017_qualifying.loc[
    marathon_2015_2017_qualifying['Year'] != 2017
]
marathon_2017 = marathon_2015_2017_qualifying.loc[
    marathon_2015_2017_qualifying['Year'] == 2017
]

# Keep only the columns used downstream; 'qualifying' stays last so that
# the list records below can read it with index -1.
_FEATURE_COLUMNS = ['M/F', 'Age', 'Pace', 'qualifying']
df_2015_2016 = pd.DataFrame(marathon_2015_2016, columns=_FEATURE_COLUMNS)
df_2017 = pd.DataFrame(marathon_2017, columns=_FEATURE_COLUMNS)

# Convert each DataFrame into a plain list of row lists.
record_2015_2016 = df_2015_2016.values.tolist()
record_2017 = df_2017.values.tolist()

# Display names for the gender code; the code is used as the list index.
gender_list = ['Female', 'Male']

# Figure with a single Age-vs-Pace axes, fixed limits and axis labels.
grad_fig = Figure(figsize=(10, 6), dpi=100)
grad_ax = grad_fig.add_subplot(1, 1, 1)
grad_ax.set(
    xlim=(15, 88),
    ylim=(0, 1300),
    xlabel="Age : Age on race day",
    ylabel="Pace : Runner's overall minute per mile pace",
)

def seconds_to_hhmmss(seconds):
    """Format a duration given in seconds as an ``HH:MM:SS`` string.

    Hours are not wrapped at 24, so large durations yield more than two
    hour digits.
    """
    hours, remainder = divmod(seconds, 60 * 60)
    minutes, secs = divmod(remainder, 60)
    return "%02i:%02i:%02i" % (hours, minutes, secs)

def normalization(record):
    """Scale the first three values of *record* for model input.

    The first value is passed through unchanged, the second is divided
    by 10 and the third by 100. Any further elements are ignored.

    Returns:
        A new three-element list ``[record[0], record[1] / 10, record[2] / 100]``.
    """
    gender, age, pace = record[0], record[1], record[2]
    return [gender, age / 10, pace / 100]

# Training set (2015-2016 records): the first three values of each record
# are the features, scaled by normalization(); the last value is the label.
x_train = [normalization(row[:3]) for row in record_2015_2016]
y_train = [[row[-1]] for row in record_2015_2016]

# Test set (2017 records): features are kept unscaled here; the label is
# again the last value of each record.
x_test = [row[:3] for row in record_2017]
y_test = [[row[-1]] for row in record_2017]

def histogram(rank):
    """Plot one 2017 runner against all 2017 runners of the same gender.

    Draws every same-gender 2017 runner as a dot on ``grad_ax`` (Age on x,
    Pace on y), marks the selected runner with a yellow diamond, and
    annotates summary statistics of Pace for runners sharing the selected
    runner's gender and age.

    Args:
        rank: 1-based position of the runner in ``x_test``; any value
            accepted by ``int()``.
    """
    index = int(rank) - 1
    runner = x_test[index]
    print(runner)
    t_g = int(runner[0])
    t_y = int(runner[1])
    t_p = int(runner[2])
    gender_color = 'b' if t_g else 'r'

    gender_record = df_2017[df_2017['M/F'] == t_g]
    gender_age_record = gender_record[gender_record.Age == t_y]

    grad_ax.plot(gender_record.Age, gender_record.Pace, '.', color=gender_color, alpha=0.5)
    grad_ax.plot(t_y, t_p, 'yd')
    stat = gender_age_record['Pace'].describe()
    print(stat)
    title = 'Gender : '+gender_list[t_g]+', Age : '+str(t_y)+', Pace : '+str(t_p)
    grad_ax.set_title(title)
    # describe() is indexed by label (count, mean, std, min, 25%, 50%, 75%,
    # max). Look values up by label: positional indices 3 and 5 are the
    # minimum and the median, not the quartiles named in the annotations.
    grad_ax.annotate('['+gender_list[t_g]+', '+str(t_y)+']', (75, 1200), fontsize=10)
    grad_ax.annotate("%10s %7i" % ('Count : ', stat['count']), (75, 1150), fontsize=10)
    grad_ax.annotate("%10s %7.3f" % ('Mean :  ', stat['mean']), (75, 1100), fontsize=10)
    grad_ax.annotate("%10s %7.3f" % ('25% :   ', stat['25%']), (75, 1050), fontsize=10)
    grad_ax.annotate("%10s %7.3f" % ('75% :   ', stat['75%']), (75, 1000), fontsize=10)

    # NOTE(review): the axes are never cleared, so repeated calls draw new
    # points and annotations on top of the previous ones - confirm intent.
    grad_fig.canvas.draw()

def learning(rank, train_num, rate):
    """Train a logistic-regression model and predict one runner's qualifying status.

    Fits a single sigmoid unit on ``x_train`` / ``y_train`` using SGD with a
    binary cross-entropy loss, prints the loss and accuracy recorded every
    100 epochs, then prints the prediction for the 2017 runner at position
    *rank* in ``x_test``.

    Args:
        rank: 1-based position of the runner in ``x_test``; any value
            accepted by ``int()``.
        train_num: Number of training epochs; any value accepted by ``int()``.
        rate: SGD learning rate; any value accepted by ``float()``.
    """
    # Imported locally so the rest of the file can be used without loading
    # TensorFlow.
    import tensorflow as tf

    t_a = int(rank) - 1
    runner = x_test[t_a]
    t_g = int(runner[0])
    t_y = int(runner[1])
    t_p = int(runner[2])

    t_t = int(train_num)
    t_r = float(rate)

    # One Dense unit with a sigmoid activation over the three input features.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(1, input_shape=(3,), activation='sigmoid'))
    # Stochastic gradient descent optimizer. `learning_rate` is the supported
    # argument name; `lr` is a deprecated alias that newer Keras rejects.
    sgd = tf.keras.optimizers.SGD(learning_rate=t_r)
    # Binary cross-entropy matches the single sigmoid output.
    model.compile(loss='binary_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    # Print a summary of the model.
    model.summary()

    # Train the model.
    history = model.fit(np.array(x_train), np.array(y_train), epochs=t_t)

    # Report the recorded loss/accuracy for every 100th epoch.
    print('\nGender :'+gender_list[t_g]+', Age :'+str(t_y)+', Pace :'+str(t_p)+'\n', 'TITLE')
    print('\n\nCost Decent\n\n','HEADER')
    print("%10s %20s %20s" % ('Step', 'Cost', 'Accuracy(%)')+'\n')
    losses = history.history['loss']
    accuracies = history.history['accuracy']
    for step in range(0, t_t, 100):
        print("%10i %20.5f %20.7f" % (step, losses[step], accuracies[step]*100)+'\n')

    # Predict for the selected runner, scaled the same way as the training data.
    winner = [t_g, t_y, t_p]
    result = model.predict(np.array([normalization(winner)]))
    # predict() returns one row per input with one value per output unit;
    # pull out the single value explicitly rather than relying on implicit
    # conversion of a 1-element array.
    probability = float(result[0][0])

    print('\n')
    print("%10s %20s" % ('Value        ', 'Qualifying Prediction\n'), 'HEADER')
    if probability > 0.5:
        print("%10.7f %20s" % (probability, 'Qualifier\n'), 'Qualifier')
    else:
        print("%10.7f %20s" % (probability, 'DisQualifier\n'), 'DisQualifier')
# Train for 10000 epochs at a learning rate of 0.01 and predict for the
# 7th runner in the test set.
learning(rank=7, train_num=10000, rate=0.01)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 4         
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________
Train on 53228 samples
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
   32/53228 [..............................] - ETA: 11s - loss: 0.3757 - accuracy: 0.8750