## Dependencies

In [1]:
import numpy as np
import pandas as pd
import tensorflow

# Olympic Data
We pared down a Kaggle data set to the Summer data. We included only the data necessary for our searching.
## The Dataset
The following information about each participant in each event are included within the CSV:

* ID: unique identifier for each participant
* Name: each participant's name
* Sex: M for male, F for female
* Age: in years (11 to 71)
* Height: in cm
* Weight: in kilograms
* Team: Group the participant is competing with
* NOC: Three letter country abbreviation
* Year: Year of Olympic event (1896-2016)
* Sport: Category of competetion
* Medal: NA(no medal), Bronze, Silver, Gold

In [2]:
olympics_unfiltered = pd.read_csv('Summer_Olympics.csv')
olympics_unfiltered.head()

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Year,Sport,Medal_Type,Medals
0,1,A Dijiang,M,24,180,80.0,China,CHN,1992,Basketball,No Medal,No Medal
1,2,A Lamusi,M,23,170,60.0,China,CHN,2012,Judo,No Medal,No Medal
2,12,Jyri Tapani Aalto,M,31,172,70.0,Finland,FIN,2000,Badminton,No Medal,No Medal
3,13,Minna Maarit Aalto,F,30,159,55.5,Finland,FIN,1996,Sailing,No Medal,No Medal
4,13,Minna Maarit Aalto,F,34,159,55.5,Finland,FIN,2000,Sailing,No Medal,No Medal


In [3]:
olympics_filtered = olympics_unfiltered[olympics_unfiltered["Year"] > 1913].reset_index()
olympics_filtered = olympics_unfiltered[olympics_unfiltered["Sport"] != "Art Competitions"].reset_index()

In [9]:
physical_event_df = olympics_filtered[["Sex", "Age", "Height", "Weight", "Year", "Sport", "Medal_Type ", "NOC"]]
physical_event_df.head()

Unnamed: 0,Sex,Age,Height,Weight,Year,Sport,Medal_Type,NOC
0,M,24,180,80.0,1992,Basketball,No Medal,CHN
1,M,23,170,60.0,2012,Judo,No Medal,CHN
2,M,31,172,70.0,2000,Badminton,No Medal,FIN
3,F,30,159,55.5,1996,Sailing,No Medal,FIN
4,F,34,159,55.5,2000,Sailing,No Medal,FIN


## Data Pre-Processing Medal

In [10]:
X = physical_event_df.drop("Medal_Type ", axis=1)
y = physical_event_df["Medal_Type "]
print(X.shape, y.shape)

(166677, 7) (166677,)


In [11]:
X = pd.get_dummies(X)
X.head()
X.shape

(166677, 274)

## Dependencies

In [12]:
import tensorflow.keras
from sklearn.preprocessing import *
from keras.models import Sequential
from keras.utils import to_categorical
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

Using TensorFlow backend.


In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

## One-hot encode the labels

In [14]:
## y_train Encoder
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y = label_encoder.transform(y_train)
len(encoded_y)

125007

In [15]:
one_hot_y = to_categorical(encoded_y)
len(one_hot_y)

125007

In [16]:
## y_test Encoder
label_encoder = LabelEncoder()
label_encoder.fit(y_test)
encoded_y_test = label_encoder.transform(y_test)
len(encoded_y_test)

41670

In [17]:
one_hot_y_test = to_categorical(encoded_y_test)
len(one_hot_y_test)

41670

# Create a Deep Learning Model

In [18]:
#Scale the Dataset
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


In [25]:
#Create the Model
model = tensorflow.keras.Sequential()
model.add(Dense(units=100, activation='relu', input_dim=274))
model.add(Dense(units=50, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

In [26]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               27500     
_________________________________________________________________
dense_7 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_8 (Dense)              (None, 4)                 204       
Total params: 32,754
Trainable params: 32,754
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Fit the model
model.fit(
    X_train_scaled,
    one_hot_y,
    epochs=10,
    shuffle=True,
    verbose=2,)

Epoch 1/10
125007/125007 - 5s - loss: 0.5095 - acc: 0.8519
Epoch 2/10
125007/125007 - 5s - loss: 0.4719 - acc: 0.8560
Epoch 3/10
125007/125007 - 5s - loss: 0.4582 - acc: 0.8572
Epoch 4/10
125007/125007 - 5s - loss: 0.4492 - acc: 0.8581
Epoch 5/10
125007/125007 - 5s - loss: 0.4430 - acc: 0.8588
Epoch 6/10
125007/125007 - 5s - loss: 0.4381 - acc: 0.8596
Epoch 7/10
125007/125007 - 5s - loss: 0.4336 - acc: 0.8600
Epoch 8/10
125007/125007 - 5s - loss: 0.4294 - acc: 0.8609
Epoch 9/10
125007/125007 - 5s - loss: 0.4265 - acc: 0.8611
Epoch 10/10
125007/125007 - 5s - loss: 0.4234 - acc: 0.8616


<tensorflow.python.keras.callbacks.History at 0xb313c5320>

In [28]:
print(" ")
print(" ")
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, one_hot_y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")
print(" ")
print(" ")

 
 
41670/41670 - 1s - loss: 0.4468 - acc: 0.8613
Normal Neural Network - Loss: 0.4467886146604085, Accuracy: 0.8613150715827942
 
 


In [24]:
## https://www.tensorflow.org/model_optimization/guide/pruning/pruning_with_keras
import tensorflow_model_optimization as tfmot

In [48]:
pruning_schedule = tfmot.sparsity.keras.PolynomialDecay(
                        initial_sparsity=0, final_sparsity=.9,
                        begin_step=1000, end_step=5000)

model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(model, pruning_schedule=pruning_schedule)



In [49]:
model_for_pruning.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [51]:
model_for_pruning.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_dense_3  (None, 100)               54902     
_________________________________________________________________
prune_low_magnitude_dense_4  (None, 50)                10052     
_________________________________________________________________
prune_low_magnitude_dense_5  (None, 2)                 204       
Total params: 65,158
Trainable params: 32,652
Non-trainable params: 32,506
_________________________________________________________________


In [50]:
model_for_pruning.fit(
    X_train_scaled,
    one_hot_y,
    batch_size=1000,
    epochs=2,
    shuffle=True,
    verbose=1,
    validation_data=(X_test_scaled, one_hot_y_test)
)

Train on 125007 samples, validate on 41670 samples
Epoch 1/2


InvalidArgumentError: assertion failed: [Prune() wrapper requires the UpdatePruningStep callback to be provided during training. Please add it as a callback to your model.fit call.] [Condition x >= y did not hold element-wise:x (prune_low_magnitude_dense_3_2/cond/assert_greater_equal/ReadVariableOp:0) = ] [-1] [y (prune_low_magnitude_dense_3_2/cond/assert_greater_equal/y:0) = ] [0]
	 [[{{node prune_low_magnitude_dense_3_2/cond/assert_greater_equal/Assert/Assert}}]]