In [1]:
# Import dependencies
import pandas as pd
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf

# Read in data
olympic_df = pd.read_csv("./archive/SampleData.csv")
olympic_df.head()

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1188,"Benjamin Alexandro ""Ben"" Agosto",M,28,178,75,United States-2,USA,2010 Winter,2010,Winter,Vancouver,Figure Skating,Figure Skating Mixed Ice Dancing,Gold
1,2773,"Michael Charles ""Mike"" Aljoe",M,23,185,100,United States-1,USA,1988 Winter,1988,Winter,Calgary,Bobsleigh,Bobsleigh Men's Two,Gold
2,7068,Jill Bakken (-Linder),F,25,167,65,United States-2,USA,2002 Winter,2002,Winter,Salt Lake City,Bobsleigh,Bobsleigh Women's Two,Gold
3,12170,Dain J. Blanton,M,28,190,92,United States-1,USA,2000 Summer,2000,Summer,Sydney,Beach Volleyball,Beach Volleyball Men's Beach Volleyball,Gold
4,6312,Emily Dianne Azevedo,F,26,172,80,United States-3,USA,2010 Winter,2010,Winter,Vancouver,Bobsleigh,Bobsleigh Women's Two,Gold


In [2]:
#Create new DataFrame for medals
medal_won= olympic_df[['Medal']]

medal_won.head()

Unnamed: 0,Medal
0,Gold
1,Gold
2,Gold
3,Gold
4,Gold


In [3]:
#Assign Medal types a number
medal_wonx= medal_won.replace("Gold", 0)
medal_wony= medal_wonx.replace("Silver",1)
medal_wonz= medal_wony.replace("Bronze",2)
medal_wonz

Unnamed: 0,Medal
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [4]:
#Drop non-feature columns
olympic_df.drop(['ID', 'Name', "Team", "NOC", "Games", "City","Sport", "Event","Medal"], inplace=True, axis=1)
olympic_df.head()

Unnamed: 0,Sex,Age,Height,Weight,Year,Season
0,M,28,178,75,2010,Winter
1,M,23,185,100,1988,Winter
2,F,25,167,65,2002,Winter
3,M,28,190,92,2000,Summer
4,F,26,172,80,2010,Winter


In [5]:
#check unique value count for encoding
olympic_df.nunique(axis=0)

Sex        2
Age       18
Height    20
Weight    29
Year      17
Season     2
dtype: int64

In [6]:
#create categorical variable list for encoding
olympic_cat = olympic_df.dtypes[olympic_df.dtypes == "object"].index.tolist()
olympic_cat

['Sex', 'Season']

In [7]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(olympic_df[olympic_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(olympic_cat)
encode_df.head()

Unnamed: 0,Sex_F,Sex_M,Season_Summer,Season_Winter
0,0.0,1.0,0.0,1.0
1,0.0,1.0,0.0,1.0
2,1.0,0.0,0.0,1.0
3,0.0,1.0,1.0,0.0
4,1.0,0.0,0.0,1.0


In [8]:
#Merge encoded values and numbered medals and drop original columns
olympic_df = olympic_df.merge(encode_df,left_index=True, right_index=True)
olympic_df = olympic_df.drop(olympic_cat,1)

olympic_df = olympic_df.merge(medal_wonz,left_index=True, right_index=True)
olympic_df.head()

Unnamed: 0,Age,Height,Weight,Year,Sex_F,Sex_M,Season_Summer,Season_Winter,Medal
0,28,178,75,2010,0.0,1.0,0.0,1.0,0
1,23,185,100,1988,0.0,1.0,0.0,1.0,0
2,25,167,65,2002,1.0,0.0,0.0,1.0,0
3,28,190,92,2000,0.0,1.0,1.0,0.0,0
4,26,172,80,2010,1.0,0.0,0.0,1.0,0


In [9]:
# Split preprocessed data into features and target arrays
y = olympic_df["Medal"].values
X = olympic_df.drop(["Medal"],1).values

# Split the preprocessed data into training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=7)

In [10]:
# Create StandardScaler instance
scaler = StandardScaler()

# Fit StandardScaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Define model
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 72        
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 45        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 6         
Total params: 123
Trainable params: 123
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Compile model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [13]:
# Train model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [14]:
# Evaluate model using test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - loss: -1.2932e-01 - accuracy: 0.7333
Loss: -0.129317969083786, Accuracy: 0.7333333492279053
