In [1]:

from   sklearn.linear_model    import LinearRegression
from   sklearn.metrics         import mean_squared_error, r2_score
import scipy.stats             as     stats
import statsmodels.api         as     sm
import pandas                  as     pd
import numpy                   as     np
import plotly
import plotly.plotly           as     py
import plotly.graph_objs       as     go
import seaborn                 as     sns
import matplotlib.pyplot       as     plt
from   config                  import plotly_id, plotly_key
import warnings

# Store the Plotly account credentials (imported from the local `config` module).
plotly.tools.set_credentials_file(api_key=plotly_key, username=plotly_id)

# Enable seaborn styling with its short color codes.
sns.set(color_codes=True)

# Silence every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

ModuleNotFoundError: No module named 'plotly'

In [None]:
# Load the CSV into `medals`, report its (rows, columns) shape,
# and preview the first five rows.
medals = pd.read_csv(filepath_or_buffer='Olympic_final_data_191111d.csv')
print(medals.shape)
medals.head(5)

In [None]:
# Build the modelling frame: replace NOC/Team/Year/Sport with indicator (dummy)
# columns, then append the original four columns again so the raw values stay
# available for filtering and display.
model = pd.get_dummies(
    data=medals,
    columns=['NOC','Team','Year','Sport'],
).assign(
    NOC=medals['NOC'],
    Team=medals['Team'],
    Year=medals['Year'],
    Sport=medals['Sport'],
)
print(model.shape)
model.head(5)

In [None]:
# Keep only the rows whose Year is 2000 or later and renumber the index from 0.
model = model.loc[model['Year'] >= 2000].reset_index(drop=True)
print(model.shape)
model.head(5)

In [None]:
# Regression targets: one column per medal type, taken from `model`.
y = model[[
    'Golds',
    'Silvers',
    'Bronzes',
]]
print(y.shape)
y.head(5)

In [None]:
# Feature frame: the four raw columns re-attached to `model` after get_dummies.
# NOTE(review): none of the indicator columns produced by get_dummies are selected
# here, and later cells drop 'Year', 'NOC' and 'Team' before predicting, which
# leaves only 'Sport' as a feature - confirm this is the intended feature set.
X = model[[
    'NOC',
    'Team',
    'Year',
    'Sport',
]]
print(X.shape)
X.head(5)

In [None]:
# Chronological split: rows before 2016 are used for training, 2016 rows for testing.
#
# `y` holds only the medal columns (it has no 'Year' column), so indexing it with
# y['Year'] raises KeyError. X and y are both column selections of the same `model`
# frame and therefore share its index, so masks built from X['Year'] select the
# matching rows in both.
train_mask = X['Year'] <  2016
test_mask  = X['Year'] == 2016

# reset_index(drop=True) renumbers the rows from 0 and returns new frames. The
# training frames are reset as well as the test frames so that positional
# predictions (which are numbered from 0) line up with the target rows when
# prediction columns are added later.
X_train = X[train_mask].reset_index(drop=True)
X_test  = X[test_mask].reset_index(drop=True)
print(X_train.shape)
print(X_test.shape)

y_train = y[train_mask].reset_index(drop=True)
y_test  = y[test_mask].reset_index(drop=True)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Show the first five rows of the training features.
X_train.head(n=5)

# Create linear regression objects


In [None]:

# One independent, unfitted LinearRegression estimator per medal type.
regr_golds, regr_silvers, regr_bronzes = (LinearRegression() for _ in range(3))

In [None]:
# Train the models using the training sets
#
# fit() needs both the feature matrix and the target. Every regressor is trained
# on the same features that the prediction cells pass to predict() - X_train
# without the 'Year', 'NOC' and 'Team' columns - and on its own medal-count column.
# NOTE(review): with X selected as ['NOC','Team','Year','Sport'] this leaves only
# the 'Sport' column as a feature - confirm it is numeric / the intended input.
train_features = X_train.drop(['Year','NOC','Team'], axis=1)

regr_golds.fit(train_features, y_train['Golds'])
regr_silvers.fit(train_features, y_train['Silvers'])
regr_bronzes.fit(train_features, y_train['Bronzes']);  # ';' hides the estimator repr

In [None]:
# Make predictions using the training sets
#
# For each medal type, add a '<Medal> Prediction' column to y_train holding the
# model output converted to a non-negative whole number, then add their total.
train_features = X_train.drop(['Year','NOC','Team'], axis=1)

for medal, regr in (('Golds',   regr_golds),
                    ('Silvers', regr_silvers),
                    ('Bronzes', regr_bronzes)):
    column = medal + ' Prediction'
    # Assign the ndarray returned by predict() directly: it is placed row by row.
    # Wrapping it in a new DataFrame would give it a fresh 0..n-1 index, and the
    # assignment would then align on labels - producing NaN wherever y_train's
    # index (kept from the year filter) differs, which astype('int64') rejects.
    y_train[column] = regr.predict(train_features)
    # astype('int64') truncates toward zero (it does not round); negative counts
    # are then raised to 0.
    y_train[column] = y_train[column].astype('int64').clip(lower=0)

y_train['Medals Prediction']  = y_train['Golds Prediction'] + y_train['Silvers Prediction'] + y_train['Bronzes Prediction']
y_train.head()

In [None]:
# Intercepts
# Print the fitted intercept of each model on its own line: golds, silvers, bronzes.
print(
    regr_golds.intercept_,
    regr_silvers.intercept_,
    regr_bronzes.intercept_,
    sep='\n',
)

In [None]:
# The coefficients
# One row per feature column (X_train without 'Year', 'NOC', 'Team'), with the
# coefficient each of the three models learned for it, ordered from the largest
# to the smallest golds coefficient and renumbered from 0.
columns  = X_train.columns.drop(['Year','NOC','Team'])
features = (
    pd.DataFrame({'Feature': columns})
    .assign(**{
        'Golds Coefficients':   regr_golds.coef_.T,
        'Silvers Coefficients': regr_silvers.coef_.T,
        'Bronzes Coefficients': regr_bronzes.coef_.T,
    })
    .sort_values(by='Golds Coefficients', ascending=False)
    .reset_index(drop=True)
)
features

In [None]:
# Report the test-set size and show up to its first 100 rows.
print(X_test.shape)
X_test.head(n=100)

In [None]:
# Make predictions using the test set
#
# For each medal type, add a '<Medal> Prediction' column to y_test holding the
# model output converted to a non-negative whole number, then add their total.
test_features = X_test.drop(['Year','NOC','Team'], axis=1)

for medal, regr in (('Golds',   regr_golds),
                    ('Silvers', regr_silvers),
                    ('Bronzes', regr_bronzes)):
    column = medal + ' Prediction'
    # Assign the ndarray returned by predict() directly so values are placed row
    # by row, instead of relying on a temporary DataFrame's 0..n-1 index
    # happening to match y_test's index.
    y_test[column] = regr.predict(test_features)
    # astype('int64') truncates toward zero (it does not round); negative counts
    # are then raised to 0.
    y_test[column] = y_test[column].astype('int64').clip(lower=0)

y_test['Medals Prediction']  = y_test['Golds Prediction'] + y_test['Silvers Prediction'] + y_test['Bronzes Prediction']
y_test.head(100)