# Extract Golf Score Tag Heuer

This program extracts data from a Tag Heuer JSON file. The data extracted is for one player (identified by an email or a name, given as a key).

The outcome will be:
- row number
- round id
- round date
- course
- player index
- hole
- handicap
- par (normal for the hole)
- strokes
- putts
- penalties
- fairwayHit
- bunkerHit


The class will implement a fit/transform interface. Initialization will be done with the JSON file.
The init method reads the JSON and normalizes it.
The fit method prepares the features.
The transform method returns a pandas dataset with all holes played.

In [22]:
"""
This library is specific to golf data extracted from the Tag Heuer App.
The library creates a pandas DataFrame and provides some statistical and
analytical KPIs.

"""

from datetime import datetime
import json
from pandas.io.json import json_normalize
from collections import OrderedDict
import pandas as pd
import numpy as np

class TagHeuerData:
    """Hole-by-hole golf scores read from a Tag Heuer App ``user.json`` export.

    Attributes:
        rounds: the export's ``rounds`` list flattened with
            ``pd.json_normalize``.
        holes_played: one row per hole, built from the ``scores`` of the
            first entry in each round's ``scorecard.players`` list. The
            raw ``date`` column is replaced by ``rounddate``; the columns
            ``golfCourse``, ``firstName``, ``lastName`` and ``email`` are
            added. When ``courses`` is supplied, its columns and a
            ``stroke`` column (``score.strokes - par``) are added too.
        first_date_round: smallest ``rounddate`` value, or ``None`` when
            no ``rounddate`` column exists.
        last_date_round: largest ``rounddate`` value, or ``None`` when
            no ``rounddate`` column exists.
        with_courses_data: ``True`` when ``courses`` was a DataFrame and
            has been merged into ``holes_played``.
    """

    def __init__(self, filename=None, player=None, courses=None):
        """
        Initialize the collection with a user.json file extracted from Tag Heuer App.

        Parameter:
        - filename: str, location of the source file
        - player: str, email of the player to filter on one player.
          NOTE: accepted but not used by the current implementation; no
          filtering is applied.
        - courses: dataframe, a df which contains courses definition. It is
          joined on its ``hole`` column and must provide ``par``.

        """
        self.holes_played = None
        self.first_date_round = None
        self.last_date_round = None

        with open(filename) as data_file:
            data = json.load(data_file, object_pairs_hook=OrderedDict)

        # Extract scorecards from data.
        rounds = pd.json_normalize(data['rounds'])
        self.rounds = rounds

        # The course name is read from the first column of the flattened
        # rounds table (positional, not by name).
        frames = [
            self._round_holes(players, rounds.iloc[i, 0])
            for i, players in enumerate(rounds['scorecard.players'])
        ]
        # Concatenate once instead of growing the frame round by round.
        df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

        if isinstance(courses, pd.DataFrame):
            # Merge with courses.
            # NOTE(review): the join key is the hole number only, so when
            # `courses` describes several courses every played hole is paired
            # with each course's row for that hole number - confirm intended.
            df = df.merge(courses, left_on='holeNumber', right_on='hole')
            df['stroke'] = df['score.strokes'] - df['par']
            self.with_courses_data = True
        else:
            self.with_courses_data = False

        self.holes_played = df
        if 'rounddate' in df.columns:
            self.first_date_round = df['rounddate'].min()
            self.last_date_round = df['rounddate'].max()

    @staticmethod
    def _round_holes(players, course_name):
        """Return the per-hole frame of one round, sorted by hole number.

        Only the first entry of ``players`` is read. Every row receives the
        same ``rounddate``: the timestamp of the last hole (after sorting)
        formatted as ``%Y-%m-%d`` in the machine's local time zone.
        """
        first_player = pd.json_normalize(players, errors='ignore')
        holes = pd.json_normalize(first_player['scores'][0]).sort_values('holeNumber')

        # One date per round, computed once rather than once per row.
        last_timestamp = float(holes['date'].iloc[-1])
        round_date = datetime.fromtimestamp(last_timestamp).strftime('%Y-%m-%d')

        holes = holes.drop('date', axis=1)
        holes['rounddate'] = round_date
        holes['golfCourse'] = course_name
        holes['firstName'] = first_player['roundPlayer.firstName'][0]
        holes['lastName'] = first_player['roundPlayer.lastName'][0]
        holes['email'] = first_player['roundPlayer.email'][0]
        return holes

    def holesScores(self, datefrom=None, dateto=None, chart=False):
        """Return mean ``stroke`` and ``score.strokes`` grouped by hole ``par``.

        Parameter:
        - datefrom: str, inclusive lower bound on ``rounddate``
          (``%Y-%m-%d``); defaults to ``first_date_round``
        - dateto: str, inclusive upper bound on ``rounddate``; defaults to
          ``last_date_round``
        - chart: accepted but not used by the current implementation

        Returns a DataFrame indexed by ``par``, or ``False`` (after printing
        a message) when the instance was built without course data.
        """
        if not self.with_courses_data:
            print('You must instanciate the class with course parameter')
            return False

        if datefrom is None:
            datefrom = self.first_date_round
        if dateto is None:
            dateto = self.last_date_round

        holes = self.holes_played
        # Dates are '%Y-%m-%d' strings, so string comparison is chronological.
        in_range = holes[(holes['rounddate'] >= datefrom) & (holes['rounddate'] <= dateto)]
        return in_range[['par', 'stroke', 'score.strokes']].groupby('par').mean()
        
 

In [25]:
# Load the course definitions and build the hole-by-hole dataset from the export.
coursesdata = pd.read_csv('data/courses.csv')
# Bare expression that is not the last statement of the cell; the pasted
# output shows only the head() table.
coursesdata.shape
# 'Daryl' is passed as `player`; TagHeuerData.__init__ does not use that argument.
df = TagHeuerData('data/user.json','Daryl', courses=coursesdata)
df.holes_played.head()

Unnamed: 0,holeNumber,score.strokes,score.putts,score.penalties,score.fairwayHit,score.bunkerHit,rounddate,golfCourse,firstName,lastName,email,id,hole,course,courseid,par,distance,handicap,stroke
0,1,6,2.0,0,,,2019-11-10,Golf de Gonville,Daryl,Felix,resquatordaryl@gmail.com,1,1,Golf de Gonville,1,5,480,1,1
1,1,6,3.0,0,,,2020-05-17,Golf Club Montreux,Daryl,Felix,resquatordaryl@gmail.com,1,1,Golf de Gonville,1,5,480,1,1
2,1,6,1.0,0,,,2020-05-21,Golf Parc du Signal de Bougy,Daryl,Felix,resquatordaryl@gmail.com,1,1,Golf de Gonville,1,5,480,1,1
3,1,5,2.0,0,,,2020-05-22,Golf Club Montreux,Daryl,Felix,resquatordaryl@gmail.com,1,1,Golf de Gonville,1,5,480,1,0
4,1,7,2.0,0,,,2020-05-24,Golf Club Montreux,Daryl,Felix,resquatordaryl@gmail.com,1,1,Golf de Gonville,1,5,480,1,2


In [31]:
# Mean strokes-over-par and mean strokes per hole par, over the full date range.
df.holesScores()

Unnamed: 0_level_0,stroke,score.strokes
par,Unnamed: 1_level_1,Unnamed: 2_level_1
3,1.085714,4.085714
4,0.891837,4.891837
5,0.766667,5.766667


In [17]:
# Scratch check: detect a DataFrame by looking for the class name in
# str(type(...)). TagHeuerData.__init__ uses isinstance() for the same test.
ty=str(type(coursesdata))
print(ty)
if 'DataFrame' in ty:
    True  # bare expression inside the branch; it has no effect

<class 'pandas.core.frame.DataFrame'>


In [3]:
# Earliest round date. rounddate holds '%Y-%m-%d' strings, so the string
# minimum is also the chronological minimum.
holes = df.holes_played
holes.rounddate.min()

'2019-11-10'

In [None]:
# Keep only the holes played at Golf de Gonville.
# NOTE(review): TagHeuerData.__init__ stores the hole table as `holes_played`
# and never sets a `scorecards` attribute - confirm which attribute is meant.
hp_gonville = df.scorecards.query('golfCourse == "Golf de Gonville"')

In [None]:
hp_gonville.describe()

In [None]:
# Course definitions, read from the same CSV as `coursesdata`.
courses = pd.read_csv('data/courses.csv')
courses.shape

In [None]:
# Attach the course definition to each played hole (joined on hole number),
# then order the rows by round date and hole number.
scores=hp_gonville.merge(courses, left_on = 'holeNumber', right_on= 'hole')
scores.sort_values(['rounddate','holeNumber'], inplace=True)

In [None]:
scores

In [None]:
scores.columns.tolist()

In [None]:
# Columns kept for the analysis below.
features = ['rounddate','holeNumber','par','distance','handicap',
            'score.strokes','score.putts','score.penalties']

In [None]:
# Keep the analysis columns. The explicit copy makes `scores` an independent
# frame, so adding a column below does not assign into a slice of the
# previous frame (which triggers pandas' SettingWithCopyWarning).
scores=scores[features].copy()
# Strokes relative to par for each hole played.
scores['stroke']=scores['score.strokes']-scores['par']
scores

In [None]:
scores.query('rounddate > "2000-02-01"').groupby('holeNumber').mean()['stroke'].cumsum().plot()
scores.query('rounddate > "2020-02-01"').groupby('holeNumber').mean()['stroke'].cumsum().plot()
scores.query('rounddate > "2021-02-01"').groupby('holeNumber').mean()['stroke'].cumsum().plot()
scores.query('rounddate > "2022-02-01"').groupby('holeNumber').mean()['stroke'].cumsum().plot()

In [None]:
pd.DataFrame(scores.groupby('holeNumber').mean()['stroke']).hist()
plt.title('Cumulative score')
plt.show()

In [None]:
scores.groupby('holeNumber').mean()['stroke'].cumsum().plot()