# This notebook contains the following Python tutorials
1. Object-orientied programming
2. Matrix calculus using numpy
3. Statistical analysis using pandas
4. Write/read results to SQLDB 

Further explanation of notebook can be found in following blog:

## 1: Object-orientied programming

### 1a. Load necessary libraries

In [1]:
# Load necessary libraries
import numpy as np
from datetime import date

### 1b. Create 3 classes

In [2]:
# Class 1: Player
class Player(object):
    _positionList = {'goalkeeper', 'defender', 'midfielder', 'striker'}
    
    def __init__(self,p):
        # Python and (C#,Java) differ in this behaviour, construct variable in constructor, otherwise static behaviour
        # See also https://bugs.python.org/issue1437
        self._playerName=p
        self._trainingList=[]
        self._trainingRawData = np.empty((0,3), int)

    def setAge(self, age):
        self._age=age
        print(self._playerName + " has age " + str(age))
        
    def setPosition(self, pos):
        if pos in self._positionList:
            self._position=pos
            print(self._playerName + " will be playing on position " + pos)
        else:
            raise ValueError("Value {} not in Enum list.".format(pos))        

    def setTraining(self, t, rawdata):
        self._trainingList.append(t)
        self._trainingRawData = np.append(self._trainingRawData, rawdata, axis=0)

    def getTrainingRawData(self):
        return self._trainingRawData

    def getTrainingFilter(self, stage, tt, date):
        for index, training in enumerate(self._trainingList):
            if training.getStage() == stage and training.getTrainingType() == tt and training.getDate() == date:
                return self._trainingRawData[index]

    def getName(self):
        return self._playerName

In [3]:
# Class 2: FirstTeamPlayer
class FirstTeamPlayer(Player):
    def __init__(self,ftp):
        Player.__init__(self, ftp)

    def setPosition(self,pos1, pos2):
        if pos1 in self._positionList and pos2 in self._positionList:
            self._positionComp=pos1
            self._positionCL=pos2
            print(self._playerName + " will be playing on position " + pos1 + " in the competition and " + pos2 + " in the CL")
        else:
            raise ValueError("Value {} and/or {} not in Enum list.".format(pos1, pos2))   

    def setNumber(self,number):
        self._number=number
        print(self._playerName + " has number " + str(number))

In [4]:
### Class 3: Training
class Training(object):
    _stageList = {'ArenA', 'Toekomst', 'Pool', 'Gym'}
    _trainingTypeList = {'strength', 'cardio', 'technique', 'friendly game'}

    def __init__(self, stage, tt, date):

        if stage in self._stageList:
            self._stage = stage
        else:
            raise ValueError("Value {} not in Enum list.".format(stage))
        if tt in self._trainingTypeList:
            self._trainingType = tt
        else:
            raise ValueError("Value {} not in Enum list.".format(tt))

        # To do: Test for valid date (python does not use strong typing)
        self._date = date

    def getStage(self):
        return self._stage

    def getTrainingType(self):
        return self._trainingType

    def getDate(self):
        return self._date

### 1c. Inheritance

In [5]:
# Construct two players, FirstTeamPlayer class inherits from Player class
player1 = Player("Janssen")
player2 = FirstTeamPlayer("Tadic")

In [6]:
# Set age of players. Notice that setAge method is inherited and is not present in class FirstTeamPlayer
player1.setAge(20)
player2.setAge(30)

Janssen has age 20
Tadic has age 30


### 1d. Polymorphism

In [7]:
# Set position of player. Notice that setPosition method is redefined (overloaded) in class FirstTeamPlayer
player1.setPosition("goalkeeper")
player2.setPosition("midfielder", "striker")

Janssen will be playing on position goalkeeper
Tadic will be playing on position midfielder in the competition and striker in the CL


In [8]:
# Set Numer of Player. Notice that setNumber method is only present in class  
player2.setNumber(10)
#player.setNumber(99) will result in failure

Tadic has number 10


### 1e. Data encapsulation

In [9]:
# Create new traning object and add traningsdata to player object. Notice that a numpy array is used as attribute
training1=Training('Toekomst', 'cardio', date(2019,4,19))
player1.setTraining(training1, rawdata=np.random.rand(1,3))
player2.setTraining(training1, rawdata=np.random.rand(1,3))

In [10]:
# Add two new trainings objects
training2=Training('ArenA', 'friendly game', date(2019,4,20))
player1.setTraining(training2, rawdata=np.random.rand(1,3))
player2.setTraining(training2, rawdata=np.random.rand(1,3))

training3=Training('Gym', 'strength', date(2019,4,20))
player1.setTraining(training3, rawdata=np.random.rand(1,3))
player2.setTraining(training3, rawdata=np.random.rand(1,3))

In [11]:
# Retrieve data using get methods. Notice that if an attribute does not have get method, its data cannot be retrieved
print ("All trainingdata from player: " + player1.getName())
print (player1.getTrainingRawData())
print ("All trainingdata from player: " + player2.getName())
print (player2.getTrainingRawData())
print ("Trainingdata from player " + player1.getName() + " for first training ")
print (player1.getTrainingFilter('ArenA', 'friendly game', date(2019,4,20)))
print ("Trainingdata from player " + player2.getName() + " for last training ")
print (player2.getTrainingFilter('Gym', 'strength', date(2019,4,20)))

All trainingdata from player: Janssen
[[0.91874195 0.95283054 0.27498952]
 [0.15119496 0.64071022 0.0586225 ]
 [0.62107961 0.49377632 0.57355772]]
All trainingdata from player: Tadic
[[0.47795618 0.64172318 0.25835417]
 [0.72819817 0.29756663 0.86216762]
 [0.96381473 0.91784352 0.258435  ]]
Trainingdata from player Janssen for first training 
[0.15119496 0.64071022 0.0586225 ]
Trainingdata from player Tadic for last training 
[0.96381473 0.91784352 0.258435  ]


In [12]:
# this will not work
#player.getAge()
# This still works and differs from Java, C#. See explanation: https://stackoverflow.com/questions/797771/python-protected-attributes
player1._age

20

## 2: Matrix calculus using numpy

## 2a. Create numpy matrix data

In [13]:
# Take the matrix data from player objecs that were created earlier
m1=player1.getTrainingRawData()
m2=player2.getTrainingRawData()
print(m1)
print(m2)

[[0.91874195 0.95283054 0.27498952]
 [0.15119496 0.64071022 0.0586225 ]
 [0.62107961 0.49377632 0.57355772]]
[[0.47795618 0.64172318 0.25835417]
 [0.72819817 0.29756663 0.86216762]
 [0.96381473 0.91784352 0.258435  ]]


## 2b. Simple operations

In [14]:
#Access values
print(m1.shape)

print(m1[0][1])
print(m1[0,1])
print(m1[0])

print(m1[0:1])
print(m1[0:2])

print(m1[:,0])
print(m1[0:1,0:])
print(m1[1:3,1:3])

(3, 3)
0.952830543186292
0.952830543186292
[0.91874195 0.95283054 0.27498952]
[[0.91874195 0.95283054 0.27498952]]
[[0.91874195 0.95283054 0.27498952]
 [0.15119496 0.64071022 0.0586225 ]]
[0.91874195 0.15119496 0.62107961]
[[0.91874195 0.95283054 0.27498952]]
[[0.64071022 0.0586225 ]
 [0.49377632 0.57355772]]


In [15]:
#Arithmetic
tmp1=m1-m2
tmp2=m1+m2
tmp3=m1*m2 # [m2_11*m1_11,  ..,  m1_33*m2_33]
tmp4=m1/m2 # [m2_11/m1_11,  ..,  m1_33/m2_33]

print(tmp1)
print(tmp2)
print(tmp3)
print(tmp4)

[[ 0.44078577  0.31110737  0.01663535]
 [-0.57700321  0.34314359 -0.80354512]
 [-0.34273512 -0.42406719  0.31512272]]
[[1.39669812 1.59455372 0.5333437 ]
 [0.87939313 0.93827686 0.92079012]
 [1.58489434 1.41161984 0.83199271]]
[[0.43911839 0.61145344 0.07104469]
 [0.11009989 0.19065398 0.05054242]
 [0.59860568 0.4532094  0.14822739]]
[[1.92223051 1.48479996 1.06438971]
 [0.20762886 2.15316554 0.06799432]
 [0.6443973  0.53797441 2.21935003]]


### 2c. Matrix operations

In [16]:
tmp1 = m1.dot(m2) # [m1_11 * m2_11 + m1_23 * m2_32 +  m1_13 * m2_31, ..., m1_31 * m2_13 + m1_23 * m2_32 +  m1_33 * m2_33]
tmp2 = m1.dot(np.array([0,1,2])) 
tmp3 = m1 + 1

print(tmp1)
print(tmp2)
print(tmp3)

[[1.3980068  1.12550593 1.12992738]
 [0.59532981 0.34148558 0.60661156]
 [1.20921923 1.07192877 0.73440385]]
[1.50280959 0.75795523 1.64089176]
[[1.91874195 1.95283054 1.27498952]
 [1.15119496 1.64071022 1.0586225 ]
 [1.62107961 1.49377632 1.57355772]]


In [17]:
#more advanced operations
# tranpose matrix
tmp1 = m1.T
print(tmp3)

# inverse matrix
m1_inv = np.linalg.inv(m1)
print("inverse matrix")
print(m1_inv)
print(m1.dot(m1_inv))

# calculate eigenvalues
tmp2, tmp3 = np.linalg.eig(m1)
print("eigen values")
print(tmp2)
print(tmp3)

# singular value decomposition
u, s, vh = np.linalg.svd(m1, full_matrices=True)
print("singular value decomposition")
print(u)
print(s)
print(vh)

[[1.91874195 1.95283054 1.27498952]
 [1.15119496 1.64071022 1.0586225 ]
 [1.62107961 1.49377632 1.57355772]]
inverse matrix
[[ 1.94344365 -2.35781929 -0.69078554]
 [-0.28881332  2.04461345 -0.0705068 ]
 [-1.85582727  0.79296601  2.55222336]]
[[ 1.00000000e+00 -1.28615691e-16  1.44632206e-16]
 [-3.20462908e-17  1.00000000e+00  2.90484605e-17]
 [-1.05952891e-16 -1.08474000e-16  1.00000000e+00]]
eigen values
[1.41475334 0.28267078 0.43558577]
[[-0.73205556 -0.45322235  0.01172907]
 [-0.19248553  0.04563043 -0.28275253]
 [-0.65348602  0.89022883  0.95912118]]
singular value decomposition
[[-0.77835511 -0.18454495 -0.60008873]
 [-0.32837102 -0.69499591  0.63965081]
 [-0.53510354  0.69492723  0.4803544 ]]
[1.72473525 0.44297391 0.22799994]
[[-0.63609614 -0.70518223 -0.31320876]
 [ 0.35436866 -0.62755938  0.69324748]
 [-0.6854229   0.32998068  0.64908258]]


## 3. Statistical analysis using pandas

### 3a. Load libraries, create Pandas dataframes

In [18]:
# load libraries
import pandas as pd

In [19]:
# Create the same matrices as earlier
m1=player1.getTrainingRawData()
m2=player2.getTrainingRawData()

columns = np.array(['col1', 'col2', 'col3'])

# Create pandas dataframe
df_1=pd.DataFrame(data=m1, columns=columns)
df_2=pd.DataFrame(data=m2, columns=columns)

In [20]:
data = player1.getTrainingRawData()
columns = np.array(['col1', 'col2', 'col3'])
print(data)
print(data[0])

df_1=pd.DataFrame(data=data, columns=columns)

print(df_1)
print(df_2)

[[0.91874195 0.95283054 0.27498952]
 [0.15119496 0.64071022 0.0586225 ]
 [0.62107961 0.49377632 0.57355772]]
[0.91874195 0.95283054 0.27498952]
       col1      col2      col3
0  0.918742  0.952831  0.274990
1  0.151195  0.640710  0.058623
2  0.621080  0.493776  0.573558
       col1      col2      col3
0  0.477956  0.641723  0.258354
1  0.728198  0.297567  0.862168
2  0.963815  0.917844  0.258435


### 3b. Simple operations

In [21]:
print(df_1['col1'])
print(df_1.iloc[0])
print(df_1[['col1', 'col2']])
print(df_1[['col1', 'col2']].iloc[1:2])

tmp1 = df_1 - df_2
tmp2 = df_1 + df_2
tmp3 = df_1 * df_2
tmp4 = df_1 / df_2

print(tmp1)
print(tmp2)
print(tmp3)
print(tmp4)

0    0.918742
1    0.151195
2    0.621080
Name: col1, dtype: float64
col1    0.918742
col2    0.952831
col3    0.274990
Name: 0, dtype: float64
       col1      col2
0  0.918742  0.952831
1  0.151195  0.640710
2  0.621080  0.493776
       col1     col2
1  0.151195  0.64071
       col1      col2      col3
0  0.440786  0.311107  0.016635
1 -0.577003  0.343144 -0.803545
2 -0.342735 -0.424067  0.315123
       col1      col2      col3
0  1.396698  1.594554  0.533344
1  0.879393  0.938277  0.920790
2  1.584894  1.411620  0.831993
       col1      col2      col3
0  0.439118  0.611453  0.071045
1  0.110100  0.190654  0.050542
2  0.598606  0.453209  0.148227
       col1      col2      col3
0  1.922231  1.484800  1.064390
1  0.207629  2.153166  0.067994
2  0.644397  0.537974  2.219350


In [22]:
## 3c. Simple operations

row_count=df_1.col1.count()
col_count=len(df_1.columns)
# add column
tmp1 = df_1.assign(col4=pd.Series(np.random.rand(row_count)).values)
# add record
tmp2 = df_1.loc[df_1.index.max()+1] = np.random.rand(col_count)
# remove column
tmp3 = df_1.drop(['col1'], axis=1)
# remove record
tmp4 = df_1.drop(1)

print(tmp1)
print(tmp2)
print(tmp3)
print(tmp4)

       col1      col2      col3      col4
0  0.918742  0.952831  0.274990  0.837764
1  0.151195  0.640710  0.058623  0.869395
2  0.621080  0.493776  0.573558  0.745112
[0.22185515 0.41283858 0.26600653]
       col2      col3
0  0.952831  0.274990
1  0.640710  0.058623
2  0.493776  0.573558
3  0.412839  0.266007
       col1      col2      col3
0  0.918742  0.952831  0.274990
2  0.621080  0.493776  0.573558
3  0.221855  0.412839  0.266007


### 3c. Statistical operations

In [23]:
tmp1=df_1.stack().sum() 
tmp2=df_1.stack().mean() 
tmp3=df_1.stack().median() 
tmp4=df_1.stack().std() 

print("statistics all")
print("sum all:" + str(tmp1))
print("mean all:" + str(tmp2))
print("median all:" + str(tmp3))
print("std all:" + str(tmp4))

tmp1=df_1['col1'].sum()
tmp2=df_1['col1'].mean()
tmp3=df_1['col1'].median()
tmp4=df_1['col1'].std()

print("statistics col1")
print("sum col1:" + str(tmp1))
print("mean col1:" + str(tmp2))
print("median col1:" + str(tmp3))
print("std col1:" + str(tmp4))

statistics all
sum all:5.586203610989052
mean all:0.4655169675824209
median all:0.4533074503130595
std all:0.2880757245855384
statistics col1
sum col1:1.9128716689298828
mean col1:0.4782179172324707
median col1:0.4214673821332439
std col1:0.35922915891128215


In [24]:
print("multiple statistics")
tmp1=df_1.stack().describe()
tmp2=df_1['col1'].describe()

print("statistics all:" + str(tmp1))
print("statistics col1:" + str(tmp2))

multiple statistics
statistics all:count    12.000000
mean      0.465517
std       0.288076
min       0.058623
25%       0.254969
50%       0.453307
75%       0.625987
max       0.952831
dtype: float64
statistics col1:count    4.000000
mean     0.478218
std      0.359229
min      0.151195
25%      0.204190
50%      0.421467
75%      0.695495
max      0.918742
Name: col1, dtype: float64


In [25]:
tmp1=df_1.cov()
tmp2=df_1.corr()

print("correlation:\n " + str(tmp1))
print("\n")
print("covariance:\n " + str(tmp2))

correlation:
           col1      col2      col3
col1  0.129046  0.058308  0.038571
col2  0.058308  0.056651 -0.013558
col3  0.038571 -0.013558  0.044899


covariance:
           col1      col2      col3
col1  1.000000  0.681949  0.506725
col2  0.681949  1.000000 -0.268836
col3  0.506725 -0.268836  1.000000


## 4. Write/read results to SQLDB 

### 4a. Load necessary libraries

In [26]:
import pyodbc

### 4b. Create tables in SQLDB

In [27]:
# Go to your SQLDB using SQL Server Management Studio (SMSS) and execute to following script in SQLDB:
script = """

USE [Master]
GO

CREATE DATABASE pythontest
GO

USE [pythontest]

CREATE TABLE [dbo].[trainingsdata]
(  
 [col1] [float] NOT NULL,
 [col2] [float] NOT NULL,
 [col3] [float] NOT NULL
)
GO

"""

### 4c. Create connection

In [28]:
server  = 'demo-dsvmwin-vm'
database = 'pythontest'
driver= '{ODBC Driver 17 for SQL Server}'

In [36]:
# Make connection to database
cnxn = pyodbc.connect('DRIVER='+driver+';SERVER='+server+';PORT=1433;DATABASE='+database + ';Trusted_Connection=yes;')
cursor = cnxn.cursor()

### 4d. Write/read results to SQLDB

In [37]:
#Write results, use pandas dataframe
for index,row in df_1.iterrows():
    cursor.execute("INSERT INTO dbo.trainingsdata([col1],[col2],[col3]) VALUES (?,?,?)", row['col1'], row['col2'], row['col3'])
    cnxn.commit()

In [38]:
#Read results, use pandas dataframe
sql = "SELECT [col1], [col2], [col3] FROM dbo.trainingsdata"
df_1read = pd.read_sql(sql,cnxn)
print(df_1read)
cursor.close()

        col1      col2      col3
0   0.582270  0.031003  0.802630
1   0.104255  0.641051  0.133616
2   0.172017  0.497840  0.225676
3   0.348590  0.854295  0.151956
4   0.147089  0.216612  0.792685
5   0.393516  0.940553  0.696722
6   0.918742  0.952831  0.274990
7   0.151195  0.640710  0.058623
8   0.621080  0.493776  0.573558
9   0.221855  0.412839  0.266007
10  0.918742  0.952831  0.274990
11  0.151195  0.640710  0.058623
12  0.621080  0.493776  0.573558
13  0.221855  0.412839  0.266007
14  0.918742  0.952831  0.274990
15  0.151195  0.640710  0.058623
16  0.621080  0.493776  0.573558
17  0.221855  0.412839  0.266007
