Import the required libraries

In [1]:
from d3rlpy.datasets import get_cartpole
from d3rlpy.algos import DiscreteCQL, DQN, TD3PlusBC
from d3rlpy.metrics.scorer import discounted_sum_of_advantage_scorer
from d3rlpy.metrics.scorer import evaluate_on_environment
from d3rlpy.dataset import Episode
from d3rlpy.ope import DiscreteFQE
from d3rlpy.dataset import MDPDataset

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from sklearn.model_selection import train_test_split

import import_ipynb
import numpy as np
from random import random
from create_dataset import CreateDataset
from FootballEnv import FootballEnv
from tqdm import tqdm

from view import Visualiser
import math
# metrics to evaluate with
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer
from d3rlpy.metrics.scorer import soft_opc_scorer


importing Jupyter notebook from FootballEnv.ipynb


Helper function to create a dummy dataset

In [2]:
def create_dataset():

    dataset_maker = CreateDataset()
    # dataset_maker.loadFile('data.json')
    dataset_maker.loadTrackingContentFromDir('three-sixty/*.json')

    dataset_maker.loadFilesFromDir('events/*.json', filterGamesWithoutTrackingData=True)
    observations, actions, rewards, episode_info = dataset_maker.createEpisodeDataset()
    terminals = np.array([ 0 if (i+1) % dataset_maker.lim == 0 else 1 for i in range(len(actions)) ])
    return MDPDataset(
        observations,
        actions,
        rewards, 
        terminals,
    ), observations

In [3]:
dataset, observations = create_dataset()
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, shuffle=False)

100%|██████████| 3/3 [00:00<00:00,  4.40it/s]
100%|██████████| 1105/1105 [00:00<00:00, 3358.71it/s]


Events length:  5232


In [5]:
len(test_episodes)

120

In [None]:
s = {}
d = CreateDataset()
for o in observations:
    a = o[-3]
    action = d.ID_to_str[a]

    if action in s: s[action] += 1
    else: s[action] = 1

s

In [None]:
# setup CQL algorithm
cql = DiscreteCQL(use_gpu=False, batch_size=32,)

# env = FootballEnv(observations)
# env.counter = 0

# start training
output = cql.fit(

    train_episodes,
    # eval_episodes=test_episodes,
    n_epochs=25,
    
    scorers={
        # 'environment': evaluate_on_environment(env), # evaluate with Football Env
        'advantage': discounted_sum_of_advantage_scorer, # smaller is better
        'td_error': td_error_scorer, # smaller is better
        'value_scale': average_value_estimation_scorer # smaller is better
    }
    
)

In [None]:
output

In [None]:
# TODO: MAKE THIS WORK!

# off-policy evaluation algorithm
fqe = DiscreteFQE(algo=cql)

# train estimators to evaluate the trained policy
fqe.fit(test_episodes,
   eval_episodes=test_episodes,
   n_epochs=50,
   scorers={
      'init_value': initial_state_value_estimation_scorer,
      'soft_opc': soft_opc_scorer(return_threshold=600)
   }
)

Load Saved Model

In [2]:
path =  "C:\\Users\\micha\\Documents\\Masters\\Football-RL\\d3rlpy_logs\\DiscreteCQL_20220210223510\\"
m2 = DiscreteCQL.from_json(f'{path}params.json')
# d3rlpy_logs\DQN_20220410175914\model_2500.pt
# ready to load
m2.load_model(f'{path}model_36975.pt')



In [10]:
def get_softmaxed(model, state, possible_actions):
    values = [i for i in range(len(possible_actions))]

    smallest = float(math.inf)
    for action in possible_actions:
        values[action] = model.predict_value([state], [action])
        smallest = min(smallest, values[action])
        
    print("before before" , values)
    smallest = abs(smallest)
    # make all positive
    for action in possible_actions:
        print(action)
        values[action] += smallest

    print("before", values)
    values = np.array(values)
    values = np.exp(values) / np.sum(np.exp(values), axis=0)
    return values
    


Visualise predictions

In [4]:
ds = CreateDataset()
# ds.loadFilesFromDir('events/*.json')
ds.loadTrackingContentFromDir('three-sixty/*.json')

ds.loadFilesFromDir('events/*.json', filterGamesWithoutTrackingData=True)
# ds.loadFile('data.json')

visualiser = Visualiser()

# ds.loadFilesFromDir('events/*.json')

print("hello")
observations, actions, rewards, event_ids = ds.createEpisodeDataset()
answers = {}
players = {}

a_set = set()
for a in actions:
    a_set.add(a)

for item in a_set:
    print(ds.ID_to_str[item])


print(observations.shape, len(event_ids))
for i, situation in tqdm(enumerate(observations)):

    # print(situation)
    predictions = m2.predict([situation])[0]
    g_s_m = get_softmaxed(m2, situation, list(ds.good_events.values()))
    print(g_s_m, sum(g_s_m))

    print("One Hot", m2.predict([situation])[0])
    for key, value in ds.good_events.items():
        print(key, value)
        print(f"{key}: ", m2.predict_value([situation], [value]))
        
    p = ds.ID_to_str[predictions]
    if not (p in answers): answers[p] = 1
    else: answers[p] += 1

    # if(p == "shot"):
    #     print(event_ids[i])
    #     print([ds.getPlayerFromActionID(event) for event in event_ids[i]])
    event = ds.getActionFromID(event_ids[i][-2])
    
    if(p == "shot"):

        event = ds.getActionFromID(event_ids[i][-2])
        visualiser.visualise_sequence(situation, 3, predictions, ds.getIDFromAction(event))

    player = event['player']
    if not(player['id'] in players):
        players[player['id']] = {}
        players[player['id']]['name'] = player['name']
        players[player['id']]['good'] = 0
        players[player['id']]['bad'] = 0
        

    # print("Action taken:", ds.ID_to_str[ds.getIDFromAction(event)], p)
    if predictions ==  ds.getIDFromAction(event):
        players[player['id']]['good'] += 1
    else:
        players[player['id']]['bad'] += 1

    if((players[player['id']]['good'] + players[player['id']]['bad']) > 0):
        players[player['id']]['ratio'] = players[player['id']]['good'] / (players[player['id']]['good'] + players[player['id']]['bad']) 
    else:
        players[player['id']]['ratio'] = 0
        
    
    # if(p == "clearance"):
    #     visualiser.visualise_sequence(situation, 3, predictions)

print("finished")

In [None]:
players_2 = {}
for key, player in players.items():
    if 'good' in player and 'bad' in player and (player['good'] + player['bad'] > 5):
        players_2[key] = player


players_sorted =  sorted(players_2.items(), key=lambda x: x[1]['ratio'], reverse=True)
players_sorted[:10]


In [None]:

players_sorted =  sorted(players_2.items(), key=lambda x: x[1]['ratio'], reverse=True)
players_sorted[:10]


In [None]:
counts = {}

for item in actions:
    if not (item in counts): counts[item] = 1
    else: counts[item] += 1

counts


In [None]:
self.good_events = {
    'pass': self.PASS,
    'shot': self.SHOOT,
    'carry':self.CARRY,
    'clearance':self.CLEAR,
    # 'foul won': self.FOUL,
    'foul': self.FOUL,
}

In [None]:
import json

player_data = {}
events = {}
with open ("data.json", "r") as file:
    events = json.load(file)

ds.ID_to_str[predictions]

count = 0
for event in events:
    if event['type']['name'].lower() in ds.good_events:
        player = event['player']

        if player['id'] in player_data:
        else:
            player_data['id'] = {}
            player_data['id']['name'] = player['name']
            player_data['id']['good'] += 1

count


Training using images

In [25]:
from d3rlpy.datasets import get_cartpole
from d3rlpy.algos import DiscreteCQL, DQN, CQL
from d3rlpy.metrics.scorer import discounted_sum_of_advantage_scorer
from d3rlpy.metrics.scorer import evaluate_on_environment
from d3rlpy.dataset import Episode
from d3rlpy.ope import DiscreteFQE
from d3rlpy.dataset import MDPDataset

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from sklearn.model_selection import train_test_split

import import_ipynb
import numpy as np
from random import random
from create_dataset import CreateDataset
from FootballEnv import FootballEnv
from tqdm import tqdm

from view import Visualiser
import math
# metrics to evaluate with
from d3rlpy.metrics.scorer import initial_state_value_estimation_scorer
from d3rlpy.metrics.scorer import soft_opc_scorer


def create_dataset_images():

    datasetMaker = CreateDataset()
    datasetMaker.loadTrackingContentFromDir('three-sixty/*.json')
    datasetMaker.file_limit = 5
    datasetMaker.loadFilesFromDir('events/*.json', filterGamesWithoutTrackingData=True)
    observations, actions, rewards, event_ids, terminals = datasetMaker.createImageDataset()

    print("Actions shape:", actions.shape)
    print("Observation shape:", observations.shape)

    return MDPDataset(
        observations,
        np.array(actions, dtype=np.float32),
        rewards, 
        terminals,
    )

In [26]:
dataset = create_dataset_images()

print(len(dataset))
train_episodes, test_episodes = train_test_split(dataset, test_size=0.2, shuffle=False)
len(dataset), len(train_episodes)

  2%|▏         | 1/50 [00:00<00:06,  7.04it/s]

Loading tracking content:


100%|██████████| 50/50 [00:10<00:00,  4.95it/s]
 58%|█████▊    | 641/1105 [00:00<00:00, 7122.72it/s]
  weights = 1 / dist
  weights /= weights.sum(axis=0)
100%|██████████| 1821/1821 [00:16<00:00, 109.19it/s]


Actions shape: (1821, 3)
Observation shape: (1821, 3, 160, 240)
205


(205, 164)

In [27]:
# setup td3 algorithm
cql = CQL(use_gpu=False, scaler='pixel', n_frames=1)

# start training
output = cql.fit(

    dataset,
    n_epochs = 50,
    # batch_size = 64,
    # eval_episodes=test_episodes,
    # n_epochs=25,
    
    scorers={
        # 'environment': evaluate_on_environment(env), # evaluate with Football Env
        'advantage': discounted_sum_of_advantage_scorer, # smaller is better
        'td_error': td_error_scorer, # smaller is better
        'value_scale': average_value_estimation_scorer # smaller is better
    }
    
)

AssertionError: The action-space of the given dataset is not compatible with the algorithm. Please use discrete action-space algorithms. The algorithms list is available below.
https://d3rlpy.readthedocs.io/en/v1.0.0/references/algos.html

In [None]:
import cv2
# d3rlpy_logs\DQN_20220410175914\model_2500.pt

In [12]:
path =  "C:\\Users\\micha\\Documents\\Masters\\Football-RL\\d3rlpy_logs\\DQN_20220410175914\\"
dqn = DQN.from_json(f'{path}params.json')
# 
# ready to load
dqn.load_model(f'{path}model_2500.pt')



In [18]:
# ds = CreateDataset()
# ds.loadTrackingContentFromDir('three-sixty/*.json')
# ds.file_limit = 10
# ds.loadFilesFromDir('events/*.json', filterGamesWithoutTrackingData=True)
# observations, actions, rewards, event_ids, terminals = ds.createImageDataset()

# answers = {}
# players = {}

import cv2
# for item in a_set:
#     print(ds.ID_to_str[item])

print(observations.shape, len(event_ids))
for i, situation in tqdm(enumerate(observations[:4])):

    print(situation[0].shape)
    cv2.imshow("image1", situation[0])
    cv2.imshow("image2", situation[1])
    cv2.imshow("image3", situation[2])

    cv2.waitKey(0)
    cv2.destroyAllWindows()

    predictions = dqn.predict([situation])
    print(actions.shape)
    print(actions[0])

    print("PREDICTIONS !!!!!!!!!!!!")
    print(predictions)

    break
    
    # if(p == "shot"):
    #     print(event_ids[i])
    #     print([ds.getPlayerFromActionID(event) for event in event_ids[i]])
    # event = ds.getActionFromID(event_ids[i][-2])
    

0it [00:00, ?it/s]

(1821, 3, 160, 240) 0
(160, 240)


0it [00:01, ?it/s]

(1821, 3)
[0. 4. 4.]
PREDICTIONS !!!!!!!!!!!!
[3]



