## Sanity Test DB

### SQL

In [1]:
import sqlite3

con = sqlite3.connect('srcftbl.db')
cur = con.cursor()

In [2]:
cur.execute("SELECT name FROM sqlite_master WHERE type='table';")

print(cur.fetchall())

[('metadata',), ('track',), ('possession',), ('frame',), ('player',)]


In [18]:
cur.execute("PRAGMA table_info('track')")
print(cur.fetchall())

[(0, 'index', 'INTEGER', 0, None, 0), (1, 'game_id', 'TEXT', 0, None, 0), (2, 'frame', 'INTEGER', 0, None, 0), (3, 'track_id', 'INTEGER', 0, None, 0), (4, 'trackable_object', 'TEXT', 0, None, 0), (5, 'is_visible', 'INTEGER', 0, None, 0), (6, 'x', 'REAL', 0, None, 0), (7, 'y', 'REAL', 0, None, 0), (8, 'z', 'REAL', 0, None, 0)]


### Parquet

In [1]:
import pandas as pd

from pyspark import SparkConf,SparkContext
conf = SparkConf()
conf.set('spark.setLogLevel', 'ERROR')
SparkContext(conf=conf)

import pyspark.pandas as ps

ps.set_option('compute.ops_on_diff_frames', True)

23/12/11 17:39:42 WARN Utils: Your hostname, ALDO-DESKTOP resolves to a loopback address: 127.0.1.1; using 172.19.19.57 instead (on interface eth0)
23/12/11 17:39:42 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/11 17:39:43 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/12/11 17:39:43 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [2]:
track_df = pd.read_parquet("track.parquet")

track_df.sample(5)

Unnamed: 0,game_id,frame,track_id,trackable_object,is_visible,x,y,z
1387173,10009,18381,504,504,False,20.58,25.45,
1575970,10009,29119,18862,18862,False,-9.45,-18.42,
1444396,10009,21885,4686,4686,True,25.49,20.04,
632042,10000,34544,11537,11537,True,-29.96,-0.14,
1419324,10009,20130,4174,4174,True,-32.53,-16.51,


In [5]:
track_df.sample(5).to_csv('track_sample.csv', index=False)

In [6]:
player_df = pd.read_parquet("player.parquet")

player_df.sample(5)

Unnamed: 0,player_role,start_time,end_time,number,yellow_card,red_card,injured,goal,own_goal,team_player_id,team_id,id,first_name,last_name,short_name,birthday,trackable_object,gender,game_id
29,"{'id': 10, 'name': 'Right Midfield', 'acronym'...",00:00:00,,14,0,0,False,0,0,1088,2,5308,Jordan,Henderson,J. Henderson,1990-06-17,5318,male,10000
86,"{'id': 5, 'name': 'Left Wing Back', 'acronym':...",00:00:00,,15,0,0,False,0,0,33440,32,12504,Jamal,Lewis,J. Lewis,1998-01-25,12656,male,10013
77,"{'id': 14, 'name': 'Left Forward', 'acronym': ...",00:00:00,01:07:19,10,0,0,False,0,0,23844,52,11172,Daniel,Castelo Podence,Daniel Podence,1995-10-21,11192,male,10013
34,"{'id': 5, 'name': 'Left Wing Back', 'acronym':...",00:00:00,,26,0,0,False,0,0,5532,2,841,Andrew,Robertson,A. Robertson,1994-03-11,851,male,10000
69,"{'id': 4, 'name': 'Right Center Back', 'acrony...",00:00:00,,35,0,0,False,0,0,7434,58,4703,Jan,Bednarek,J. Bednarek,1996-04-12,4713,male,10009


In [7]:
player_df.sample(5).to_csv('player_sample.csv', index=False)

In [8]:
frame_df = pd.read_parquet("frame.parquet")

frame_df.sample(5).to_csv('frame_sample.csv', index=False)

In [10]:
possession_df = pd.read_parquet("possession.parquet")

possession_df.sample(5).to_csv('possession_sample.csv', index=False)

In [12]:
metadata_df = pd.read_parquet("metadata.parquet")

metadata_df.to_csv('metadata_sample.csv', index=False)

In [45]:
# player_df.createOrReplaceTempView("PlayerTable")
# parkSQL = spark.sql("select * from ParquetTable where game_id ")

frame_df.loc[100]

index                                                                     110
game_id                                                                 10017
frame                                                                     110
image_corners_projection    [-58.15015447923706, 12.570352614219146, -20.5...
period                                                                      1
timestamp_in_seconds                                                       10
Name: 100, dtype: object

In [53]:
possession_df.loc[10000]

index                   12342
game_id                 10017
group               home team
trackable_object         <NA>
Name: 10000, dtype: object

In [10]:
player_df.dtypes

player_role         object
start_time          object
end_time            object
number               int64
yellow_card          int64
red_card             int64
injured               bool
goal                 int64
own_goal             int64
team_player_id       int64
team_id              int64
id                   int64
first_name          object
last_name           object
short_name          object
birthday            object
trackable_object     int64
gender              object
game_id              int64
dtype: object

### Get all the players from a specific game

In [7]:
game_id = 10000

In [12]:
game_track_objects_df = player_df[player_df['game_id'] == game_id][['game_id', 'trackable_object']]

game_track_objects_df.shape

(36, 2)

In [13]:
trackable_objects_list = game_track_objects_df['trackable_object'].tolist()

len(trackable_objects_list)



36

36 players from both teams (starters and subtitutes).

### Get all the frames from a specific game

In [8]:
game_frames_df = frame_df[frame_df['game_id'] == game_id][['game_id', 'frame', 'period', 'timestamp_in_seconds']]

game_frames_df.shape

(45697, 4)

In [11]:
game_frames_df.head(1)

Unnamed: 0,game_id,frame,period,timestamp_in_seconds
0,10000,10,1,0


### Get all X & Y coordinate from Track Based on previous queries

In [12]:
track_df.shape

(3036907, 8)

In [17]:
game_tracks_df = track_df.loc[
    (track_df['game_id']==game_id) 
]

game_tracks_df = game_tracks_df.loc[game_tracks_df['trackable_object'] != "55"]

game_tracks_df.shape

(1005334, 8)

In [18]:
game_tracks_df.head(1)

                                                                                

Unnamed: 0,game_id,frame,track_id,trackable_object,is_visible,x,y,z
0,10000,10,11565,11565,False,39.02,-1.15,


Join the frame and track dataframes on the frame to create a dataframe that I will be working on

In [20]:
len(game_tracks_df['trackable_object'].unique())

26

In [19]:
game_df = game_tracks_df.merge(game_frames_df, how="inner", on=["game_id", "frame"])

game_df.shape

(1005334, 10)

In [18]:
game_df.dtypes

game_id                  object
frame                     int64
track_id                  int64
trackable_object         object
is_visible                 bool
x                       float64
y                       float64
z                       float64
period                    int64
timestamp_in_seconds      int64
dtype: object

In [21]:
game_df['trackable_object'] = game_df['trackable_object'].astype(int)

In [20]:
game_df.head(1)

                                                                                

Unnamed: 0,game_id,frame,track_id,trackable_object,is_visible,x,y,z,period,timestamp_in_seconds
0,10000,10,11565,11565,False,39.02,-1.15,,1,0


Now we have a dataframe where "intense" in 5-minute window and "spread" in 2-minute window can be calculated

### 5-minute Intensity Write up

The definition of intense according Merriam Webster is "existing in an extreme degree; : exhibiting strong feeling or earnestness of purpo".
I choose to interpret this as hitting the top average speed in 5-minute window for each player, with or without possession of the ball. FOr simplicity purpose, the 5-minute segment starts at minute mark, e.g. 00:01:00 or 01:33:00.e"

Calculate the distance travel for each frame for each player

In [23]:
game_indexed_df = game_df.copy(True)
game_indexed_df.set_index(["trackable_object"], inplace=True)

In [16]:
game_indexed_df.head(10)

Unnamed: 0_level_0,game_id,frame,track_id,is_visible,x,y,z,period,timestamp_in_seconds
trackable_object,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
11565,10000,10,11565,False,39.02,-1.15,,1,0
3267,10000,10,3267,False,19.51,-4.78,,1,0
11537,10000,10,11537,False,19.55,-14.61,,1,0
11847,10000,10,11847,False,15.94,7.82,,1,0
851,10000,10,851,False,15.29,-22.76,,1,0
3847,10000,10,3847,False,10.45,-8.03,,1,0
5318,10000,10,5318,False,9.57,-0.75,,1,0
9143,10000,10,9143,False,0.86,-13.39,,1,0
2776,10000,10,2776,False,2.24,12.74,,1,0
9449,10000,10,9449,False,4.76,-26.24,,1,0


In [17]:
player_id = 11565
test_df = game_indexed_df.loc[player_id].set_index('frame')

In [50]:
test_df.shape

(45697, 8)

In [37]:
test_df.head(10)

Unnamed: 0_level_0,game_id,track_id,is_visible,x,y,z,period,timestamp_in_seconds
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10,10000,11565,False,39.02,-1.15,,1,0
11,10000,11565,False,39.06,-1.03,,1,0
12,10000,11565,False,39.11,-0.92,,1,0
13,10000,11565,False,39.17,-0.82,,1,0
14,10000,11565,False,39.24,-0.73,,1,0
15,10000,11565,False,39.34,-0.64,,1,0
16,10000,11565,False,39.43,-0.55,,1,0
17,10000,11565,False,39.51,-0.5,,1,0
18,10000,11565,False,39.56,-0.48,,1,0
19,10000,11565,False,39.54,-0.52,,1,0


In [19]:
test_df['delta_x'] = test_df['x'].diff()
test_df = test_df.sort_index()
test_df['delta_y'] = test_df['y'].diff()
test_df = test_df.sort_index()

test_df.head(10)

23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 10:44:43 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/12/09 1

Unnamed: 0_level_0,game_id,track_id,is_visible,x,y,z,period,timestamp_in_seconds,delta_x,delta_y
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10,10000,11565,False,39.02,-1.15,,1,0,,
11,10000,11565,False,39.06,-1.03,,1,0,0.04,0.12
12,10000,11565,False,39.11,-0.92,,1,0,0.05,0.11
13,10000,11565,False,39.17,-0.82,,1,0,0.06,0.1
14,10000,11565,False,39.24,-0.73,,1,0,0.07,0.09
15,10000,11565,False,39.34,-0.64,,1,0,0.1,0.09
16,10000,11565,False,39.43,-0.55,,1,0,0.09,0.09
17,10000,11565,False,39.51,-0.5,,1,0,0.08,0.05
18,10000,11565,False,39.56,-0.48,,1,0,0.05,0.02
19,10000,11565,False,39.54,-0.52,,1,0,-0.02,-0.04


In [36]:
test_df.dtypes

game_id                  object
track_id                  int64
is_visible                 bool
x                       float64
y                       float64
z                       float64
period                    int64
timestamp_in_seconds      int64
delta_x                 float64
delta_y                 float64
dtype: object

In [44]:
import math

def calculate_distance(delta_x, delta_y):
    return math.sqrt(delta_x**2 + delta_y**2)

In [52]:
test_df = test_df.to_pandas()

AttributeError: 'DataFrame' object has no attribute 'to_pandas'

In [54]:
test_df.head(5)

Unnamed: 0_level_0,game_id,track_id,is_visible,x,y,z,period,timestamp_in_seconds,delta_x,delta_y
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10,10000,11565,False,39.02,-1.15,,1,0,,
11,10000,11565,False,39.06,-1.03,,1,0,0.04,0.12
12,10000,11565,False,39.11,-0.92,,1,0,0.05,0.11
13,10000,11565,False,39.17,-0.82,,1,0,0.06,0.1
14,10000,11565,False,39.24,-0.73,,1,0,0.07,0.09


In [56]:
test_df['eucl_dist'] = test_df.apply(lambda x: square_distance(x.delta_x, x.delta_y), axis=1)

test_df.head(10)

Unnamed: 0_level_0,game_id,track_id,is_visible,x,y,z,period,timestamp_in_seconds,delta_x,delta_y,eucl_dist
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10,10000,11565,False,39.02,-1.15,,1,0,,,
11,10000,11565,False,39.06,-1.03,,1,0,0.04,0.12,0.126491
12,10000,11565,False,39.11,-0.92,,1,0,0.05,0.11,0.12083
13,10000,11565,False,39.17,-0.82,,1,0,0.06,0.1,0.116619
14,10000,11565,False,39.24,-0.73,,1,0,0.07,0.09,0.114018
15,10000,11565,False,39.34,-0.64,,1,0,0.1,0.09,0.134536
16,10000,11565,False,39.43,-0.55,,1,0,0.09,0.09,0.127279
17,10000,11565,False,39.51,-0.5,,1,0,0.08,0.05,0.09434
18,10000,11565,False,39.56,-0.48,,1,0,0.05,0.02,0.053852
19,10000,11565,False,39.54,-0.52,,1,0,-0.02,-0.04,0.044721


In [96]:
five_minute_distances = {}
one_minute = 60
five_minute = 5 * one_minute

for p in range(1, 3):
    if p == 1:
        i = 0     # start of 1st half
    else:
        i = 2700  # start of 2nd half

    half_df = test_df.loc[test_df['period'] == p]
    end_of_half = half_df['timestamp_in_seconds'].max()
    while i < end_of_half:
        five_minute_distances[i/one_minute, (i+five_minute)/one_minute, p] = \
            half_df.loc[half_df['timestamp_in_seconds'].between(i, i+five_minute, inclusive='left')]['eucl_dist'].sum()
        i += one_minute # shift 5-minute window to the next minute 

In [97]:
five_minute_distances

{(0.0, 5.0, 1): 240.2196529959379,
 (1.0, 6.0, 1): 222.64018183164308,
 (2.0, 7.0, 1): 205.57206733357023,
 (3.0, 8.0, 1): 217.2652204325521,
 (4.0, 9.0, 1): 227.21912822285626,
 (5.0, 10.0, 1): 226.31191409610426,
 (6.0, 11.0, 1): 187.3145676342383,
 (7.0, 12.0, 1): 130.03750763499653,
 (8.0, 13.0, 1): 96.87338463039109,
 (9.0, 14.0, 1): 110.99135848405336,
 (10.0, 15.0, 1): 112.62415651726573,
 (11.0, 16.0, 1): 156.32127227971068,
 (12.0, 17.0, 1): 205.91862992928017,
 (13.0, 18.0, 1): 241.04032156232176,
 (14.0, 19.0, 1): 239.45359388154807,
 (15.0, 20.0, 1): 257.85628870640164,
 (16.0, 21.0, 1): 258.5324458948519,
 (17.0, 22.0, 1): 239.92912138696437,
 (18.0, 23.0, 1): 241.8003581554322,
 (19.0, 24.0, 1): 244.69741318668312,
 (20.0, 25.0, 1): 224.48990062717567,
 (21.0, 26.0, 1): 241.58412350419744,
 (22.0, 27.0, 1): 251.2769561834868,
 (23.0, 28.0, 1): 244.2169652667775,
 (24.0, 29.0, 1): 235.24401966293743,
 (25.0, 30.0, 1): 242.3102788159049,
 (26.0, 31.0, 1): 259.8026591534812,

In [98]:
max(five_minute_distances, key=five_minute_distances.get)

(45.0, 50.0, 2)

## 2-Minute Spread

In [75]:
test_df = game_indexed_df.loc[game_indexed_df['frame'] == 100]

test_df

Unnamed: 0_level_0,game_id,frame,track_id,is_visible,x,y,z,period,timestamp_in_seconds
trackable_object,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
11565,10000,100,11565,True,44.73,-4.09,,1,9
3267,10000,100,3267,True,31.96,-16.95,,1,9
11537,10000,100,11537,True,32.49,-32.07,,1,9
11847,10000,100,11847,True,29.22,-2.55,,1,9
851,10000,100,851,True,25.5,-30.95,,1,9
3847,10000,100,3847,True,21.32,-26.24,,1,9
5318,10000,100,5318,True,23.16,-19.15,,1,9
9143,10000,100,9143,False,7.02,-23.02,,1,9
2776,10000,100,2776,False,15.42,2.07,,1,9
9449,10000,100,9449,False,14.19,-30.67,,1,9


In [31]:
test_df['x'].sum()

311.66

In [32]:
test_df['y'].sum()

-351.82

In [76]:
centroid_df = test_df.groupby(['frame', 'period', 'timestamp_in_seconds']).agg(
    centroid_x = pd.NamedAgg('x','avg'),
    centroid_y = pd.NamedAgg('y','avg'),
).reset_index()
centroid_df

Unnamed: 0,frame,period,timestamp_in_seconds,centroid_x,centroid_y
0,100,1,9,14.166364,-15.991818


In [77]:
test_df = test_df.merge(centroid_df, how="outer", on=["frame", "period", "timestamp_in_seconds"], left_index=False, right_index=True)

test_df

Unnamed: 0_level_0,game_id,frame,track_id,is_visible,x,y,z,period,timestamp_in_seconds,centroid_x,centroid_y
trackable_object,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
11565,10000,100,11565,True,44.73,-4.09,,1,9,14.166364,-15.991818
3267,10000,100,3267,True,31.96,-16.95,,1,9,14.166364,-15.991818
11537,10000,100,11537,True,32.49,-32.07,,1,9,14.166364,-15.991818
11847,10000,100,11847,True,29.22,-2.55,,1,9,14.166364,-15.991818
851,10000,100,851,True,25.5,-30.95,,1,9,14.166364,-15.991818
3847,10000,100,3847,True,21.32,-26.24,,1,9,14.166364,-15.991818
5318,10000,100,5318,True,23.16,-19.15,,1,9,14.166364,-15.991818
9143,10000,100,9143,False,7.02,-23.02,,1,9,14.166364,-15.991818
2776,10000,100,2776,False,15.42,2.07,,1,9,14.166364,-15.991818
9449,10000,100,9449,False,14.19,-30.67,,1,9,14.166364,-15.991818


In [62]:
test_df = test_df.to_pandas()



In [82]:
def calculate_distance(delta_x, delta_y):
    result = math.sqrt(delta_x**2 + delta_y**2)
    return result

def calculate_distance_to_centroid(df):
    delta_x = df['x'] - df['centroid_x']
    delta_y = df['y'] - df['centroid_y']
    return calculate_distance(delta_x, delta_y)

In [83]:
test_df['dist_to_centroid'] = test_df.apply(lambda x: calculate_distance_to_centroid(x), axis=1)



In [84]:
test_df

Unnamed: 0_level_0,game_id,frame,track_id,is_visible,x,y,z,period,timestamp_in_seconds,centroid_x,centroid_y,dist_to_centroid
trackable_object,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
11565,10000,100,11565,True,44.73,-4.09,,1,9,14.166364,-15.991818,32.799225
3267,10000,100,3267,True,31.96,-16.95,,1,9,14.166364,-15.991818,17.819417
11537,10000,100,11537,True,32.49,-32.07,,1,9,14.166364,-15.991818,24.377522
11847,10000,100,11847,True,29.22,-2.55,,1,9,14.166364,-15.991818,20.181537
851,10000,100,851,True,25.5,-30.95,,1,9,14.166364,-15.991818,18.766953
3847,10000,100,3847,True,21.32,-26.24,,1,9,14.166364,-15.991818,12.49799
5318,10000,100,5318,True,23.16,-19.15,,1,9,14.166364,-15.991818,9.532031
9143,10000,100,9143,False,7.02,-23.02,,1,9,14.166364,-15.991818,10.023266
2776,10000,100,2776,False,15.42,2.07,,1,9,14.166364,-15.991818,18.105272
9449,10000,100,9449,False,14.19,-30.67,,1,9,14.166364,-15.991818,14.678201


In [87]:
spread_df = test_df.groupby(['frame', 'period', 'timestamp_in_seconds'])['dist_to_centroid'].sum().reset_index()



In [88]:
spread_df

Unnamed: 0,frame,period,timestamp_in_seconds,dist_to_centroid
0,100,1,9,418.556966


In [90]:
two_minute_spread = {}
one_minute = 60
two_minute = 2 * one_minute

for p in range(1,3):
    if p == 1:
        i = 0     # start of 1st half
    else:
        i = 2700  # start of 2nd half

    half_df = spread_df.loc[spread_df['period'] == p]
    end_of_half = half_df['timestamp_in_seconds'].max()

    while i < end_of_half:
        two_minute_spread[i/one_minute, (i+two_minute)/one_minute, p] = \
            half_df.loc[half_df['timestamp_in_seconds'].between(i, i+two_minute, inclusive='left')]['dist_to_centroid'].sum()
        i += one_minute # shift 2-minute window to the next minute

two_minute_spread

{(0.0, 2.0, 1): 418.55696568692554}

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 38902)
Traceback (most recent call last):
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/socketserver.py", line 316, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/socketserver.py", line 347, in process_request
    self.finish_request(request, client_address)
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/socketserver.py", line 747, in __init__
    self.handle()
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/site-packages/pyspark/accumulators.py", line 295, in handle
    poll(accum_updates)
  File "/home/reyadji/.pyenv/versions/3.10.12/lib/python3.10/site-packages/pyspark/accumulators.