In [1]:
import sys
import os

# Put the parent directory (resolved against the current working directory) on
# sys.path, presumably so the `functions` package imported further down can be
# resolved. Guarded so that re-running the cell does not append the same
# entry again.
parent_dir = os.path.abspath("..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import requests
import json
import time
import pandas as pd
from sqlalchemy import create_engine
import hashlib
from functions.utils import create_round_features

### Get data

In [3]:
# Load the scraped player payloads. The encoding is pinned to UTF-8 (as the
# later load of `all_detailed_players_def.json` in this notebook does) because
# the data holds non-ASCII text such as 'Atlético de Madrid' and the platform
# default encoding is not guaranteed to be UTF-8.
with open('../data/detailed_scrapped/all_detailed_players.json', 'r', encoding='utf-8') as f:
    players = json.load(f)

In [5]:
# Preview only the first record: echoing the whole list prints every player's
# full price history and bloats the saved notebook.
players[:1]

[{'answer': {'data': {'id': '504e58bb4d8bec9a67000187',
    'name': 'Koke',
    'slug': '67047092',
    'role': 'centrocampista',
    'role2': '',
    'photo': '67047092.png',
    'points': 61,
    'value': 21049633,
    'team': 'Atlético de Madrid',
    'logo': 'atletico-de-madrid.png',
    'teamId': '504e581e4d8bec9a670000c8',
    'status': '',
    'rating': 5,
    'average': {'average': 5.083333333333333,
     'homeAverage': 5.833333333333333,
     'awayAverage': 4.333333333333333,
     'averageLastFive': 4.4,
     'matches': 12,
     'fitness': [2, 8, 5, 5, 2]},
    'x': 'adidas2',
    'change': 85457,
    'computer': False,
    'releaseDate': '2025-11-08T05:21:59.056Z',
    'total': {'points': 61, 'played': 12}},
   'prices': [{'_id': '691d38aeee5bde071c21543a',
     'c': 8778286,
     's': 28699277,
     'date': '2025-11-19T03:25:34.352Z',
     'price': 20280310},
    {'_id': '691e8a41db7f0a06fc2200ec',
     'c': 8775886,
     's': 29409658,
     'date': '2025-11-20T03:25:53.202Z

In [4]:
# Alias the loaded list as the working dataset. The commented-out `+ players_2`
# suggests a second batch could be appended here; no `players_2` is defined in
# the cells shown.
combined_data = players #+ players_2

In [5]:
# Build one training sample per sliding window of four consecutive matches:
# the first three matches supply the features, the fourth is the target.
all_samples = []
for player_data in combined_data:
    answer = player_data['answer']
    player_info = answer['data']

    # `or []` also covers a key that is present but null.
    historical_matches = answer.get('points') or []
    if len(historical_matches) < 4:
        continue  # not enough matches to form a single window

    # A player may have no recorded price history; without `default`,
    # max()/min() raise ValueError on an empty list and abort the whole build.
    prices = [p['price'] for p in (answer.get('prices') or [])]
    max_price = max(prices, default=None)
    min_price = min(prices, default=None)

    # Player-level values are identical for every window, so read them once.
    # NOTE(review): these averages come from the player's summary in the
    # snapshot, not from the matches preceding each target; they may already
    # include the target match (possible leakage) -- confirm before training.
    averages = player_info['average']
    home_average = averages['homeAverage']
    away_average = averages['awayAverage']
    overall_average = averages['average']

    for i in range(len(historical_matches) - 3):
        feature_matches = historical_matches[i:i + 3]
        target_match = historical_matches[i + 3]

        # Oldest first: index 0 is three matches before the target.
        feature_points = [m['points'] for m in feature_matches]
        last_3_average = sum(feature_points) / 3

        sample_dict = {
            'player_id': player_info['id'],
            'name': player_info['name'],
            'team': player_info['team'],
            'role': player_info['role'],
            'round': target_match['round'],
            'home_average': home_average,
            'away_average': away_average,
            'overall_average': overall_average,
            'last_3_average': last_3_average,
            'current_price': player_info['value'],
            'max_price': max_price,
            'min_price': min_price,
            'is_home_target': target_match['isHomeTeam'],
            'match_minus_1': feature_points[2],
            'match_minus_2': feature_points[1],
            'match_minus_3': feature_points[0],
            'target_points': target_match['points']}

        all_samples.append(sample_dict)

# One row per window; column order follows the dict keys above.
df = pd.DataFrame(all_samples)

In [6]:
def generate_unique_id(row):
    """Return a deterministic SHA-256 hex digest for one (player, round) row.

    Args:
        row: Mapping-like object (for example a DataFrame row) exposing the
            'player_id' and 'round' entries.

    Returns:
        str: Hexadecimal SHA-256 digest of the text "<player_id>_<round>".
    """
    hasher = hashlib.sha256()
    hasher.update(f"{row['player_id']}_{row['round']}".encode())
    return hasher.hexdigest()

# Hash every row into its (player, round) key, then store the result as a new column.
row_ids = df.apply(generate_unique_id, axis=1)
df['unique_id'] = row_ids

In [7]:
# (rows, columns): one row per sliding-window sample, columns are the sample fields plus unique_id.
df.shape

(202, 18)

In [88]:
# Keep the first row for each unique_id; later rows carrying the same hash are dropped.
# NOTE(review): `df_unique` is not referenced by the cells visible after this one -- confirm it is still needed.
df_unique = df.drop_duplicates(subset='unique_id', keep='first')

In [91]:
# Inspect the first sample as a Series. This reads `df`, not the de-duplicated `df_unique`.
df.iloc[0]

player_id                                   504e58bb4d8bec9a67000187
name                                                            Koke
team                                              Atlético de Madrid
role                                                  centrocampista
round                                                              4
home_average                                                5.833333
away_average                                                4.333333
overall_average                                             5.083333
last_3_average                                              0.666667
current_price                                               21049633
max_price                                                   21084293
min_price                                                   20280310
is_home_target                                                  True
match_minus_1                                                      2
match_minus_2                     

### Upload to PostgreSQL

In [53]:
# Load `data/player_features.csv` into a DataFrame.
# NOTE(review): this path is relative to the working directory ('data/...'),
# while the JSON loads in this notebook use '../data/...' -- confirm this is
# the intended folder.
player_features = pd.read_csv(filepath_or_buffer='data/player_features.csv')

In [54]:
# (rows, columns) of the CSV that was just loaded.
player_features.shape

(60, 23)

In [55]:
# Database connection settings. No password is defined here; the placeholder
# below is left commented out.
# password = 'your_password'
username, host, port, database = 'rodrigo', 'localhost', '5432', 'futmondo'

In [57]:
# Build the connection URL first (user only, no password component), then
# create the engine from it.
db_url = f'postgresql+psycopg2://{username}@{host}:{port}/{database}'
engine = create_engine(db_url)

In [58]:
# Write the frame to the `players_features` table, replacing the table if it
# already exists; the DataFrame index is not stored.
player_features.to_sql(name='players_features', con=engine, if_exists='replace', index=False)

60

In [59]:
# Read the whole table back and show its shape to check the upload round-trips.
query = "SELECT * FROM players_features"
df_from_db = pd.read_sql(sql=query, con=engine)
df_from_db.shape

(60, 23)

In [61]:
# Peek at the first two rows read back from the database.
df_from_db.head(2)

Unnamed: 0,player_id,name,team,role,total_points,matches_played,average,home_average,away_average,last_5_average,...,is_home_next,opponent_next,team_win_prob,draw_prob,opponent_win_prob,match_minus_1,match_minus_2,match_minus_3,match_minus_4,match_minus_5
0,504e58bb4d8bec9a67000187,Koke,Atlético de Madrid,centrocampista,61,12,5.083333,5.833333,4.333333,4.4,...,True,R. Oviedo,79.894209,13.554466,6.551325,2,5,5,8,2
1,55a975025bcb491d02476ab2,Álvaro Lemos,R. Oviedo,defensa,0,0,0.0,0.0,0.0,0.0,...,False,Atlético de Madrid,6.551325,13.554466,79.894209,0,0,0,0,0


## New detailed JSON

In [4]:
import json

In [5]:
# Load the newer detailed scrape into memory.
# NOTE(review): `players_data` is not referenced by the cells visible after
# this one (the feature builder is given the file path) -- confirm it is needed.
with open('../data/detailed_scrapped/all_detailed_players_def.json', mode='r', encoding='utf-8') as json_file:
    players_data = json.load(json_file)

In [10]:
# Build the per-round feature table from the JSON file for target rounds 14 through 17.
df_rolling = create_round_features(
    '../data/detailed_scrapped/all_detailed_players_def.json',
    target_rounds=list(range(14, 18)),
)

  return pd.concat(all_rounds, ignore_index=True)


In [11]:
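# Example usage (illustrative numbers only):
# NOTE: in the run displayed further down, the same fitness list (Muriqi)
# yields round 14 -> match_minus_1=2, match_minus_2=10, target=16, i.e. the
# list appears to line up with rounds 12-16 there, so the actual outputs are
# shifted by one round relative to the numbers in this example.
# If fitness = [10, 2, 16, 3, 12] and represents rounds 13-17: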
#
# Predict round 18 (next round):
# df = create_round_features('players.json', target_rounds=[18])
# Result: match_minus_1=12, match_minus_2=3, target_points=None
#
# Train on historical rounds:
# df = create_round_features('players.json', target_rounds=[14, 15, 16, 17])
# Round 14: match_minus_1=10, match_minus_2=0 (not available), target=2
# Round 15: match_minus_1=2, match_minus_2=10, target=16
# Round 16: match_minus_1=16, match_minus_2=2, target=3
# Round 17: match_min

In [12]:
# (rows, columns) of the rolling feature table as returned by create_round_features.
df_rolling.shape

(2076, 16)

 'slug': '11022221',
  'role': 'delantero',
  'role2': '',
  'photo': '11022221.png',
  'points': 99,
  'value': 41949009,
  'status': '',
  'rating': 6,
  'computer': False,
  'dbp': True,
  'average': {'average': 6.6,
   'homeAverage': 7,
   'awayAverage': 6.25,
   'averageLastFive': 8.6,
   'matches': 15,
   'fitness': [10, 2, 16, 3, 12]},

In [14]:
# Spot-check the rows of a single player.
df_rolling.loc[df_rolling['name'].eq('Muriqi')]

Unnamed: 0,player_id,name,role,round,team_id,home_average,away_average,overall_average,current_price,matches_played,rating,match_minus_1,match_minus_2,last_2_average,target_points,unique_id
406,5f614957d59f315e1fc77bd2,Muriqi,delantero,14,504e581e4d8bec9a670000d2,7.0,6.25,6.6,41949009,15,6,2,10,6.0,16.0,189f7b1b712472cb85fedb99f3509e6826171cfb80bb7f...
925,5f614957d59f315e1fc77bd2,Muriqi,delantero,15,504e581e4d8bec9a670000d2,7.0,6.25,6.6,41949009,15,6,16,2,9.0,3.0,8de954d4a3c5bba471d92479139e6ebef8321cfb7c9156...
1444,5f614957d59f315e1fc77bd2,Muriqi,delantero,16,504e581e4d8bec9a670000d2,7.0,6.25,6.6,41949009,15,6,3,16,9.5,12.0,4ae47bbbdb62c40b0a30a18251287804a099959a4bd8b6...
1963,5f614957d59f315e1fc77bd2,Muriqi,delantero,17,504e581e4d8bec9a670000d2,7.0,6.25,6.6,41949009,15,6,12,3,7.5,,10c762abdc33764d2b8ca3538995178bcfba9a1372504a...


In [15]:
# Drop every row whose team_id is one of these three ids.
# NOTE(review): none of these ids appear in `team_mapping` -- presumably teams
# outside the competition being modelled; confirm.
ids_to_remove = ['51ffb6b7113981890700003a', '5211d81592d57d145a0000ce', '520e4ee4a776cc826b00004b']
belongs_to_removed_team = df_rolling['team_id'].isin(ids_to_remove)
df_rolling = df_rolling[~belongs_to_removed_team]

In [16]:
# Team id -> short team name, listed as (id, name) pairs in lookup order.
_TEAM_ID_NAME_PAIRS = (
    ('504e581e4d8bec9a670000cf', 'Levante'),
    ('520347e4b8d07d930b00000f', 'Girona'),
    ('504e581e4d8bec9a670000d9', 'Celta'),
    ('504e581e4d8bec9a670000c7', 'Barcelona'),
    ('51ffb00e78b20d7f0700003f', 'Oviedo'),
    ('504e581e4d8bec9a670000c9', 'Ath Bilbao'),
    ('504e581e4d8bec9a670000cb', 'Valencia'),
    ('504e581e4d8bec9a670000ca', 'Vallecano'),
    ('504e581e4d8bec9a670000c6', 'Real Madrid'),
    ('504e581e4d8bec9a670000c8', 'Ath Madrid'),
    ('51b890f5b986415a2c000012', 'Villarreal'),
    ('504e581e4d8bec9a670000d1', 'Osasuna'),
    ('504e581e4d8bec9a670000cd', 'Getafe'),
    ('51b889b1e401a15f2c0000f0', 'Elche'),
    ('504e581e4d8bec9a670000d0', 'Espanol'),
    ('504e581e4d8bec9a670000d5', 'Sevilla'),
    ('504e581e4d8bec9a670000ce', 'Sociedad'),
    ('504e581e4d8bec9a670000cc', 'Betis'),
    ('52038563b8d07d930b00008a', 'Alaves'),
    ('504e581e4d8bec9a670000d2', 'Mallorca'),
)
team_mapping = dict(_TEAM_ID_NAME_PAIRS)

# Attach the readable team name as a trailing `team` column. `assign` returns
# a new frame rather than writing a column into the existing object, so this
# cell never mutates a frame produced by an earlier boolean filter (the
# pattern pandas flags as chained assignment). Team ids that are missing from
# `team_mapping` come out as NaN.
df_rolling = df_rolling.assign(team=df_rolling['team_id'].map(team_mapping))

In [17]:
# (rows, columns) after the team-id filter and the added `team` column.
df_rolling.shape

(2064, 17)

### Upload it to SQL

In [18]:
# Connection settings for the database that receives the rolling feature table.
username, host, port = 'rodrigo', 'localhost', '5432'
database = 'futmondo_full_players_info'

In [19]:
# Build the connection URL first (user only, no password component), then
# create the engine from it.
db_url = f'postgresql+psycopg2://{username}@{host}:{port}/{database}'
engine = create_engine(db_url)

In [20]:
# Write the rolling feature frame to the `player_points` table, replacing the
# table if it already exists; the DataFrame index is not stored.
# NOTE(review): the value echoed by this cell (64) is smaller than the 2064
# rows reported by `df_rolling.shape`, so do not read it as the total number
# of rows inserted -- verify with a COUNT(*) on the table.
df_rolling.to_sql(name='player_points', con=engine, if_exists='replace', index=False)

64