In [21]:
import pandas as pd
import numpy as np
import os
import pickle
from sklearn.ensemble import RandomForestClassifier

In [22]:
# ===============================================================
# (1) Load Data
# ===============================================================
players = pd.read_csv("data/players.csv")
players_teams = pd.read_csv("data/players_teams.csv")
awards = pd.read_csv("data/awards_players.csv")
teams = pd.read_csv("data/teams.csv")
teams_post = pd.read_csv("data/teams_post.csv")

# Create output directory
os.makedirs("c", exist_ok=True)
MODEL_DIR = "c/"

print(f"Players: {players.shape}")
print(f"Players_teams: {players_teams.shape}")
print(f"Awards: {awards.shape}")
print(f"Teams: {teams.shape}")
print(f"Teams_post: {teams_post.shape}")

Players: (893, 10)
Players_teams: (1876, 43)
Awards: (95, 4)
Teams: (142, 61)
Teams_post: (80, 5)


In [None]:
# ===============================================================
# (2) Define main awards
# ===============================================================
main_awards = [
    'Most Valuable Player',
    'Defensive Player of the Year',
    'Most Improved Player',
    'WNBA Finals Most Valuable Player'
]
awards_main = awards[awards['award'].isin(main_awards)]

max_year = players_teams['year'].max()
print(f"Data spans years 1-{max_year}")
print(f"\nAwards distribution:")
for award in main_awards:
    count = len(awards_main[awards_main['award'] == award])
    print(f"  {award}: {count}")

Data spans years 1-10

Awards distribution:
  Most Valuable Player: 10
  Defensive Player of the Year: 10
  Rookie of the Year: 10
  Most Improved Player: 9
  WNBA Finals Most Valuable Player: 10


In [24]:
# ===============================================================
# (3) Calculate player performance stats by year
# ===============================================================
player_stats = players_teams.copy()
player_stats = player_stats[player_stats['GP'] >= 10]

# Per-game statistics
player_stats['ppg'] = player_stats['points'] / player_stats['GP']
player_stats['rpg'] = player_stats['rebounds'] / player_stats['GP']
player_stats['apg'] = player_stats['assists'] / player_stats['GP']
player_stats['spg'] = player_stats['steals'] / player_stats['GP']
player_stats['bpg'] = player_stats['blocks'] / player_stats['GP']
player_stats['mpg'] = player_stats['minutes'] / player_stats['GP']
player_stats['topg'] = player_stats['turnovers'] / player_stats['GP']
player_stats['fg_pct'] = np.where(player_stats['fgAttempted'] > 0, 
                                   player_stats['fgMade'] / player_stats['fgAttempted'], 0)

# Efficiency rating
player_stats['efficiency'] = (player_stats['points'] + player_stats['rebounds'] + 
                               player_stats['assists'] + player_stats['steals'] + 
                               player_stats['blocks'] - player_stats['turnovers']) / player_stats['GP']

# Player rating (like testar_a)
player_stats['player_rating'] = (
    player_stats['points'] +
    player_stats['rebounds'] * 1.2 +
    player_stats['assists'] * 1.5 +
    player_stats['steals'] * 3 +
    player_stats['blocks'] * 3 -
    player_stats['turnovers'] * 2 -
    player_stats['PF'] * 0.5
) / (player_stats['minutes'] / 36 + 1)

player_stats['gs_pct'] = player_stats['GS'] / player_stats['GP']

print(f"Player stats calculated: {player_stats.shape}")

Player stats calculated: (1603, 54)


In [25]:
# ===============================================================
# (4) Calculate league averages by year
# ===============================================================
yearly_avg = player_stats.groupby('year').agg({
    'ppg': 'mean', 'rpg': 'mean', 'apg': 'mean', 'spg': 'mean', 'bpg': 'mean',
    'efficiency': 'mean', 'mpg': 'mean', 'player_rating': 'mean'
}).reset_index()
yearly_avg.columns = ['year', 'league_ppg', 'league_rpg', 'league_apg', 'league_spg', 
                      'league_bpg', 'league_efficiency', 'league_mpg', 'league_player_rating']

# Elite cutoff (top 10%)
elite_cutoffs = player_stats.groupby('year')['player_rating'].quantile(0.90).reset_index()
elite_cutoffs.columns = ['year', 'elite_cutoff']

player_stats = player_stats.merge(yearly_avg, on='year', how='left')
player_stats = player_stats.merge(elite_cutoffs, on='year', how='left')
player_stats['is_elite'] = (player_stats['player_rating'] >= player_stats['elite_cutoff']).astype(int)

# Save for test time
yearly_avg.to_csv("c/league_averages.csv", index=False)
elite_cutoffs.to_csv("c/elite_cutoffs.csv", index=False)
print("League averages saved")

League averages saved


In [26]:
# ===============================================================
# (5) Add team success context
# ===============================================================
teams_success = teams[['year', 'tmID', 'won', 'lost', 'confID', 'playoff', 'o_pts', 'd_pts', 'GP']].copy()
teams_success['win_pct'] = teams_success['won'] / (teams_success['won'] + teams_success['lost'])
teams_success['conf_rank'] = teams_success.groupby(['year', 'confID'])['win_pct'].rank(ascending=False)
teams_success['made_playoffs'] = teams_success['playoff'].notna().astype(int)
teams_success['margin_per_game'] = (teams_success['o_pts'] - teams_success['d_pts']) / teams_success['GP']

# Championship indicator
playoff_wins = teams_post.groupby(['year', 'tmID'])['W'].sum().reset_index()
champion_idx = playoff_wins.groupby('year')['W'].idxmax()
champions = playoff_wins.loc[champion_idx][['year', 'tmID']].copy()
champions['won_championship'] = 1
teams_success = teams_success.merge(champions, on=['year', 'tmID'], how='left')
teams_success['won_championship'] = teams_success['won_championship'].fillna(0).astype(int)

# Playoff wins
playoff_stats = teams_post.groupby(['year', 'tmID']).agg({'W': 'sum'}).reset_index()
playoff_stats.columns = ['year', 'tmID', 'playoff_wins']
teams_success = teams_success.merge(playoff_stats, on=['year', 'tmID'], how='left')
teams_success['playoff_wins'] = teams_success['playoff_wins'].fillna(0).astype(int)

print(f"Teams success calculated: {teams_success.shape}")

Teams success calculated: (142, 15)


In [27]:
# ===============================================================
# (6) Build Training Dataset
# ===============================================================

# predict awards in year Y using features from year Y-1
training_records = []
years = sorted(player_stats['year'].unique())
print(f"Building training data for years {years[1]}-{years[-1]}")

for target_year in years[1:]:  # start from year 2
    prev_year = target_year - 1
    
    prev_year_stats = player_stats[player_stats['year'] == prev_year].copy()
    prev_year_teams = teams_success[teams_success['year'] == prev_year].copy()
    target_year_awards = awards_main[awards_main['year'] == target_year].copy()
    
    for _, player_row in prev_year_stats.iterrows():
        player_id = player_row['playerID']
        tm_id = player_row['tmID']
        
        team_row = prev_year_teams[prev_year_teams['tmID'] == tm_id]
        
        record = {
            'playerID': player_id,
            'tmID': tm_id,
            'target_year': target_year,
            'prev_year': prev_year,
            
            # T-1 player stats
            'ppg_prev': player_row['ppg'],
            'rpg_prev': player_row['rpg'],
            'apg_prev': player_row['apg'],
            'spg_prev': player_row['spg'],
            'bpg_prev': player_row['bpg'],
            'mpg_prev': player_row['mpg'],
            'fg_pct_prev': player_row['fg_pct'],
            'efficiency_prev': player_row['efficiency'],
            'player_rating_prev': player_row['player_rating'],
            'is_elite_prev': player_row['is_elite'],
            'GP_prev': player_row['GP'],
            'GS_prev': player_row['GS'],
            'gs_pct_prev': player_row['gs_pct'],
            'minutes_prev': player_row['minutes'],
            
            # Comparison to league
            'ppg_vs_league': player_row['ppg'] - player_row['league_ppg'],
            'rpg_vs_league': player_row['rpg'] - player_row['league_rpg'],
            'efficiency_vs_league': player_row['efficiency'] - player_row['league_efficiency'],
            'rating_vs_league': player_row['player_rating'] - player_row['league_player_rating'],
        }
        
        # Team context
        if len(team_row) > 0:
            team_row = team_row.iloc[0]
            record['win_pct_prev'] = team_row['win_pct']
            record['conf_rank_prev'] = team_row['conf_rank']
            record['made_playoffs_prev'] = team_row['made_playoffs']
            record['margin_prev'] = team_row['margin_per_game']
            record['won_championship_prev'] = team_row['won_championship']
            record['playoff_wins_prev'] = team_row['playoff_wins']
        else:
            record['win_pct_prev'] = 0.5
            record['conf_rank_prev'] = 6
            record['made_playoffs_prev'] = 0
            record['margin_prev'] = 0
            record['won_championship_prev'] = 0
            record['playoff_wins_prev'] = 0
        
        # Award targets
        for award in main_awards:
            col_name = f"won_{award.replace(' ', '_').lower()}"
            won = len(target_year_awards[
                (target_year_awards['playerID'] == player_id) & 
                (target_year_awards['award'] == award)
            ]) > 0
            record[col_name] = int(won)
        
        training_records.append(record)

train_df = pd.DataFrame(training_records)
print(f"\nTraining dataset shape: {train_df.shape}")

Building training data for years 2-10

Training dataset shape: (1458, 33)

Training dataset shape: (1458, 33)


In [28]:
# ===============================================================
# (7) Add career features
# ===============================================================
player_first_year = player_stats.groupby('playerID')['year'].min().reset_index()
player_first_year.columns = ['playerID', 'first_year']
train_df = train_df.merge(player_first_year, on='playerID', how='left')
train_df['years_experience'] = train_df['prev_year'] - train_df['first_year']

# Career awards
for award in main_awards:
    award_col = f"career_{award.replace(' ', '_').lower()}"
    train_df[award_col] = 0
train_df['career_total_awards'] = 0

for idx, row in train_df.iterrows():
    player_id = row['playerID']
    target_year = row['target_year']
    
    past_awards = awards_main[
        (awards_main['playerID'] == player_id) & 
        (awards_main['year'] < target_year)
    ]
    
    total = 0
    for award in main_awards:
        award_col = f"career_{award.replace(' ', '_').lower()}"
        count = len(past_awards[past_awards['award'] == award])
        train_df.at[idx, award_col] = count
        total += count
    train_df.at[idx, 'career_total_awards'] = total

print(f"Career features added")
print(f"\nAward counts in training:")
for award in main_awards:
    col = f"won_{award.replace(' ', '_').lower()}"
    print(f"  {award}: {train_df[col].sum()}")

Career features added

Award counts in training:
  Most Valuable Player: 7
  Defensive Player of the Year: 7
  Rookie of the Year: 0
  Most Improved Player: 8
  WNBA Finals Most Valuable Player: 9


In [29]:
# ===============================================================
# (8) Add improvement features (T-1 vs T-2)
# ===============================================================
player_stats_sorted = player_stats.sort_values(['playerID', 'year'])
player_stats_sorted['ppg_prev2'] = player_stats_sorted.groupby('playerID')['ppg'].shift(1)
player_stats_sorted['efficiency_prev2'] = player_stats_sorted.groupby('playerID')['efficiency'].shift(1)
player_stats_sorted['player_rating_prev2'] = player_stats_sorted.groupby('playerID')['player_rating'].shift(1)

prev2_cols = ['playerID', 'year', 'ppg_prev2', 'efficiency_prev2', 'player_rating_prev2']
prev2_data = player_stats_sorted[prev2_cols].copy()
prev2_data = prev2_data.rename(columns={'year': 'prev_year'})

train_df = train_df.merge(prev2_data, on=['playerID', 'prev_year'], how='left')

# Calculate improvement
train_df['ppg_improvement'] = train_df['ppg_prev'] - train_df['ppg_prev2'].fillna(train_df['ppg_prev'])
train_df['efficiency_improvement'] = train_df['efficiency_prev'] - train_df['efficiency_prev2'].fillna(train_df['efficiency_prev'])
train_df['rating_improvement'] = train_df['player_rating_prev'] - train_df['player_rating_prev2'].fillna(train_df['player_rating_prev'])

print(f"Improvement features added")

Improvement features added


In [30]:
# ===============================================================
# (9) Add playoff performance features
# ===============================================================
playoff_player_cols = ['playerID', 'year', 'tmID', 'PostGP', 'PostGS', 'PostMinutes', 
                       'PostPoints', 'PostRebounds', 'PostAssists', 'PostSteals', 'PostBlocks']

existing_cols = [c for c in playoff_player_cols if c in players_teams.columns]
playoff_data = players_teams[existing_cols].copy()
playoff_data = playoff_data.rename(columns={'year': 'prev_year'})

rename_dict = {c: f"{c}_prev" for c in existing_cols if c not in ['playerID', 'prev_year', 'tmID']}
playoff_data = playoff_data.rename(columns=rename_dict)

train_df = train_df.merge(playoff_data, on=['playerID', 'prev_year', 'tmID'], how='left')

for col in train_df.columns:
    if 'Post' in col:
        train_df[col] = train_df[col].fillna(0)

print(f"Playoff features added")
print(f"Final training shape: {train_df.shape}")

Playoff features added
Final training shape: (1502, 55)


In [31]:
# ===============================================================
# (10) Save training data
# ===============================================================
train_df.to_csv("c/award_train_data.csv", index=False)
print(f"\n‚úÖ Training data saved: c/award_train_data.csv")


‚úÖ Training data saved: c/award_train_data.csv


In [32]:
# ===============================================================
# (11) Define feature sets
# ===============================================================

# MVP Features
features_mvp = [
    'ppg_prev', 'rpg_prev', 'apg_prev', 'spg_prev', 'bpg_prev', 'mpg_prev',
    'fg_pct_prev', 'efficiency_prev', 'player_rating_prev', 'gs_pct_prev',
    'GP_prev', 'minutes_prev',
    'ppg_vs_league', 'rpg_vs_league', 'efficiency_vs_league', 'rating_vs_league',
    'is_elite_prev',
    'win_pct_prev', 'conf_rank_prev', 'made_playoffs_prev', 'margin_prev',
    'years_experience',
    'career_total_awards', 'career_most_valuable_player'
]

# DPOY Features
features_dpoy = [
    'spg_prev', 'bpg_prev', 'rpg_prev', 'mpg_prev',
    'efficiency_prev', 'player_rating_prev', 'gs_pct_prev',
    'GP_prev', 'minutes_prev',
    'win_pct_prev', 'conf_rank_prev', 'made_playoffs_prev',
    'years_experience',
    'career_total_awards', 'career_defensive_player_of_the_year'
]

# MIP Features
features_mip = [
    'ppg_prev', 'efficiency_prev', 'player_rating_prev',
    'GP_prev', 'gs_pct_prev',
    'ppg_improvement', 'efficiency_improvement', 'rating_improvement',
    'ppg_vs_league', 'efficiency_vs_league',
    'win_pct_prev',
    'years_experience',
    'career_total_awards'
]

# FMVP Features
features_fmvp = [
    'PostPoints_prev', 'PostRebounds_prev', 'PostAssists_prev', 'PostSteals_prev', 'PostBlocks_prev',
    'PostGP_prev', 'PostGS_prev', 'PostMinutes_prev',
    'player_rating_prev', 'efficiency_prev',
    'ppg_prev', 'rpg_prev', 'apg_prev',
    'won_championship_prev', 'playoff_wins_prev',
    'years_experience',
    'career_total_awards', 'career_wnba_finals_most_valuable_player'
]

# Verify features exist
for name, features in [('MVP', features_mvp), ('DPOY', features_dpoy), ('MIP', features_mip), ('FMVP', features_fmvp)]:
    missing = [f for f in features if f not in train_df.columns]
    if missing:
        print(f"‚ö†Ô∏è {name} missing: {missing}")
    else:
        print(f"‚úÖ {name}: {len(features)} features OK")

‚úÖ MVP: 24 features OK
‚úÖ DPOY: 15 features OK
‚úÖ MIP: 13 features OK
‚úÖ FMVP: 18 features OK


In [33]:
# ===============================================================
# (12) Train MVP Model
# ===============================================================
print("\n" + "="*70)
print("TRAINING MVP MODEL")
print("="*70)

target_mvp = 'won_most_valuable_player'
train_mvp = train_df.dropna(subset=features_mvp)

X_mvp = train_mvp[features_mvp]
y_mvp = train_mvp[target_mvp]

print(f"Training samples: {len(X_mvp)}")
print(f"MVP winners: {y_mvp.sum()}")

model_mvp = RandomForestClassifier(
    n_estimators=200, max_depth=10, min_samples_split=5,
    class_weight='balanced', random_state=42, n_jobs=-1
)
model_mvp.fit(X_mvp, y_mvp)

with open(f"{MODEL_DIR}model_mvp.pkl", "wb") as f:
    pickle.dump(model_mvp, f)
print(f"‚úÖ MVP model saved")


TRAINING MVP MODEL
Training samples: 1502
MVP winners: 7
‚úÖ MVP model saved
‚úÖ MVP model saved


In [34]:
# ===============================================================
# (13) Train DPOY Model
# ===============================================================
print("\n" + "="*70)
print("TRAINING DPOY MODEL")
print("="*70)

target_dpoy = 'won_defensive_player_of_the_year'
train_dpoy = train_df.dropna(subset=features_dpoy)

X_dpoy = train_dpoy[features_dpoy]
y_dpoy = train_dpoy[target_dpoy]

print(f"Training samples: {len(X_dpoy)}")
print(f"DPOY winners: {y_dpoy.sum()}")

model_dpoy = RandomForestClassifier(
    n_estimators=200, max_depth=10,
    class_weight='balanced', random_state=42, n_jobs=-1
)
model_dpoy.fit(X_dpoy, y_dpoy)

with open(f"{MODEL_DIR}model_dpoy.pkl", "wb") as f:
    pickle.dump(model_dpoy, f)
print(f"‚úÖ DPOY model saved")


TRAINING DPOY MODEL
Training samples: 1502
DPOY winners: 7
‚úÖ DPOY model saved
‚úÖ DPOY model saved


In [35]:
# ===============================================================
# (14) Train MIP Model
# ===============================================================
print("\n" + "="*70)
print("TRAINING MIP MODEL")
print("="*70)

target_mip = 'won_most_improved_player'
train_mip = train_df[
    (train_df['years_experience'] >= 0)
].copy()
train_mip = train_mip.dropna(subset=features_mip)

X_mip = train_mip[features_mip]
y_mip = train_mip[target_mip]

print(f"Training samples: {len(X_mip)}")
print(f"MIP winners: {y_mip.sum()}")

model_mip = RandomForestClassifier(
    n_estimators=300, max_depth=8, min_samples_leaf=2,
    class_weight='balanced', random_state=42, n_jobs=-1
)
model_mip.fit(X_mip, y_mip)

with open(f"{MODEL_DIR}model_mip.pkl", "wb") as f:
    pickle.dump(model_mip, f)
print(f"‚úÖ MIP model saved")


TRAINING MIP MODEL
Training samples: 1502
MIP winners: 8
‚úÖ MIP model saved
‚úÖ MIP model saved


In [36]:
# ===============================================================
# (15) Train FMVP Model
# ===============================================================
print("\n" + "="*70)
print("TRAINING FMVP MODEL")
print("="*70)

target_fmvp = 'won_wnba_finals_most_valuable_player'
train_fmvp = train_df[
    (train_df['made_playoffs_prev'] == 1) &
    (train_df['PostGP_prev'] > 0)
].copy()

available_fmvp = [f for f in features_fmvp if f in train_fmvp.columns]
train_fmvp = train_fmvp.dropna(subset=available_fmvp)

X_fmvp = train_fmvp[available_fmvp].fillna(0)
y_fmvp = train_fmvp[target_fmvp]

print(f"Training samples: {len(X_fmvp)}")
print(f"FMVP winners: {y_fmvp.sum()}")

if y_fmvp.sum() > 0:
    model_fmvp = RandomForestClassifier(
        n_estimators=200, max_depth=8,
        class_weight='balanced', random_state=42, n_jobs=-1
    )
    model_fmvp.fit(X_fmvp, y_fmvp)
    with open(f"{MODEL_DIR}model_fmvp.pkl", "wb") as f:
        pickle.dump(model_fmvp, f)
    print(f"‚úÖ FMVP model saved")
    features_fmvp = available_fmvp
else:
    print("‚ö†Ô∏è No FMVP winners - skipping")


TRAINING FMVP MODEL
Training samples: 716
FMVP winners: 6
‚úÖ FMVP model saved
‚úÖ FMVP model saved


In [37]:
# ===============================================================
# (16) Save feature sets
# ===============================================================
feature_sets = {
    'mvp': features_mvp,
    'dpoy': features_dpoy,
    'mip': features_mip,
    'fmvp': features_fmvp,
    'main_awards': main_awards
}

with open(f"{MODEL_DIR}feature_sets.pkl", "wb") as f:
    pickle.dump(feature_sets, f)

print("\n" + "="*70)
print("‚úÖ ALL MODELS SAVED")
print("="*70)
for f in sorted(os.listdir(MODEL_DIR)):
    print(f"  - {f}")


‚úÖ ALL MODELS SAVED
  - award_train_data.csv
  - elite_cutoffs.csv
  - feature_sets.pkl
  - league_averages.csv
  - model_dpoy.pkl
  - model_fmvp.pkl
  - model_mip.pkl
  - model_mvp.pkl


In [38]:
# ===============================================================
# (17) Cross-validation
# ===============================================================
print("\n" + "="*70)
print("CROSS-VALIDATION (Leave-One-Year-Out)")
print("="*70)

years = sorted(train_df['target_year'].unique())
results = []

for test_year in years[1:]:
    train_mask = train_df['target_year'] < test_year
    test_mask = train_df['target_year'] == test_year
    
    train_cv = train_df[train_mask].dropna(subset=features_mvp)
    test_cv = train_df[test_mask].dropna(subset=features_mvp)
    
    if len(train_cv) == 0 or len(test_cv) == 0:
        continue
    
    X_tr, y_tr = train_cv[features_mvp], train_cv[target_mvp]
    X_te, y_te = test_cv[features_mvp], test_cv[target_mvp]
    
    model_cv = RandomForestClassifier(
        n_estimators=200, max_depth=10, min_samples_split=5,
        class_weight='balanced', random_state=42, n_jobs=-1
    )
    model_cv.fit(X_tr, y_tr)
    
    proba = model_cv.predict_proba(X_te)[:, 1]
    test_cv = test_cv.copy()
    test_cv['mvp_proba'] = proba
    test_cv['mvp_rank'] = test_cv['mvp_proba'].rank(ascending=False, method='first')
    
    actual_winner = test_cv[test_cv[target_mvp] == 1]
    if len(actual_winner) > 0:
        winner_rank = actual_winner['mvp_rank'].values[0]
        results.append({'year': test_year, 'winner_rank': winner_rank})

if results:
    results_df = pd.DataFrame(results)
    print("\nMVP Model Results:")
    print(results_df.to_string(index=False))
    print(f"\nAverage Winner Rank: {results_df['winner_rank'].mean():.1f}")
    print(f"Top-3 Accuracy: {(results_df['winner_rank'] <= 3).mean()*100:.1f}%")


CROSS-VALIDATION (Leave-One-Year-Out)

MVP Model Results:
 year  winner_rank
    4          4.0
    5          1.0
    6          7.0
    7          1.0
    8          3.0
   10          7.0

Average Winner Rank: 3.8
Top-3 Accuracy: 50.0%

MVP Model Results:
 year  winner_rank
    4          4.0
    5          1.0
    6          7.0
    7          1.0
    8          3.0
   10          7.0

Average Winner Rank: 3.8
Top-3 Accuracy: 50.0%


In [None]:
# ===============================================================
# (18) Summary
# ===============================================================
print("\n" + "="*70)
print("TRAINING COMPLETE")
print("="*70)

print(f"\nüìä Approach:")
print(f"   Predict awards in year Y using stats from year Y-1")
print(f"   All features are T-1 (previous year) based")
print(f"   Predictions can be made BEFORE season starts")

print(f"\nü§ñ Models:")
print(f"   - MVP: {len(features_mvp)} features")
print(f"   - DPOY: {len(features_dpoy)} features")
print(f"   - MIP: {len(features_mip)} features")
print(f"   - FMVP: {len(features_fmvp)} features")

print(f"\n‚úÖ Ready for testar_c.ipynb")


TRAINING COMPLETE

üìä Approach:
   Predict awards in year Y using stats from year Y-1
   All features are T-1 (previous year) based
   Predictions can be made BEFORE season starts

ü§ñ Models:
   - MVP: 24 features
   - DPOY: 15 features
   - MIP: 13 features
   - FMVP: 18 features

‚ö†Ô∏è Note on ROY:
   ROY requires special handling - rookies have no prior NBA stats
   For test, identify players not in historical data

‚úÖ Ready for testar_c.ipynb
