In [None]:
import pandas as pd
import numpy as np

# Load the training CSV and immediately discard the columns listed below.
dropped_at_load = [
    'club_id',
    'cohort_season',
    'season',
    'dynamic_payment_segment',
    'registration_country',
    'registration_platform_specific',
]
df = pd.read_csv('jobfair_train.csv').drop(columns=dropped_at_load)

In [None]:
# Display pandas' summary statistics for the current contents of `df`.
df.describe()

In [None]:
# Source columns and the suffix used to name each derived `rank_<suffix>` column.
# The two lists are paired by position.
columns_to_rank = [
    'training_count_last_28_days',
    'avg_stars_top_11_players',
    'global_competition_level',
    'session_count_last_28_days',
    'tokens_spent_last_28_days',
]
rank_suffixes = ['training', 'stars', 'global_level', 'session', 'token']

# Missing competition levels are treated as 0 so they can be ranked and cast to int.
df['global_competition_level'] = df['global_competition_level'].fillna(0)

# For every source column, rank the rows inside their own league:
# the largest value gets rank 1 and ties share the lowest rank (method='min').
for col, suffix in zip(columns_to_rank, rank_suffixes):
    rank_col_name = 'rank_' + suffix
    within_league = df.groupby('league_id')[col]
    league_positions = within_league.rank(method='min', ascending=False)
    df[rank_col_name] = league_positions.astype(int)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations between the columns currently held in `df`.
correlation_matrix = df.corr()

# Draw the annotated heatmap on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Matrica Korelacije')
plt.show()

In [None]:
# Remove all of the columns below from `df` in a single call.
columns_to_discard = [
    'morale_boosters_stash',
    'rests_stash',
    'tokens_stash',
    'tokens_spent_last_28_days',
    'avg_training_factor_top_11_players',
    'avg_age_top_11_players',
    'league_match_watched_count_last_28_days',
    'global_competition_level',
    'avg_stars_top_14_players',
    'days_active_last_28_days',
    'session_count_last_28_days',
    'playtime_last_28_days',
    'league_match_won_count_last_28_days',
    'training_count_last_28_days',
    'avg_stars_top_11_players',
]
df = df.drop(columns=columns_to_discard)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import RobustScaler

# Target is the league rank; features are every other column except the league id.
y = df['league_rank']
x = df.drop(columns=['league_rank', 'league_id'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.2
)

# Fit the scaler on the training rows only, then apply the same scaling to the hold-out rows.
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Two hidden layers (100 and 50 units), seeded for reproducible weight initialisation.
mlp_settings = {
    'hidden_layer_sizes': (100, 50),
    'max_iter': 500,
    'learning_rate_init': 0.001,
    'random_state': 42,
}
model = MLPRegressor(**mlp_settings)
model.fit(X_train, y_train)

# The regressor outputs continuous values; round them to whole numbers.
predictions = model.predict(X_test).round()

In [None]:
# Basic distribution statistics of the rounded hold-out predictions.
mean_value = predictions.mean()
std_dev = predictions.std()
min_value = predictions.min()
max_value = predictions.max()

# Print one "<label>: <value>" line per statistic.
summary_rows = (
    ("Mean", mean_value),
    ("Standard Deviation", std_dev),
    ("Minimum", min_value),
    ("Maximum", max_value),
)
for label, value in summary_rows:
    print(f"{label}: {value}")

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
# Score the rounded hold-out predictions against the true targets.
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

# Report both metrics.
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared: {r2}")

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler

test_df = pd.read_csv('jobfair_test.csv')

# Source columns and the suffix used to name each derived `rank_<suffix>` column.
# The two lists are paired by position.
columns_to_rank = [
    'training_count_last_28_days',
    'avg_stars_top_11_players',
    'global_competition_level',
    'session_count_last_28_days',
    'tokens_spent_last_28_days',
]
rank_suffixes = ['training', 'stars', 'global_level', 'session', 'token']

# Missing competition levels are treated as 0 so they can be ranked and cast to int.
test_df['global_competition_level'] = test_df['global_competition_level'].fillna(0)

# Rank each source column inside its league: largest value gets rank 1,
# ties share the lowest rank (method='min').
for col, suffix in zip(columns_to_rank, rank_suffixes):
    rank_col_name = 'rank_' + suffix
    within_league = test_df.groupby('league_id')[col]
    test_df[rank_col_name] = within_league.rank(method='min', ascending=False).astype(int)

# Select the derived rank columns, in the order given by `rank_suffixes`.
feature_columns = ['rank_' + name for name in rank_suffixes]

# Scale with the already-fitted scaler, predict with the already-fitted model,
# and round the continuous outputs to whole numbers.
x_test = scaler.transform(test_df[feature_columns])
y_predictions = model.predict(x_test).round()


In [None]:
# The rounded predictions are still float64, which would be written as "3.0".
# Cast to int so the rank column is emitted as whole numbers ("3").
# NOTE(review): the regressor output is not clipped, so ranks below 1 or above the
# league size are possible — confirm whether the consumer of this file needs clipping.
test_df['league_rank'] = y_predictions.astype(int)

# Write one row per club: its id and the predicted league rank, without the index column.
result_df = test_df[['club_id', 'league_rank']]
result_df.to_csv('test_predictions.csv', index=False)