In [1]:
import pandas as pd
# Minimum value of `games_played` a player needs to be kept in the final
# statistics table (rows below this threshold are filtered out later on).
min_games = 10

In [2]:
# Read the raw player statistics; the first CSV column is used as the index.
player_stats_df = pd.read_csv(
    "player_stats.csv",
    index_col=0,
)

In [3]:
# Descriptive (non-performance) columns that make up the player profile table.
profile_columns = ["player_id", "player_name", "team", "age", "country",
                   "avg_rating", "height_cm", "position", "weight_kg"]

# Profile table is taken from the unfiltered data, so goalkeepers are included.
# Indexing with a list returns a new frame, so no explicit copy is needed.
players_df = player_stats_df[profile_columns].drop_duplicates()

# Remove goalkeepers (both spellings used in the data). The explicit copy makes
# the result an independent frame, so the column assignment below does not
# write into a slice of the raw data (avoids SettingWithCopyWarning).
player_stats_df = player_stats_df.loc[
    ~player_stats_df["position"].isin(["GK", "Goalkeeper"])
].copy()

# Games played = games started + games entered as a substitute.
player_stats_df["games_played"] = (
    player_stats_df["first_eleven"] + player_stats_df["substituted_on"]
)

# Drop profile columns and the inputs of `games_played`; only player_id and
# the statistics remain.
player_stats_df = player_stats_df.drop(
    columns=["team", "age", "avg_rating", "country", "first_eleven", "height_cm",
             "minutes_played", "player_name", "position", "substituted_on",
             "weight_kg"]
)

# Number of rows per player_id, attached to every row as `counts`.
# The merge also resets the index to a fresh RangeIndex.
counts_df = player_stats_df.groupby(["player_id"]).size().reset_index(name="counts")
player_stats_df = player_stats_df.merge(counts_df, on="player_id")

# Players with more than one row are handled separately (presumably one row per
# team for players who transferred -- confirm against the data source).
# `transferred_df` keeps the `counts` column and is an independent copy because
# it is modified afterwards.
has_multiple_rows = player_stats_df["counts"] > 1
transferred_df = player_stats_df.loc[has_multiple_rows].copy()

# Everyone else has exactly one row; `counts` is no longer needed for them.
player_stats_df = player_stats_df.loc[~has_multiple_rows].drop(columns=["counts"])

In [4]:
# Columns that are combined across a player's rows as a games-weighted mean:
# each value is multiplied by that row's games_played, summed per player, and
# divided by the player's summed games_played.
per_game_columns = ["aerials_won", "bad_controls", "blocks", "clearances", "crosses",
                    "dispossessed", "dribbled_past", "dribbles", "fouled", "fouls",
                    "interceptions", "key_passes", "long_balls", "offsides", "offsides_won",
                    "passes", "shots", "tackles", "through_balls"]

# Work on an independent frame so the assignments below never write into a
# slice of another DataFrame (avoids SettingWithCopyWarning).
transferred_df = transferred_df.copy()

# pass_accuracy is a percentage and cannot be summed directly; convert it to a
# count of accurate passes per row (accuracy% * passes * games / 100).
transferred_df["accurate_passes"] = (
    transferred_df["pass_accuracy"] * transferred_df["passes"] * transferred_df["games_played"] / 100
)
transferred_df = transferred_df.drop(columns=["pass_accuracy", "counts"])

# Scale the per-game columns by games_played so they can be summed per player.
averages_df = transferred_df[per_game_columns].mul(transferred_df["games_played"], axis=0)
averages_df["player_id"] = transferred_df["player_id"]
transferred_df = transferred_df.drop(columns=per_game_columns)

# Collapse to one row per player. Both frames are grouped on the same
# player_id values with as_index=False, so they end up in the same sorted
# order with the same fresh RangeIndex -- the division below relies on that.
averages_df = averages_df.groupby(["player_id"], as_index=False).sum()
transferred_df = transferred_df.groupby(["player_id"], as_index=False).sum()

# Divide the summed values by the summed games_played. player_id is set aside
# first so it is not divided as well.
player_ids = averages_df["player_id"]
averages_df = averages_df.drop(columns=["player_id"]).div(transferred_df["games_played"], axis=0)
averages_df["player_id"] = player_ids

transferred_df = transferred_df.merge(averages_df, on="player_id")

# Rebuild the percentage from the summed accurate-pass count; this inverts the
# conversion at the top of the cell using the combined passes / games_played.
transferred_df["pass_accuracy"] = (
    100 * transferred_df["accurate_passes"]
    / (transferred_df["passes"] * transferred_df["games_played"])
)
transferred_df = transferred_df.drop(columns=["accurate_passes"])

In [5]:
# Stack the single-row players and the aggregated multi-row players.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# replacement. ignore_index=True gives the combined frame unique row labels.
player_stats_df = pd.concat([player_stats_df, transferred_df], ignore_index=True)

# Keep only players with at least `min_games` games. Filtering before the
# division below yields the same rows as filtering after it, but avoids
# computing rates for rows that are discarded. The copy makes the filtered
# frame independent, so the column assignment below is not a write into a slice.
player_stats_df = player_stats_df.loc[player_stats_df["games_played"] >= min_games].copy()

# Convert these count columns to per-game rates by dividing by games_played.
count_columns = ["assists", "goals", "man_of_the_match",
                 "own_goals", "red_cards", "yellow_cards"]
player_stats_df[count_columns] = player_stats_df[count_columns].div(
    player_stats_df["games_played"], axis=0
)

# games_played has served its purpose; drop it and renumber rows from 0.
player_stats_df = player_stats_df.drop(columns=["games_played"]).reset_index(drop=True)

In [6]:
# Scale every statistic by its column maximum; player_id is left untouched.
# NOTE: a column whose maximum is 0 turns into NaN (0 / 0), as before.
stat_columns = [name for name in player_stats_df.columns if name != "player_id"]
column_maxima = player_stats_df[stat_columns].max()
player_stats_df[stat_columns] = player_stats_df[stat_columns] / column_maxima

In [7]:
# Write the player profile table and the processed statistics table to
# UTF-8 encoded CSV files.
csv_exports = {"players.csv": players_df, "stats.csv": player_stats_df}
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, encoding="utf-8")