## Importando bibliotecas e ambiente

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

In [0]:
# Obtain the Spark session for this notebook: getOrCreate() returns the
# already-active session when there is one, otherwise it builds a new one.
spark = (
    SparkSession.builder
    .appName("AtividadePraticaSpark")
    .getOrCreate()
)

## Teoria

Espaço para rodar a teoria passada na atividade.

In [0]:
# Read the `workspace.data.pokemon_data` table and preview its first five rows.
pkmn = spark.table("workspace.data.pokemon_data")

pkmn_preview = pkmn.limit(5)
display(pkmn_preview)

#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [0]:
# Drop fully identical rows, then compare row counts before and after to
# report how many duplicates were removed.
pkmn_clean = pkmn.dropDuplicates()

len_pkmn = pkmn.count()
len_db_pkmn_cleaned = pkmn_clean.count()
removed = len_pkmn - len_db_pkmn_cleaned

print(
    f"Total de Registros: {len_pkmn}",
    f"Linhas Duplicadas:   {removed}",
    sep="\n",
)

Total de Registros: 800
Linhas Duplicadas:   0


In [0]:
# Add a derived column holding Attack + Speed for every row.
attack_plus_speed = F.col("Attack") + F.col("Speed")
pkmn_clean = pkmn_clean.withColumn("Sum_Attack_Speed", attack_plus_speed)

display(pkmn_clean.limit(10))

#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Sum_Attack_Speed
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False,94
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False,122
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False,162
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False,180
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False,117
5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False,144
6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False,184
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False,230
6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False,204
7,Squirtle,Water,,314,44,48,65,50,64,43,1,False,91


In [0]:
# List the distinct primary types among rows whose Speed is above 100.
(
    pkmn_clean
    .where(F.col("Speed") > 100)
    .select("Type 1")
    .distinct()
    .display()
)

Type 1
Grass
Fire
Water
Bug
Normal
Poison
Electric
Ground
Fighting
Psychic


In [0]:
# Overwrite "Type 1" with "Fire" for every row whose Speed is above 100 and
# keep the existing value otherwise, then show the distinct types left among
# those fast rows.
#
# Columns are referenced with F.col so they resolve against the DataFrame
# being transformed (`pkmn_clean`). Referencing them through the parent
# DataFrame (`pkmn[...]`) only works while both share the same lineage.
is_fast = F.col("Speed") > 100

pkmn_clean = pkmn_clean.withColumn(
    "Type 1",
    F.when(is_fast, "Fire").otherwise(F.col("Type 1")),
)
pkmn_clean.filter(is_fast).select("Type 1").distinct().display()

In [0]:
# Reload the source table and replace the column names that contain spaces or
# dots with underscore-separated equivalents.
column_renames = {
    "Type 1": "Type_1",
    "Type 2": "Type_2",
    "Sp. Atk": "Sp_Atk",
    "Sp. Def": "Sp_Def",
}

pkmn = spark.table("workspace.data.pokemon_data")
for old_name, new_name in column_renames.items():
    pkmn = pkmn.withColumnRenamed(old_name, new_name)

## Exercícios

### Exercício 1

#### Exercício 1.1

In [0]:
# Read the `workspace.data.fut_players_data` table, print its schema and
# preview the first five rows.
fut_players_db = spark.table("workspace.data.fut_players_data")
fut_players_db.printSchema()

fut_players_preview = fut_players_db.limit(5)
display(fut_players_preview)

root
 |-- player_id: long (nullable = true)
 |-- player_name: string (nullable = true)
 |-- player_extended_name: string (nullable = true)
 |-- quality: string (nullable = true)
 |-- revision: string (nullable = true)
 |-- origin: string (nullable = true)
 |-- overall: long (nullable = true)
 |-- club: string (nullable = true)
 |-- league: string (nullable = true)
 |-- nationality: string (nullable = true)
 |-- position: string (nullable = true)
 |-- age: long (nullable = true)
 |-- date_of_birth: date (nullable = true)
 |-- height: long (nullable = true)
 |-- weight: long (nullable = true)
 |-- intl_rep: long (nullable = true)
 |-- added_date: date (nullable = true)
 |-- pace: double (nullable = true)
 |-- pace_acceleration: long (nullable = true)
 |-- pace_sprint_speed: long (nullable = true)
 |-- dribbling: double (nullable = true)
 |-- drib_agility: long (nullable = true)
 |-- drib_balance: long (nullable = true)
 |-- drib_reactions: long (nullable = true)
 |-- drib_ball_control: l

player_id,player_name,player_extended_name,quality,revision,origin,overall,club,league,nationality,position,age,date_of_birth,height,weight,intl_rep,added_date,pace,pace_acceleration,pace_sprint_speed,dribbling,drib_agility,drib_balance,drib_reactions,drib_ball_control,drib_dribbling,drib_composure,shooting,shoot_positioning,shoot_finishing,shoot_shot_power,shoot_long_shots,shoot_volleys,shoot_penalties,passing,pass_vision,pass_crossing,pass_free_kick,pass_short,pass_long,pass_curve,defending,def_interceptions,def_heading,def_marking,def_stand_tackle,def_slid_tackle,physicality,phys_jumping,phys_stamina,phys_strength,phys_aggression,gk_diving,gk_reflexes,gk_handling,gk_speed,gk_kicking,gk_positoning,pref_foot,att_workrate,def_workrate,weak_foot,skill_moves,cb,rb,lb,rwb,lwb,cdm,cm,rm,lm,cam,cf,rf,lf,rw,lw,st,traits,specialities,base_id
1,Pelé,Arantes Nascimento Edson,Gold - Rare,Icon,Prime,98,Icons,Icons,Brazil,CAM,78,1940-10-23,173,70,0,2018-09-19,95.0,95,95,96.0,94,93,98,97,96,98,96.0,97,98,94,94,95,93,93.0,97,90,89,96,88,89,60.0,67,94,55,53,49,76.0,88,86,76,59,,,,,,,Right,High,Med,4,5,70.0,77.0,77.0,79.0,79.0,77.0,91.0,95.0,95.0,96.0,96.0,96.0,96.0,96.0,96.0,95.0,Finesse Shot,"Speedster, Aerial Threat, Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder, Complete Forward, Poacher",237067
2,Maradona,Diego Maradona,Gold - Rare,Icon,Prime,97,Icons,Icons,Argentina,CAM,58,1960-10-30,165,70,0,2018-09-19,92.0,94,90,97.0,91,98,94,97,98,95,93.0,92,97,85,94,88,94,92.0,95,88,96,93,89,96,40.0,44,67,27,42,37,76.0,82,78,75,76,,,,,,,Left,High,Med,3,5,57.0,66.0,66.0,71.0,71.0,69.0,88.0,93.0,93.0,95.0,94.0,94.0,94.0,94.0,94.0,90.0,"Avoids Using Weaker Foot, Finesse Shot, Flair, Takes Finesse Free Kicks","Speedster, Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder, Complete Forward",190042
3,Ronaldo,Nazário de Lima Ronaldo Luís,Gold - Rare,Icon,Prime,96,Icons,Icons,Brazil,ST,42,1976-09-22,183,78,0,2018-09-19,97.0,97,97,95.0,91,85,95,96,97,89,95.0,95,98,93,90,96,89,81.0,81,75,90,86,73,87,45.0,41,84,39,44,38,76.0,82,81,85,47,,,,,,,Right,Med,Med,5,5,60.0,67.0,67.0,70.0,70.0,66.0,83.0,90.0,90.0,91.0,94.0,94.0,94.0,92.0,92.0,94.0,"Tries To Beat Defensive Line, Finesse Shot","Speedster, Dribbler, Distance Shooter, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",37576
4,Pelé,Arantes Nascimento Edson,Gold - Rare,Icon,Medium,95,Icons,Icons,Brazil,CF,78,1940-10-23,173,69,0,2018-09-19,96.0,95,96,95.0,96,95,95,95,94,95,93.0,94,95,90,91,93,91,90.0,93,88,88,91,87,85,56.0,64,89,51,49,45,75.0,89,87,74,57,,,,,,,Right,High,Med,4,5,66.0,74.0,74.0,77.0,77.0,74.0,88.0,93.0,93.0,93.0,94.0,94.0,94.0,94.0,94.0,92.0,Finesse Shot,"Speedster, Dribbler, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",237068
5,Maradona,Diego Maradona,Gold - Rare,Icon,Medium,95,Icons,Icons,Argentina,CAM,58,1960-10-30,165,72,0,2018-09-19,88.0,91,85,95.0,89,98,93,95,96,95,91.0,92,94,83,91,86,92,90.0,95,87,93,90,87,94,42.0,46,67,30,44,39,75.0,80,75,74,76,,,,,,,Left,High,Med,3,5,58.0,66.0,66.0,70.0,70.0,69.0,86.0,91.0,91.0,93.0,92.0,92.0,92.0,92.0,92.0,88.0,"Avoids Using Weaker Foot, Finesse Shot, Flair, Takes Finesse Free Kicks","Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder",237074


In [0]:
# Show the (column name, type) pairs of the players table as plain text.
column_types = fut_players_db.dtypes
print(column_types)

[('player_id', 'bigint'), ('player_name', 'string'), ('player_extended_name', 'string'), ('quality', 'string'), ('revision', 'string'), ('origin', 'string'), ('overall', 'bigint'), ('club', 'string'), ('league', 'string'), ('nationality', 'string'), ('position', 'string'), ('age', 'bigint'), ('date_of_birth', 'date'), ('height', 'bigint'), ('weight', 'bigint'), ('intl_rep', 'bigint'), ('added_date', 'date'), ('pace', 'double'), ('pace_acceleration', 'bigint'), ('pace_sprint_speed', 'bigint'), ('dribbling', 'double'), ('drib_agility', 'bigint'), ('drib_balance', 'bigint'), ('drib_reactions', 'bigint'), ('drib_ball_control', 'bigint'), ('drib_dribbling', 'bigint'), ('drib_composure', 'bigint'), ('shooting', 'double'), ('shoot_positioning', 'bigint'), ('shoot_finishing', 'bigint'), ('shoot_shot_power', 'bigint'), ('shoot_long_shots', 'bigint'), ('shoot_volleys', 'bigint'), ('shoot_penalties', 'bigint'), ('passing', 'double'), ('pass_vision', 'bigint'), ('pass_crossing', 'bigint'), ('pass_

In [0]:
# Drop fully identical rows, then compare row counts before and after to
# report how many duplicates were removed.
db_clean = fut_players_db.dropDuplicates()

len_db = fut_players_db.count()
len_db_cleaned = db_clean.count()
removed = len_db - len_db_cleaned

print(
    f"Total de Registros: {len_db}",
    f"Linhas Duplicadas:   {removed}",
    sep="\n",
)

Total de Registros: 18831
Linhas Duplicadas:   0


#### Exercício 1.2

In [0]:
# Keep only the cards whose dribbling AND shooting are both above 90.
elite_dribbling = F.col('dribbling') > 90
elite_shooting = F.col('shooting') > 90

the_best = db_clean.filter(elite_dribbling & elite_shooting)
display(the_best)

player_id,player_name,player_extended_name,quality,revision,origin,overall,club,league,nationality,position,age,date_of_birth,height,weight,intl_rep,added_date,pace,pace_acceleration,pace_sprint_speed,dribbling,drib_agility,drib_balance,drib_reactions,drib_ball_control,drib_dribbling,drib_composure,shooting,shoot_positioning,shoot_finishing,shoot_shot_power,shoot_long_shots,shoot_volleys,shoot_penalties,passing,pass_vision,pass_crossing,pass_free_kick,pass_short,pass_long,pass_curve,defending,def_interceptions,def_heading,def_marking,def_stand_tackle,def_slid_tackle,physicality,phys_jumping,phys_stamina,phys_strength,phys_aggression,gk_diving,gk_reflexes,gk_handling,gk_speed,gk_kicking,gk_positoning,pref_foot,att_workrate,def_workrate,weak_foot,skill_moves,cb,rb,lb,rwb,lwb,cdm,cm,rm,lm,cam,cf,rf,lf,rw,lw,st,traits,specialities,base_id
1,Pelé,Arantes Nascimento Edson,Gold - Rare,Icon,Prime,98,Icons,Icons,Brazil,CAM,78,1940-10-23,173,70,0,2018-09-19,95.0,95,95,96.0,94,93,98,97,96,98,96.0,97,98,94,94,95,93,93.0,97,90,89,96,88,89,60.0,67,94,55,53,49,76.0,88,86,76,59,,,,,,,Right,High,Med,4,5,70.0,77.0,77.0,79.0,79.0,77.0,91.0,95.0,95.0,96.0,96.0,96.0,96.0,96.0,96.0,95.0,Finesse Shot,"Speedster, Aerial Threat, Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder, Complete Forward, Poacher",237067
2,Maradona,Diego Maradona,Gold - Rare,Icon,Prime,97,Icons,Icons,Argentina,CAM,58,1960-10-30,165,70,0,2018-09-19,92.0,94,90,97.0,91,98,94,97,98,95,93.0,92,97,85,94,88,94,92.0,95,88,96,93,89,96,40.0,44,67,27,42,37,76.0,82,78,75,76,,,,,,,Left,High,Med,3,5,57.0,66.0,66.0,71.0,71.0,69.0,88.0,93.0,93.0,95.0,94.0,94.0,94.0,94.0,94.0,90.0,"Avoids Using Weaker Foot, Finesse Shot, Flair, Takes Finesse Free Kicks","Speedster, Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder, Complete Forward",190042
3,Ronaldo,Nazário de Lima Ronaldo Luís,Gold - Rare,Icon,Prime,96,Icons,Icons,Brazil,ST,42,1976-09-22,183,78,0,2018-09-19,97.0,97,97,95.0,91,85,95,96,97,89,95.0,95,98,93,90,96,89,81.0,81,75,90,86,73,87,45.0,41,84,39,44,38,76.0,82,81,85,47,,,,,,,Right,Med,Med,5,5,60.0,67.0,67.0,70.0,70.0,66.0,83.0,90.0,90.0,91.0,94.0,94.0,94.0,92.0,92.0,94.0,"Tries To Beat Defensive Line, Finesse Shot","Speedster, Dribbler, Distance Shooter, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",37576
4,Pelé,Arantes Nascimento Edson,Gold - Rare,Icon,Medium,95,Icons,Icons,Brazil,CF,78,1940-10-23,173,69,0,2018-09-19,96.0,95,96,95.0,96,95,95,95,94,95,93.0,94,95,90,91,93,91,90.0,93,88,88,91,87,85,56.0,64,89,51,49,45,75.0,89,87,74,57,,,,,,,Right,High,Med,4,5,66.0,74.0,74.0,77.0,77.0,74.0,88.0,93.0,93.0,93.0,94.0,94.0,94.0,94.0,94.0,92.0,Finesse Shot,"Speedster, Dribbler, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",237068
5,Maradona,Diego Maradona,Gold - Rare,Icon,Medium,95,Icons,Icons,Argentina,CAM,58,1960-10-30,165,72,0,2018-09-19,88.0,91,85,95.0,89,98,93,95,96,95,91.0,92,94,83,91,86,92,90.0,95,87,93,90,87,94,42.0,46,67,30,44,39,75.0,80,75,74,76,,,,,,,Left,High,Med,3,5,58.0,66.0,66.0,70.0,70.0,69.0,86.0,91.0,91.0,93.0,92.0,92.0,92.0,92.0,92.0,88.0,"Avoids Using Weaker Foot, Finesse Shot, Flair, Takes Finesse Free Kicks","Dribbler, Play Maker, Distance Shooter, Crosser, FK Specialist, Acrobat, Clinical Finisher, Midfielder",237074
7,Ronaldo,Nazário de Lima Ronaldo Luís,Gold - Rare,Icon,Medium,94,Icons,Icons,Brazil,ST,42,1976-09-22,183,82,0,2018-09-19,93.0,94,93,93.0,87,81,94,94,95,91,93.0,96,96,91,89,90,90,80.0,84,72,90,84,72,84,46.0,43,81,39,46,38,80.0,81,79,89,58,,,,,,,Right,Med,Low,5,5,61.0,66.0,66.0,69.0,69.0,66.0,82.0,89.0,89.0,90.0,92.0,92.0,92.0,91.0,91.0,92.0,Finesse Shot,"Speedster, Dribbler, Distance Shooter, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",237064
12,Best,George Best,Gold - Rare,Icon,Prime,93,Icons,Icons,Northern Ireland,RW,72,1946-05-22,175,65,0,2018-09-19,93.0,94,92,94.0,93,91,90,95,95,92,91.0,89,94,89,90,88,88,84.0,86,89,75,85,79,82,58.0,63,87,52,53,50,71.0,80,87,68,54,,,,,,,Right,Med,Med,4,4,66.0,74.0,74.0,77.0,77.0,72.0,85.0,90.0,90.0,90.0,91.0,91.0,91.0,92.0,92.0,90.0,Flair,"Speedster, Dribbler, Distance Shooter, Crosser, Acrobat, Clinical Finisher, Complete Forward, Poacher",226764
18,Del Piero,Alessandro Del Piero,Gold - Rare,Icon,Prime,92,Icons,Icons,Italy,CF,44,1974-11-09,174,74,0,2018-09-19,83.0,84,83,92.0,84,66,89,95,95,93,92.0,94,95,86,91,87,94,89.0,94,89,91,89,80,90,43.0,41,76,38,42,36,67.0,58,82,64,59,,,,,,,Right,Med,Med,5,4,56.0,65.0,65.0,70.0,70.0,66.0,85.0,90.0,90.0,91.0,92.0,92.0,92.0,91.0,91.0,88.0,"Finesse Shot, Team Player, Set Play Specialist, Takes Finesse Free Kicks","Dribbler, Distance Shooter, Crosser, FK Specialist, Clinical Finisher, Complete Forward, Poacher",238382
30,Stoichkov,Hristo Stoichkov,Gold - Rare,Icon,Prime,92,Icons,Icons,Bulgaria,ST,53,1966-02-08,178,73,0,2018-09-19,92.0,93,91,92.0,90,84,89,92,93,89,93.0,92,94,92,92,92,92,86.0,88,85,89,85,82,90,50.0,53,82,36,53,48,86.0,79,86,83,93,,,,,,,Left,Med,Med,3,4,65.0,70.0,70.0,73.0,73.0,72.0,85.0,90.0,90.0,90.0,91.0,91.0,91.0,91.0,91.0,90.0,"Power Free-Kick, Technical Dribbler (CPU AI Only)","Speedster, Dribbler, Distance Shooter, FK Specialist, Acrobat, Clinical Finisher, Complete Forward, Poacher",4833
74,Zola,Gianfranco Zola,Gold - Rare,Icon,Prime,90,Icons,Icons,Italy,CF,52,1966-07-05,168,67,0,2018-09-19,86.0,88,84,91.0,85,86,90,92,93,79,91.0,89,92,91,90,83,87,88.0,88,85,92,90,83,90,43.0,33,64,41,46,38,63.0,52,76,57,64,,,,,,,Right,High,Med,4,4,54.0,64.0,64.0,68.0,68.0,66.0,83.0,89.0,89.0,90.0,90.0,90.0,90.0,90.0,90.0,86.0,"Finesse Shot, Flair, Long Shot Taker (CPU AI Only), Outside Foot Shot, Chip Shot (CPU AI Only), Technical Dribbler (CPU AI Only)","Dribbler, Distance Shooter, FK Specialist, Clinical Finisher",1201


In [0]:
# Lookup table with the id, name and nationality of every card.
nationality_columns = ['player_id', 'player_name', 'nationality']
nationalities = db_clean.select(*nationality_columns)

display(nationalities.limit(5))

player_id,player_name,nationality
1,Pelé,Brazil
2,Maradona,Argentina
3,Ronaldo,Brazil
4,Pelé,Brazil
5,Maradona,Argentina


In [0]:
# Attach name and nationality to the top cards by joining on player_id.
best_stats = the_best.select(
    'player_id', 'position', 'dribbling', 'shooting', 'overall'
)

the_best_with_nationality = (
    best_stats
    .join(nationalities, on='player_id', how='inner')
    .select(
        'player_id', 'player_name', 'nationality',
        'position', 'dribbling', 'shooting', 'overall',
    )
)

display(the_best_with_nationality.limit(5))

player_id,player_name,nationality,position,dribbling,shooting,overall
17576,Messi,Argentina,CF,99.0,99.0,99
17577,Mbappé,France,RW,97.0,96.0,97
17578,De Bruyne,Belgium,CAM,96.0,96.0,97
30,Stoichkov,Bulgaria,ST,92.0,93.0,92
17469,Messi,Argentina,ST,98.0,93.0,96


### Exercício 2

In [0]:
# Average `overall` per nationality, sorted from highest to lowest average.
country_avg_overall = (
    fut_players_db
    .groupBy("nationality")
    .agg(F.avg("overall").alias("avg_overall"))
    .orderBy(F.col("avg_overall").desc())
)

country_avg_overall.limit(5).display()

nationality,avg_overall
Dominican Republic,79.0
United Arab Emirates,76.5
Central African Republic,76.0
Tanzania,76.0
Fiji,75.5


In [0]:
# Compare the first row of `country_avg_overall` with Brazil's row.
#
# first() returns None on an empty result, whereas collect()[0] raises a bare
# IndexError. Checking for None lets the cell fail with a message that says
# what is actually missing.
best = country_avg_overall.first()
brazil = (
    country_avg_overall
    .filter(F.col('nationality') == 'Brazil')
    .first()
)

if best is None:
    raise ValueError("country_avg_overall is empty; cannot pick the best nationality")
if brazil is None:
    raise ValueError("country_avg_overall has no row for nationality 'Brazil'")

display({
    "Melhor overall médio": f"{best['nationality']}: {best['avg_overall']:.2f}",
    "Overall médio do Brasil": round(brazil['avg_overall'], 2)
})

{'Melhor overall médio': 'Dominican Republic: 79.00',
 'Overall médio do Brasil': 75.22}

In [0]:
# Bucket `overall` into named tiers. The branches are evaluated in order, so
# each upper bound is inclusive and applies only to values that did not match
# an earlier branch; anything not matched by a branch falls through to "Lenda".
overall_score = F.col('overall')

players_classification = (
    F.when(overall_score <= 50, "Amador")
    .when(overall_score <= 60, "Ruim")
    .when(overall_score <= 70, "Ok")
    .when(overall_score <= 80, "Bom")
    .when(overall_score <= 90, "Ótimo")
    .otherwise("Lenda")
)

# Keep only the id and score next to the derived tier label.
fut_players_with_classification = (
    fut_players_db
    .select('player_id', 'overall')
    .withColumn('classification', players_classification)
)

In [0]:
# Count the cards in each tier, most populated tier first.
(
    fut_players_with_classification
    .groupBy("classification")
    .count()
    .orderBy(F.col("count").desc())
    .display()
)

classification,count
Ok,9500
Bom,4785
Ruim,2788
Ótimo,1482
Lenda,141
Amador,135


### Desafios

In [0]:
# Position codes grouped by the squad line they are assigned to.
pos_goleiro = ["GK"]
pos_defesa = ["CB", "LB", "RB", "LWB", "RWB"]
pos_meio = ["CM", "CDM", "CAM", "LM", "RM"]
pos_ataque = ["ST", "CF", "LW", "RW", "LF", "RF"]

# Brazilian cards only, reduced to the columns used by the squad selection.
df_brazil = (
    fut_players_db
    .where(F.col("nationality") == "Brazil")
    .select("player_name", "player_id", "nationality", "position", "overall", "age")
)


In [0]:
# Map each position code to its squad line; codes found in none of the four
# lists are labelled "Outros".
position_code = F.col("position")

pos_group_logic = (
    F.when(position_code.isin(pos_goleiro), "Goleiro")
    .when(position_code.isin(pos_defesa), "Defesa")
    .when(position_code.isin(pos_meio), "Meio")
    .when(position_code.isin(pos_ataque), "Ataque")
    .otherwise("Outros")
)

# Label every Brazilian card and discard the ones labelled "Outros".
df_pos_grouped = (
    df_brazil
    .withColumn("position_group", pos_group_logic)
    .where(F.col("position_group") != "Outros")
)
display(df_pos_grouped.limit(5))

player_name,player_id,nationality,position,overall,age,position_group
Pelé,1,Brazil,CAM,98,78,Meio
Ronaldo,3,Brazil,ST,96,42,Ataque
Pelé,4,Brazil,CF,95,78,Ataque
Ronaldo,7,Brazil,ST,94,42,Ataque
Ronaldinho,9,Brazil,LW,94,38,Ataque


In [0]:
# Rank cards inside each squad line: highest overall first, then youngest.
#
# `player_id` is the final sort key because several cards tie on
# (overall, age); without it row_number() assigns those ranks arbitrarily and
# the selected squad can change between runs.
# NOTE(review): assumes player_id is unique per card — confirm against the table.
window_spec_pos = (
    Window.partitionBy("position_group")
    .orderBy(
        F.col("overall").desc(),
        F.col("age").asc(),
        F.col("player_id").asc(),
    )
)

df_ranked = df_pos_grouped.withColumn("rank", F.row_number().over(window_spec_pos))
display(df_ranked.limit(15))

player_name,player_id,nationality,position,overall,age,position_group,rank
Neymar Jr,17620,Brazil,LW,97,27,Ataque,1
Ronaldo,3,Brazil,ST,96,42,Ataque,2
Pelé,4,Brazil,CF,95,78,Ataque,3
Ronaldinho,9,Brazil,LW,94,38,Ataque,4
Ronaldo,7,Brazil,ST,94,42,Ataque,5
Neymar Jr,16951,Brazil,LW,93,27,Ataque,6
Neymar Jr,17014,Brazil,LF,93,27,Ataque,7
Vinícius Júnior,17727,Brazil,LW,92,18,Ataque,8
Neymar Jr,846,Brazil,LW,92,27,Ataque,9
Neymar Jr,17616,Brazil,LW,92,27,Ataque,10


In [0]:
# 4-4-2 selection: 1 goalkeeper, 4 defenders, 4 midfielders and 2 forwards,
# taken from the top-ranked cards of each squad line.
pick_goalkeeper = (F.col("position_group") == "Goleiro") & (F.col("rank") <= 1)
pick_defenders = (F.col("position_group") == "Defesa") & (F.col("rank") <= 4)
pick_midfielders = (F.col("position_group") == "Meio") & (F.col("rank") <= 4)
pick_forwards = (F.col("position_group") == "Ataque") & (F.col("rank") <= 2)

filter_442 = pick_goalkeeper | pick_defenders | pick_midfielders | pick_forwards

df_dream_team = (
    df_ranked
    .where(filter_442)
    .select("player_name", "nationality", "position_group", "overall", "rank")
)

print("🇧🇷 DREAM TEAM BRAZIL 🇧🇷")
print(f"Players Count: {df_dream_team.count()}")
display(df_dream_team)

🇧🇷 DREAM TEAM BRAZIL 🇧🇷
Players Count: 11


player_name,nationality,position_group,overall,rank
Neymar Jr,Brazil,Ataque,97,1
Ronaldo,Brazil,Ataque,96,2
Marcelo,Brazil,Defesa,94,1
Carlos,Brazil,Defesa,91,2
Thiago Silva,Brazil,Defesa,90,3
Dani Alves,Brazil,Defesa,90,4
Ederson,Brazil,Goleiro,87,1
Pelé,Brazil,Meio,98,1
Neymar Jr,Brazil,Meio,94,2
Ronaldinho,Brazil,Meio,91,3


#### Bônus

In [0]:
# Rank the cards that share a player name, best first: highest overall, then
# lowest age. Rank 1 is the card a later step keeps for that name.
#
# `player_id` is the final sort key so cards tied on (overall, age) always get
# the same row_number instead of an arbitrary one.
# NOTE(review): assumes player_id is unique per card — confirm against the table.
# NOTE(review): partitioning by `player_name` alone also merges different
# players who share a display name — confirm whether a stronger key is needed.
window_best_card = (
    Window.partitionBy("player_name")
    .orderBy(
        F.col("overall").desc(),
        F.col("age").asc(),
        F.col("player_id").asc(),
    )
)

df_brazil_ranked = df_brazil.withColumn("rank_carta", F.row_number().over(window_best_card))
display(df_brazil_ranked.limit(20))

player_name,player_id,nationality,position,overall,age,rank_carta
Adaílton,11303,Brazil,ST,67,28,1
Ademilson,8636,Brazil,ST,71,25,1
Adriano,1374,Brazil,LB,78,34,1
Adriano,7369,Brazil,ST,71,31,2
Adriano Facchini,11932,Brazil,GK,71,35,1
Adryan,9149,Brazil,LM,71,24,1
Ailton,2272,Brazil,LB,75,23,1
Alan Carvalho,13509,Brazil,LW,76,29,1
Alan Kardec,13505,Brazil,ST,76,30,1
Alan Patrick,2024,Brazil,CAM,77,27,1


In [0]:
# Keep only the top-ranked card (rank_carta == 1) of each player name.
is_best_card = F.col("rank_carta") == 1
df_brazil_best_cards = df_brazil_ranked.where(is_best_card)

display(df_brazil_best_cards.limit(10))

player_name,player_id,nationality,position,overall,age,rank_carta
Adaílton,11303,Brazil,ST,67,28,1
Ademilson,8636,Brazil,ST,71,25,1
Adriano,1374,Brazil,LB,78,34,1
Adriano Facchini,11932,Brazil,GK,71,35,1
Adryan,9149,Brazil,LM,71,24,1
Ailton,2272,Brazil,LB,75,23,1
Alan Carvalho,13509,Brazil,LW,76,29,1
Alan Kardec,13505,Brazil,ST,76,30,1
Alan Patrick,2024,Brazil,CAM,77,27,1
Alan Schons,8283,Brazil,CM,67,25,1


In [0]:
# Assign a squad line to each best card and discard the ones labelled "Outros".
df_pos_grouped_fixed = (
    df_brazil_best_cards
    .withColumn("position_group", pos_group_logic)
    .where(F.col("position_group") != "Outros")
)

# Preview the ten highest-rated remaining cards.
display(
    df_pos_grouped_fixed
    .orderBy(F.col("overall").desc())
    .limit(10)
)

player_name,player_id,nationality,position,overall,age,rank_carta,position_group
Pelé,1,Brazil,CAM,98,78,1,Meio
Neymar Jr,17620,Brazil,LW,97,27,1,Ataque
Ronaldo,3,Brazil,ST,96,42,1,Ataque
Ronaldinho,9,Brazil,LW,94,38,1,Ataque
Marcelo,17604,Brazil,LB,94,30,1,Defesa
Rivaldo,13091,Brazil,LW,92,46,1,Ataque
Vinícius Júnior,17727,Brazil,LW,92,18,1,Ataque
Carlos,52,Brazil,LB,91,45,1,Defesa
Sócrates,13096,Brazil,CAM,91,64,1,Meio
Casemiro,18461,Brazil,CDM,90,26,1,Meio


In [0]:
# Re-rank inside each squad line, this time over the one-card-per-name data,
# reusing the same window specification as the first ranking.
rank_in_group = F.row_number().over(window_spec_pos)
df_ranked_unique = df_pos_grouped_fixed.withColumn("rank", rank_in_group)

display(df_ranked_unique.limit(15))

player_name,player_id,nationality,position,overall,age,rank_carta,position_group,rank
Neymar Jr,17620,Brazil,LW,97,27,1,Ataque,1
Ronaldo,3,Brazil,ST,96,42,1,Ataque,2
Ronaldinho,9,Brazil,LW,94,38,1,Ataque,3
Vinícius Júnior,17727,Brazil,LW,92,18,1,Ataque,4
Rivaldo,13091,Brazil,LW,92,46,1,Ataque,5
Coutinho,17503,Brazil,LW,90,26,1,Ataque,6
Firmino,17447,Brazil,CF,88,27,1,Ataque,7
Alexandre Pato,17305,Brazil,ST,88,29,1,Ataque,8
Joelinton,18148,Brazil,ST,86,22,1,Ataque,9
Willian,17165,Brazil,LW,86,30,1,Ataque,10


In [0]:
# Apply the same 4-4-2 filter to the one-card-per-name ranking and show the squad.
df_dream_team_unique = (
    df_ranked_unique
    .where(filter_442)
    .select("player_name", "nationality", "position_group", "overall", "rank")
)

print("🇧🇷 DREAM TEAM BRAZIL(UNIQUE) 🇧🇷")
print(f"Players Count: {df_dream_team_unique.count()}")
display(df_dream_team_unique)

🇧🇷 DREAM TEAM BRAZIL(UNIQUE) 🇧🇷
Players Count: 11


player_name,nationality,position_group,overall,rank
Neymar Jr,Brazil,Ataque,97,1
Ronaldo,Brazil,Ataque,96,2
Marcelo,Brazil,Defesa,94,1
Carlos,Brazil,Defesa,91,2
Thiago Silva,Brazil,Defesa,90,3
Dani Alves,Brazil,Defesa,90,4
Ederson,Brazil,Goleiro,87,1
Pelé,Brazil,Meio,98,1
Sócrates,Brazil,Meio,91,2
Arthur,Brazil,Meio,90,3
